rsx/gl: Upgrade interpreter model to use a minimal set of reusable pipelines as a base

- We generate optimized variants in the background if we have a partial miss
- A full miss is impossible
This commit is contained in:
kd-11 2026-04-12 20:00:20 +03:00 committed by kd-11
parent 5c4b0a165b
commit af7ae45888
5 changed files with 248 additions and 82 deletions

View file

@ -13,6 +13,7 @@
namespace gl
{
using glsl::shader;
using enum program_common::interpreter::compiler_option;
namespace interpreter
{
@ -52,14 +53,25 @@ namespace gl
dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation");
const auto variants = program_common::interpreter::get_interpreter_variants();
const u32 limit = ::size32(variants);
dlg->set_limit(0, limit);
dlg->set_limit(1, 1);
const u32 limit1 = ::size32(variants.base_pipelines);
const u32 limit2 = ::size32(variants.pipelines);
dlg->set_limit(0, limit1);
dlg->set_limit(1, limit2);
atomic_t<u32> ctr = 0;
auto progress_hook = [&](interpreter::cached_program*) { ctr++; };
for (auto& variant : variants)
auto update_progress = [&](u32 stage)
{
const auto completed = ctr.load();
const auto limit = stage ? limit2 : limit1;
const auto message = fmt::format("%s variant %u of %u...", stage ? "Linking" : "Building", ctr.load(), limit);
dlg->update_msg(stage, message);
dlg->set_value(stage, completed);
};
// We only need to build the base "compatible pipeline" pairs.
for (const auto& variant : variants.base_pipelines)
{
build_program_async(variant.first | variant.second, progress_hook);
}
@ -67,14 +79,47 @@ namespace gl
do
{
std::this_thread::sleep_for(16ms);
update_progress(0);
}
while (ctr < limit1);
const u32 completed = ctr.load();
dlg->update_msg(0, fmt::format("Building variant %u of %u...", completed, limit));
dlg->set_value(0, completed);
} while (ctr < limit);
// Show final progress
update_progress(0);
dlg->inc_value(1, 1);
dlg->refresh();
// Second stage. Propagate base pipelines to all compatible variants
ctr = 0;
std::lock_guard lock(m_program_cache_lock);
for (const auto& variant : variants.pipelines)
{
const u64 compiler_options = variant.vs_opts.shader_opt | variant.fs_opts.shader_opt;
if (m_program_cache.find(compiler_options) != m_program_cache.end())
{
// Base variant
continue;
}
const u64 compatible_options = variant.vs_opts.compatible_shader_opts | variant.fs_opts.compatible_shader_opts;
auto base_pipeline = m_program_cache.find(compatible_options);
if (base_pipeline == m_program_cache.end())
{
fmt::throw_exception("Base variant was not found in the cache.");
}
auto data = new interpreter::cached_program();
data->flags |= interpreter::CACHED_PIPE_UNOPTIMIZED;
data->allocator = base_pipeline->second->allocator;
data->vertex_shader = base_pipeline->second->vertex_shader;
data->fragment_shader = base_pipeline->second->fragment_shader;
data->prog = base_pipeline->second->prog;
m_program_cache[compiler_options].reset(data);
}
ctr = limit2;
update_progress(1);
// Minor stall to avoid visual flashing
std::this_thread::sleep_for(16ms);
}
void shader_interpreter::destroy()
@ -85,6 +130,16 @@ namespace gl
prog.second->fragment_shader->remove();
prog.second->prog->remove();
}
for (auto& shader : m_vs_cache)
{
shader.second->remove();
}
for (auto& shader : m_fs_cache)
{
shader.second->remove();
}
}
glsl::program* shader_interpreter::get(const interpreter::program_metadata& metadata, u32 vp_ctrl, u32 fp_ctrl)
@ -100,49 +155,51 @@ namespace gl
case rsx::comparison_function::never:
return nullptr;
case rsx::comparison_function::greater_or_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
break;
case rsx::comparison_function::greater:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_G;
break;
case rsx::comparison_function::less_or_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_LE;
break;
case rsx::comparison_function::less:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_L;
break;
case rsx::comparison_function::equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_EQ;
break;
case rsx::comparison_function::not_equal:
opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
break;
}
}
if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT;
if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL;
if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES;
if (metadata.has_branch_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL;
if (metadata.has_pack_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING;
if (rsx::method_registers.polygon_stipple_enabled()) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING;
if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING;
if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= COMPILER_OPT_ENABLE_DEPTH_EXPORT;
if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= COMPILER_OPT_ENABLE_F32_EXPORT;
if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) opt |= COMPILER_OPT_ENABLE_KIL;
if (metadata.referenced_textures_mask) opt |= COMPILER_OPT_ENABLE_TEXTURES;
if (metadata.has_branch_instructions) opt |= COMPILER_OPT_ENABLE_FLOW_CTRL;
if (metadata.has_pack_instructions) opt |= COMPILER_OPT_ENABLE_PACKING;
if (rsx::method_registers.polygon_stipple_enabled()) opt |= COMPILER_OPT_ENABLE_STIPPLING;
if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) opt |= COMPILER_OPT_ENABLE_INSTANCING;
if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]]
{
m_current_interpreter = it->second.get();
}
else
{
m_current_interpreter = build_program(opt);
std::lock_guard lock(m_program_cache_lock);
if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]]
{
m_current_interpreter = it->second.get();
}
return m_current_interpreter->prog.get();
}
m_current_interpreter = build_program(opt);
return m_current_interpreter->prog.get();
}
void shader_interpreter::build_vs(u64 compiler_options, interpreter::cached_program& prog_data)
{
compiler_options &= COMPILER_OPT_ALL_VS_MASK;
{
std::lock_guard lock(m_vs_cache_lock);
@ -165,7 +222,7 @@ namespace gl
std::string shader_str;
ParamArray arr;
null_prog.ctrl = (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING)
null_prog.ctrl = (compiler_options & COMPILER_OPT_ENABLE_INSTANCING)
? RSX_SHADER_CONTROL_INSTANCED_CONSTANTS
: 0;
GLVertexDecompilerThread comp(null_prog, shader_str, arr);
@ -195,7 +252,7 @@ namespace gl
" uvec4 vp_instructions[];\n"
"};\n\n";
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING)
if (compiler_options & COMPILER_OPT_ENABLE_INSTANCING)
{
builder << "#define _ENABLE_INSTANCED_CONSTANTS\n";
}
@ -230,20 +287,9 @@ namespace gl
void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data)
{
{
std::lock_guard lock(m_fs_cache_lock);
if (auto found = m_fs_cache.find(compiler_options);
found != m_fs_cache.end())
{
prog_data.fragment_shader = found->second;
return;
}
}
// Allocate TIUs
auto& allocator = prog_data.allocator;
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES)
{
allocator.create(::glsl::program_domain::glsl_fragment_program);
if (allocator.max_image_units >= 32)
@ -277,6 +323,19 @@ namespace gl
}
}
// Cache lookup
compiler_options &= COMPILER_OPT_ALL_FS_MASK;
{
std::lock_guard lock(m_fs_cache_lock);
if (auto found = m_fs_cache.find(compiler_options);
found != m_fs_cache.end())
{
prog_data.fragment_shader = found->second;
return;
}
}
u32 len;
ParamArray arr;
std::string shader_str;
@ -291,67 +350,67 @@ namespace gl
::glsl::insert_subheader_block(builder);
comp.insertConstants(builder);
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_GE)
{
builder << "#define ALPHA_TEST_GEQUAL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_G)
{
builder << "#define ALPHA_TEST_GREATER\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_LE)
{
builder << "#define ALPHA_TEST_LEQUAL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_L)
{
builder << "#define ALPHA_TEST_LESS\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_EQ)
{
builder << "#define ALPHA_TEST_EQUAL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE)
if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_NE)
{
builder << "#define ALPHA_TEST_NEQUAL\n";
}
if (!(compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT))
if (!(compiler_options & COMPILER_OPT_ENABLE_F32_EXPORT))
{
builder << "#define WITH_HALF_OUTPUT_REGISTER\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT)
if (compiler_options & COMPILER_OPT_ENABLE_DEPTH_EXPORT)
{
builder << "#define WITH_DEPTH_EXPORT\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL)
if (compiler_options & COMPILER_OPT_ENABLE_FLOW_CTRL)
{
builder << "#define WITH_FLOW_CTRL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING)
if (compiler_options & COMPILER_OPT_ENABLE_PACKING)
{
builder << "#define WITH_PACKING\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL)
if (compiler_options & COMPILER_OPT_ENABLE_KIL)
{
builder << "#define WITH_KIL\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING)
if (compiler_options & COMPILER_OPT_ENABLE_STIPPLING)
{
builder << "#define WITH_STIPPLING\n";
}
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES)
{
builder << "#define WITH_TEXTURES\n\n";
@ -460,7 +519,7 @@ namespace gl
data->prog->uniforms[0] = GL_STREAM_BUFFER_START + 0;
data->prog->uniforms[1] = GL_STREAM_BUFFER_START + 1;
if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES)
{
// Initialize texture bindings
int assigned = 0;
@ -478,12 +537,13 @@ namespace gl
}
}
std::lock_guard lock(m_program_cache_lock);
m_program_cache[compiler_options].reset(data);
}
bool shader_interpreter::is_interpreter(const glsl::program* program) const
{
return (program == m_current_interpreter->prog.get());
return (m_current_interpreter && program == m_current_interpreter->prog.get());
}
void shader_interpreter::update_fragment_textures(

View file

@ -85,10 +85,10 @@ namespace gl
shared_mutex m_vs_cache_lock;
shared_mutex m_fs_cache_lock;
shared_mutex m_program_cache_lock;
shader_cache_t m_vs_cache;
shader_cache_t m_fs_cache;
pipeline_cache_t m_program_cache;
void build_vs(u64 compiler_options, interpreter::cached_program& prog_data);

View file

@ -5,6 +5,20 @@
namespace program_common::interpreter
{
// Describes the vertex-shader option space for the interpreter:
// which compiler-option bits form the "base" (always-on) set, and the
// concrete option combinations that must be built to cover all variants.
struct vs_variants_metadata
{
u32 vs_base_mask = 0; // Bitmask of VS compiler options baked into the base (compatible) pipelines
std::unordered_set<u32> vs_base_opts; // Set of options that covers all possible variants regardless of optimization
std::unordered_set<u32> vs_opts; // Full set of options that covers all variants with full optimization
};
// Describes the fragment-shader option space for the interpreter:
// mirrors vs_variants_metadata but for FS compiler-option bits.
struct fs_variants_metadata
{
u32 fs_base_mask = 0; // Bitmask of FS compiler options baked into the base (compatible) pipelines
std::unordered_set<u32> fs_base_opts; // Set of options that covers all possible variants regardless of optimization
std::unordered_set<u32> fs_opts; // Full set of options that covers all variants with full optimization
};
void bitrange_foreach(u32 min, u32 max, std::function<void(u32)> func)
{
if (max <= min)
@ -22,50 +36,118 @@ namespace program_common::interpreter
}
}
std::vector<interpreter_variant_t> get_interpreter_variants()
vs_variants_metadata prepare_vs_variants_data()
{
// Separable passes to fetch all possible variants
std::unordered_set<u32> fs_masks;
fs_masks.insert(0);
vs_variants_metadata result;
result.vs_opts.insert(0);
result.vs_base_opts.insert(0);
const u32 base_vs_mask = COMPILER_OPT_ALL_VS_MASK & ~(COMPILER_OPT_VS_EXCL_MASK);
bitrange_foreach(COMPILER_OPT_VS_MIN, COMPILER_OPT_VS_MAX, [&](u32 vs_opt)
{
result.vs_opts.insert(vs_opt);
if (const auto excl_mask = (vs_opt & COMPILER_OPT_VS_EXCL_MASK);
excl_mask != 0)
{
result.vs_base_opts.insert(base_vs_mask | excl_mask);
}
});
result.vs_opts.insert(base_vs_mask);
result.vs_base_opts.insert(base_vs_mask);
result.vs_base_mask = base_vs_mask;
return result;
}
fs_variants_metadata prepare_fs_variants_data()
{
fs_variants_metadata result;
result.fs_opts.insert(0);
result.fs_base_opts.insert(0);
const u32 base_fs_mask = COMPILER_OPT_BASE_FS_MASK & ~(COMPILER_OPT_FS_EXCL_MASK);
bitrange_foreach(COMPILER_OPT_FS_MIN, COMPILER_OPT_FS_MAX, [&](u32 fs_opt)
{
fs_masks.insert(fs_opt);
result.fs_opts.insert(fs_opt);
if (const auto excl_mask = (fs_opt & COMPILER_OPT_FS_EXCL_MASK);
excl_mask != 0)
{
result.fs_base_opts.insert(base_fs_mask | excl_mask);
}
});
result.fs_opts.insert(base_fs_mask);
result.fs_base_opts.insert(base_fs_mask);
// Now we add in the alpha testing variants for all fs variants.
// Only one alpha test type is usable at once
std::unordered_set<u32> fs_alpha_test_masks;
std::unordered_set<u32> fs_alpha_test_base_masks;
for (u32 alpha_test_bit = COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
alpha_test_bit <= COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
alpha_test_bit <<= 1)
{
for (const auto& mask : fs_masks)
for (const auto& mask : result.fs_opts)
{
fs_alpha_test_masks.insert(mask | alpha_test_bit);
}
for (const auto& mask : result.fs_base_opts)
{
fs_alpha_test_base_masks.insert(mask | alpha_test_bit);
}
}
// VS
std::unordered_set<u32> vs_masks;
vs_masks.insert(0);
bitrange_foreach(COMPILER_OPT_VS_MIN, COMPILER_OPT_VS_MAX, [&](u32 vs_opt)
{
vs_masks.insert(vs_opt);
});
// Merge all FS variants
fs_masks.merge(fs_alpha_test_masks);
result.fs_opts.merge(fs_alpha_test_masks);
result.fs_base_opts.merge(fs_alpha_test_base_masks);
result.fs_base_mask = base_fs_mask;
return result;
}
interpreter_variants_t get_interpreter_variants()
{
const auto vs_metadata = prepare_vs_variants_data();
const auto fs_metadata = prepare_fs_variants_data();
const auto& vs_masks = vs_metadata.vs_opts;
const auto& fs_masks = fs_metadata.fs_opts;
const auto base_vs_mask = vs_metadata.vs_base_mask;
const auto base_fs_mask = fs_metadata.fs_base_mask;
// Prepare outputs
std::vector<interpreter_variant_t> results;
interpreter_variants_t result;
for (const auto& vs_opt : vs_masks)
{
for (const auto& fs_opt : fs_masks)
{
results.push_back({ vs_opt, fs_opt });
interpreter_pipeline_variant_t variant{};
variant.vs_opts.shader_opt = vs_opt;
variant.vs_opts.compatible_shader_opts = (vs_opt & ~base_vs_mask) | base_vs_mask;
variant.fs_opts.shader_opt = fs_opt;
variant.fs_opts.compatible_shader_opts = (fs_opt & ~base_fs_mask) | base_fs_mask;
result.pipelines.emplace_back(std::move(variant));
}
}
return results;
// Calculate base pipelines (minimal set)
const auto& vs_base_masks = vs_metadata.vs_base_opts;
const auto& fs_base_masks = fs_metadata.fs_base_opts;
result.base_pipelines.push_back({ base_vs_mask, base_fs_mask });
for (const u32 vs_opt : vs_base_masks)
{
for (const u32 fs_opt : fs_base_masks)
{
result.base_pipelines.push_back({ vs_opt, fs_opt });
}
}
return result;
}
}

View file

@ -32,6 +32,11 @@ namespace program_common
// Meta
COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_ALPHA_TEST_NE,
COMPILER_OPT_ALPHA_TEST_MASK = (0b111111 << 9),
COMPILER_OPT_ALL_VS_MASK = COMPILER_OPT_ENABLE_INSTANCING | COMPILER_OPT_ENABLE_VTX_TEXTURES,
COMPILER_OPT_BASE_FS_MASK = 0b1111111,
COMPILER_OPT_ALL_FS_MASK = COMPILER_OPT_BASE_FS_MASK | COMPILER_OPT_ALPHA_TEST_MASK,
COMPILER_OPT_VS_EXCL_MASK = COMPILER_OPT_ENABLE_INSTANCING,
COMPILER_OPT_FS_EXCL_MASK = COMPILER_OPT_ALPHA_TEST_MASK | COMPILER_OPT_ENABLE_STIPPLING | COMPILER_OPT_ENABLE_DEPTH_EXPORT | COMPILER_OPT_ENABLE_F32_EXPORT,
// Bounds
COMPILER_OPT_FS_MAX = COMPILER_OPT_ENABLE_FLOW_CTRL,
@ -56,7 +61,24 @@ namespace program_common
return s;
}
using interpreter_variant_t = std::pair<u32, u32>;
std::vector<interpreter_variant_t> get_interpreter_variants();
// Compiler-option pair for a single shader stage of one interpreter variant.
struct interpreter_shader_variant_t
{
u32 shader_opt = 0; // Exact option bits for this (fully optimized) variant
u32 compatible_shader_opts = 0; // Option bits of the base pipeline that can stand in for this variant
};
// One complete interpreter pipeline variant: a vertex-stage and a
// fragment-stage option descriptor taken together.
struct interpreter_pipeline_variant_t
{
interpreter_shader_variant_t vs_opts; // Vertex shader options for this variant
interpreter_shader_variant_t fs_opts; // Fragment shader options for this variant
};
// Result of get_interpreter_variants(): the full variant space plus the
// minimal set of base pipelines that must actually be compiled up front.
struct interpreter_variants_t
{
std::vector<interpreter_pipeline_variant_t> pipelines; // Every (vs, fs) option combination
std::vector<std::pair<u32, u32>> base_pipelines; // Minimal (vs_opt, fs_opt) pairs to precompile
};
interpreter_variants_t get_interpreter_variants();
}
}

View file

@ -579,6 +579,7 @@ namespace vk
void shader_interpreter::preload(rsx::shader_loading_dialog* dlg)
{
#if 0
dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation");
const auto variants = program_common::interpreter::get_interpreter_variants();
@ -589,13 +590,14 @@ namespace vk
u32 ctr = 0;
for (auto& variant : variants)
{
build_fs(variant.first | variant.second);
build_vs(variant.first | variant.second);
//build_fs(variant.first | variant.second);
//build_vs(variant.first | variant.second);
dlg->update_msg(0, fmt::format("Building variant %u of %u...", ++ctr, limit));
dlg->inc_value(0, 1);
}
dlg->inc_value(1, 1);
dlg->refresh();
#endif
}
};