gl: Use bindless textures for the shader interpreter

This commit is contained in:
kd-11 2026-04-18 20:36:15 +03:00 committed by kd-11
parent 58eb7d2c04
commit 9c143d3d45
5 changed files with 155 additions and 201 deletions

View file

@ -545,6 +545,93 @@ void GLGSRender::bind_texture_env()
}
}
void GLGSRender::bind_interpreter_texture_env()
{
// Bind textures and resolve external copy operations
gl::command_context cmd{ gl_state };
const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program);
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
{
continue;
}
gl::texture_view* primary_view = nullptr;
gl::texture_view* stencil_mirror = nullptr;
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
if (primary_view = sampler_state->image_handle; !primary_view) [[unlikely]]
{
primary_view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
}
if (!primary_view)
{
const auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
primary_view = m_null_textures[target]->get_view(rsx::default_remap_vector);
stencil_mirror = primary_view;
}
else if (current_fragment_program.texture_state.redirected_textures & (1 << i))
{
auto root_texture = static_cast<gl::viewable_image*>(primary_view->image());
stencil_mirror = root_texture->get_view(rsx::default_remap_vector.with_encoding(gl::GL_REMAP_IDENTITY), gl::image_aspect::stencil);
}
if (is_interpreter) [[ unlikely ]]
{
m_shader_interpreter.bind_fragment_texture(i, primary_view->handle(), *sampler_state);
continue;
}
primary_view->bind(cmd, GL_FRAGMENT_TEXTURES_START + i);
if (stencil_mirror)
{
stencil_mirror->bind(cmd, GL_STENCIL_MIRRORS_START + i);
}
}
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
{
continue;
}
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
gl::texture_view* view = nullptr;
if (rsx::method_registers.vertex_textures[i].enabled() &&
sampler_state->validate())
{
if (view = sampler_state->image_handle; !view)
{
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
}
if (view) [[likely]]
{
view->bind(cmd, GL_VERTEX_TEXTURES_START + i);
}
else
{
cmd->bind_texture(GL_VERTEX_TEXTURES_START + i, GL_TEXTURE_2D, GL_NONE);
}
}
if (is_interpreter)
{
m_shader_interpreter.flush_texture_bindings();
}
}
void GLGSRender::emit_geometry(u32 sub_index)
{
const auto do_heap_cleanup = [this]()

View file

@ -176,8 +176,7 @@ void GLGSRender::on_init_thread()
rsx_log.warning("Texture barriers are not supported by your GPU. Feedback loops will have undefined results.");
}
// NOTE: We currently aren't using the bindless version of the interpreter
if (false) //!gl_caps.ARB_bindless_texture_supported)
if (!gl_caps.ARB_bindless_texture_supported)
{
switch (shadermode)
{
@ -253,19 +252,19 @@ void GLGSRender::on_init_thread()
const rsx::io_buffer src_buf = std::span<u32>(pixeldata);
// 1D
auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
auto tex1D = std::make_unique<gl::viewable_image>(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex1D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// 2D
auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
auto tex2D = std::make_unique<gl::viewable_image>(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex2D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// 3D
auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
auto tex3D = std::make_unique<gl::viewable_image>(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
tex3D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
// CUBE
auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
auto texCUBE = std::make_unique<gl::viewable_image>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR);
texCUBE->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
m_null_textures[GL_TEXTURE_1D] = std::move(tex1D);
@ -1066,7 +1065,7 @@ void GLGSRender::load_program_env()
if (m_interpreter_state & rsx::fragment_program_dirty)
{
// Attach fragment buffer data
const auto fp_block_length = current_fp_metadata.program_ucode_length + 80;
const auto fp_block_length = current_fp_metadata.program_ucode_length + 16;
auto fp_mapping = m_fragment_instructions_buffer->alloc_from_heap(fp_block_length, 16);
auto fp_buf = static_cast<u8*>(fp_mapping.first);
@ -1074,11 +1073,9 @@ void GLGSRender::load_program_env()
const auto control_masks = reinterpret_cast<u32*>(fp_buf);
control_masks[0] = rsx::method_registers.shader_control();
control_masks[1] = current_fragment_program.texture_state.texture_dimensions;
control_masks[2] = current_fp_metadata.referenced_textures_mask;
// Bind textures
m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast<u32*>(fp_buf + 16));
std::memcpy(fp_buf + 80, current_fragment_program.get_data(), current_fragment_program.ucode_length);
std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length);
m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length);
m_fragment_instructions_buffer->notify();

View file

@ -147,7 +147,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
shared_mutex m_sampler_mutex;
atomic_t<bool> m_samplers_dirty = {true};
std::unordered_map<GLenum, std::unique_ptr<gl::texture>> m_null_textures;
std::unordered_map<GLenum, std::unique_ptr<gl::viewable_image>> m_null_textures;
rsx::simple_array<u8> m_scratch_buffer;
// Occlusion query type, can be SAMPLES_PASSED or ANY_SAMPLES_PASSED
@ -183,6 +183,7 @@ private:
void load_texture_env();
void bind_texture_env();
void bind_interpreter_texture_env();
gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf& avconfig);

View file

@ -15,39 +15,6 @@ namespace gl
using enum program_common::interpreter::compiler_option;
using enum program_common::interpreter::cached_pipeline_flags;
namespace interpreter
{
void texture_pool_allocator::create(::glsl::program_domain domain)
{
GLenum pname;
switch (domain)
{
default:
rsx_log.fatal("Unexpected program domain %d", static_cast<int>(domain));
[[fallthrough]];
case ::glsl::program_domain::glsl_vertex_program:
pname = GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS; break;
case ::glsl::program_domain::glsl_fragment_program:
pname = GL_MAX_TEXTURE_IMAGE_UNITS; break;
}
glGetIntegerv(pname, &max_image_units);
}
void texture_pool_allocator::allocate(int size)
{
if ((used + size) > max_image_units)
{
rsx_log.fatal("Out of image binding slots!");
}
used += size;
texture_pool pool;
pool.pool_size = size;
pools.push_back(pool);
}
}
void shader_interpreter::create(rsx::shader_loading_dialog* dlg)
{
dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation");
@ -109,7 +76,6 @@ namespace gl
auto data = new interpreter::cached_program();
data->flags = base_pipeline->second->flags | CACHED_PIPE_UNOPTIMIZED;
data->build_compiler_options = base_pipeline->second->build_compiler_options;
data->allocator = base_pipeline->second->allocator;
data->vertex_shader = base_pipeline->second->vertex_shader;
data->fragment_shader = base_pipeline->second->fragment_shader;
data->prog = base_pipeline->second->prog;
@ -308,42 +274,6 @@ namespace gl
void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data)
{
// Allocate TIUs
auto& allocator = prog_data.allocator;
if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES)
{
allocator.create(::glsl::program_domain::glsl_fragment_program);
if (allocator.max_image_units >= 32)
{
// 16 + 4 + 4 + 4
allocator.allocate(4); // 1D
allocator.allocate(16); // 2D
allocator.allocate(4); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 24)
{
// 16 + 4 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(16); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(4); // 3D
}
else if (allocator.max_image_units >= 16)
{
// 10 + 2 + 2 + 2
allocator.allocate(2); // 1D
allocator.allocate(10); // 2D
allocator.allocate(2); // CUBE
allocator.allocate(2); // 3D
}
else
{
// Unusable
rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
}
}
// Cache lookup
compiler_options &= COMPILER_OPT_ALL_FS_MASK;
{
@ -366,7 +296,7 @@ namespace gl
std::stringstream builder;
builder <<
"#version 450\n"
"//#extension GL_ARB_bindless_texture : require\n\n";
"#extension GL_ARB_bindless_texture : require\n\n";
::glsl::insert_subheader_block(builder);
comp.insertConstants(builder);
@ -438,17 +368,17 @@ namespace gl
const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
for (int i = 0; i < 4; ++i)
{
builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
builder << "layout(bindless_sampler) uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n";
}
builder << "\n"
"#undef TEX_PARAM\n"
"#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n"
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
"#define IS_TEXTURE_RESIDENT(index) TEST_BIT(textures_resident, int(index))\n"
"#define SAMPLER1D(index) sampler1D_array[index]\n"
"#define SAMPLER2D(index) sampler2D_array[index]\n"
"#define SAMPLER3D(index) sampler3D_array[index]\n"
"#define SAMPLERCUBE(index) samplerCube_array[index]\n\n";
}
else if (compiler_options)
{
@ -460,9 +390,8 @@ namespace gl
"{\n"
" uint shader_control;\n"
" uint texture_control;\n"
" uint reserved1;\n"
" uint textures_resident;\n"
" uint reserved2;\n"
" uint texture_handles[16];\n"
" uvec4 fp_instructions[];\n"
"};\n\n";
@ -547,19 +476,7 @@ namespace gl
if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES)
{
// Initialize texture bindings
int assigned = 0;
auto& allocator = data->allocator;
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
for (int i = 0; i < 4; ++i)
{
for (int j = 0; j < allocator.pools[i].pool_size; ++j)
{
allocator.pools[i].allocate(assigned++);
}
data->prog->uniforms[type_names[i]] = allocator.pools[i].allocated;
}
flush_texture_bindings(data->prog.get());
}
data->flags &= ~CACHED_PIPE_UNINITIALIZED;
@ -578,99 +495,27 @@ namespace gl
return (m_current_interpreter && program == m_current_interpreter->prog.get());
}
void shader_interpreter::update_fragment_textures(
const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
u16 reference_mask, u32* out)
void shader_interpreter::bind_fragment_texture(int i, handle64_t handle, const rsx::sampled_image_descriptor_base& descriptor)
{
if (reference_mask == 0 || !m_current_interpreter)
m_texture_bindings.get(descriptor.image_type)[i] = handle;
}
void shader_interpreter::flush_texture_bindings(glsl::program* program)
{
using enum rsx::texture_dimension_extended;
if (!program)
{
return;
ensure(m_current_interpreter);
program = m_current_interpreter->prog.get();
}
// Reset allocation
auto& allocator = m_current_interpreter->allocator;
for (unsigned i = 0; i < 4; ++i)
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
const rsx::texture_dimension_extended types[] = { texture_dimension_1d, texture_dimension_2d, texture_dimension_cubemap, texture_dimension_3d };
for (int i = 0; i < 4; ++i)
{
allocator.pools[i].num_used = 0;
allocator.pools[i].flags = 0;
program->uniforms[type_names[i]] = m_texture_bindings.get(types[i]);
}
rsx::simple_array<utils::pair<int, int>> replacement_map;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (reference_mask & (1 << i))
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(descriptors[i].get());
ensure(sampler_state);
int pool_id = static_cast<int>(sampler_state->image_type);
auto& pool = allocator.pools[pool_id];
const int old = pool.allocated[pool.num_used];
if (!pool.allocate(i))
{
rsx_log.error("Could not allocate texture resource for shader interpreter.");
break;
}
out[i] = (pool.num_used - 1);
if (old != i)
{
// Check if the candidate target has also been replaced
bool found = false;
for (auto& e : replacement_map)
{
if (e.second == old)
{
// This replacement consumed this 'old' value
e.second = i;
found = true;
break;
}
}
if (!found)
{
replacement_map.push_back({ old, i });
}
}
}
else
{
out[i] = 0xFF;
}
}
// Bind TIU locations
if (replacement_map.empty()) [[likely]]
{
return;
}
// Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously
for (unsigned i = 0; i < replacement_map.size(); ++i)
{
for (int j = 0; j < 4; ++j)
{
auto& pool = allocator.pools[j];
for (int k = pool.num_used; k < pool.pool_size; ++k)
{
if (pool.allocated[k] == replacement_map[i].second)
{
pool.allocated[k] = replacement_map[i].first;
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
// Exit nested loop
j = 4;
break;
}
}
}
}
if (allocator.pools[0].flags) m_current_interpreter->prog->uniforms["sampler1D_array"] = allocator.pools[0].allocated;
if (allocator.pools[1].flags) m_current_interpreter->prog->uniforms["sampler2D_array"] = allocator.pools[1].allocated;
if (allocator.pools[2].flags) m_current_interpreter->prog->uniforms["samplerCube_array"] = allocator.pools[2].allocated;
if (allocator.pools[3].flags) m_current_interpreter->prog->uniforms["sampler3D_array"] = allocator.pools[3].allocated;
}
}

View file

@ -53,14 +53,29 @@ namespace gl
}
};
struct texture_pool_allocator
struct bindless_textures_t
{
int max_image_units = 0;
int used = 0;
std::vector<texture_pool> pools;
std::array<handle64_t, 16> sampler1D;
std::array<handle64_t, 16> sampler2D;
std::array<handle64_t, 16> sampler3D;
std::array<handle64_t, 16> samplerCUBE;
void create(::glsl::program_domain domain);
void allocate(int size);
// Returns the handle array that backs samplers of the given dimension.
// Any value other than 1D, 3D or cubemap (including 2D itself) maps to the 2D array.
std::span<handle64_t> get(rsx::texture_dimension_extended type)
{
	using enum rsx::texture_dimension_extended;

	if (type == texture_dimension_1d)
	{
		return sampler1D;
	}

	if (type == texture_dimension_3d)
	{
		return sampler3D;
	}

	if (type == texture_dimension_cubemap)
	{
		return samplerCUBE;
	}

	// texture_dimension_2d and every unrecognized value
	return sampler2D;
}
};
struct cached_program
@ -69,12 +84,11 @@ namespace gl
// Compiler options mask - May not always match the storage compiler options in case of compatible pipelines
// However the storage mask must be a subset of this options mask
u32 build_compiler_options = 0;
u64 build_compiler_options = 0;
std::shared_ptr<glsl::shader> vertex_shader;
std::shared_ptr<glsl::shader> fragment_shader;
std::shared_ptr<glsl::program> prog;
texture_pool_allocator allocator;
};
}
@ -92,6 +106,9 @@ namespace gl
shader_cache_t m_fs_cache;
pipeline_cache_t m_program_cache;
// Texture binding information.
interpreter::bindless_textures_t m_texture_bindings{};
void build_vs(u64 compiler_options, interpreter::cached_program& prog_data);
void build_fs(u64 compiler_options, interpreter::cached_program& prog_data);
@ -103,10 +120,17 @@ namespace gl
std::shared_ptr<interpreter::cached_program> m_current_interpreter;
public:
// Starts with every byte of the texture binding table cleared to zero.
shader_interpreter()
{
	void* const raw_bindings = &m_texture_bindings;
	std::memset(raw_bindings, 0, sizeof(m_texture_bindings));
}
void create(rsx::shader_loading_dialog* dlg);
void destroy();
void update_fragment_textures(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors, u16 reference_mask, u32* out);
// Update texture bindings based on the incoming descriptor structures
void bind_fragment_texture(int i, handle64_t handle, const rsx::sampled_image_descriptor_base& descriptor);
void flush_texture_bindings(glsl::program* program = nullptr);
glsl::program* get(const interpreter::program_metadata& fp_metadata, u32 vp_ctrl, u32 fp_ctrl);
bool is_interpreter(const glsl::program* program) const;