From 9c143d3d4512042f27a23f3a07b0381ef160c606 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 18 Apr 2026 20:36:15 +0300 Subject: [PATCH] gl: Use bindless textures for the shader interpreter --- rpcs3/Emu/RSX/GL/GLDraw.cpp | 87 ++++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 19 +-- rpcs3/Emu/RSX/GL/GLGSRender.h | 3 +- rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp | 205 +++-------------------- rpcs3/Emu/RSX/GL/GLShaderInterpreter.h | 42 ++++- 5 files changed, 155 insertions(+), 201 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 0abf0111e6..c57eba811e 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -545,6 +545,93 @@ void GLGSRender::bind_texture_env() } } +void GLGSRender::bind_interpreter_texture_env() +{ + // Bind textures and resolve external copy operations + gl::command_context cmd{ gl_state }; + const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); + + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) + { + if (!(textures_ref & 1)) + { + continue; + } + + gl::texture_view* primary_view = nullptr; + gl::texture_view* stencil_mirror = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) + { + if (primary_view = sampler_state->image_handle; !primary_view) [[unlikely]] + { + primary_view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); + } + } + + if (!primary_view) + { + const auto target = gl::get_target(current_fragment_program.get_texture_dimension(i)); + primary_view = m_null_textures[target]->get_view(rsx::default_remap_vector); + stencil_mirror = primary_view; + } + else if (current_fragment_program.texture_state.redirected_textures & (1 << i)) + { + auto root_texture = static_cast(primary_view->image()); + stencil_mirror = root_texture->get_view(rsx::default_remap_vector.with_encoding(gl::GL_REMAP_IDENTITY), gl::image_aspect::stencil); + } + + if (is_interpreter) [[ unlikely ]] + { + m_shader_interpreter.bind_fragment_texture(i, primary_view->handle(), *sampler_state); + continue; + } + + primary_view->bind(cmd, GL_FRAGMENT_TEXTURES_START + i); + + if (stencil_mirror) + { + stencil_mirror->bind(cmd, GL_STENCIL_MIRRORS_START + i); + } + } + + for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) + { + if (!(textures_ref & 1)) + { + continue; + } + + auto sampler_state = static_cast(vs_sampler_state[i].get()); + gl::texture_view* view = nullptr; + + if (rsx::method_registers.vertex_textures[i].enabled() && + sampler_state->validate()) + { + if (view = sampler_state->image_handle; !view) + { + view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); + } + } + + if (view) [[likely]] + { + view->bind(cmd, GL_VERTEX_TEXTURES_START + i); + } + else + { + cmd->bind_texture(GL_VERTEX_TEXTURES_START + i, GL_TEXTURE_2D, GL_NONE); + } + } + + if (is_interpreter) + { + m_shader_interpreter.flush_texture_bindings(); + } +} + void GLGSRender::emit_geometry(u32 sub_index) { const auto do_heap_cleanup = [this]() diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index e32759d181..15226ab485 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -176,8 +176,7 @@ void GLGSRender::on_init_thread() rsx_log.warning("Texture barriers are not supported by your GPU. Feedback loops will have undefined results."); } - // NOTE: We currently aren't using the bindless version of the interpreter - if (false) //!gl_caps.ARB_bindless_texture_supported) + if (!gl_caps.ARB_bindless_texture_supported) { switch (shadermode) { @@ -253,19 +252,19 @@ void GLGSRender::on_init_thread() const rsx::io_buffer src_buf = std::span(pixeldata); // 1D - auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); + auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex1D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 2D - auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); + auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex2D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 3D - auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); + auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); tex3D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // CUBE - auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); + auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); texCUBE->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); m_null_textures[GL_TEXTURE_1D] = std::move(tex1D); @@ -1066,7 +1065,7 @@ void GLGSRender::load_program_env() if (m_interpreter_state & rsx::fragment_program_dirty) { // Attach fragment buffer data - const auto fp_block_length = current_fp_metadata.program_ucode_length + 80; + const auto fp_block_length = current_fp_metadata.program_ucode_length + 16; auto fp_mapping = m_fragment_instructions_buffer->alloc_from_heap(fp_block_length, 16); auto fp_buf = static_cast(fp_mapping.first); @@ -1074,11 +1073,9 @@ void GLGSRender::load_program_env() const auto control_masks = reinterpret_cast(fp_buf); control_masks[0] = rsx::method_registers.shader_control(); control_masks[1] = current_fragment_program.texture_state.texture_dimensions; + control_masks[2] = current_fp_metadata.referenced_textures_mask; - // Bind textures - m_shader_interpreter.update_fragment_textures(fs_sampler_state, current_fp_metadata.referenced_textures_mask, reinterpret_cast(fp_buf + 16)); - - std::memcpy(fp_buf + 80, current_fragment_program.get_data(), current_fragment_program.ucode_length); + std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length); m_fragment_instructions_buffer->bind_range(GL_INTERPRETER_FRAGMENT_BLOCK, fp_mapping.second, fp_block_length); m_fragment_instructions_buffer->notify(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 779519fee7..330838f804 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -147,7 +147,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control shared_mutex m_sampler_mutex; atomic_t m_samplers_dirty = {true}; - std::unordered_map> m_null_textures; + std::unordered_map> m_null_textures; rsx::simple_array m_scratch_buffer; // Occlusion query type, can be SAMPLES_PASSED or ANY_SAMPLES_PASSED @@ -183,6 +183,7 @@ private: void load_texture_env(); void bind_texture_env(); + void bind_interpreter_texture_env(); gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf& avconfig); diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index fc575ffc5c..b3c382e657 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -15,39 +15,6 @@ namespace gl using enum program_common::interpreter::compiler_option; using enum program_common::interpreter::cached_pipeline_flags; - namespace interpreter - { - void texture_pool_allocator::create(::glsl::program_domain domain) - { - GLenum pname; - switch (domain) - { - default: - rsx_log.fatal("Unexpected program domain %d", static_cast(domain)); - [[fallthrough]]; - case ::glsl::program_domain::glsl_vertex_program: - pname = GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS; break; - case ::glsl::program_domain::glsl_fragment_program: - pname = GL_MAX_TEXTURE_IMAGE_UNITS; break; - } - - glGetIntegerv(pname, &max_image_units); - } - - void texture_pool_allocator::allocate(int size) - { - if ((used + size) > max_image_units) - { - rsx_log.fatal("Out of image binding slots!"); - } - - used += size; - texture_pool pool; - pool.pool_size = size; - pools.push_back(pool); - } - } - void shader_interpreter::create(rsx::shader_loading_dialog* dlg) { dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation"); @@ -109,7 +76,6 @@ namespace gl auto data = new interpreter::cached_program(); data->flags = base_pipeline->second->flags | CACHED_PIPE_UNOPTIMIZED; data->build_compiler_options = base_pipeline->second->build_compiler_options; - data->allocator = base_pipeline->second->allocator; data->vertex_shader = base_pipeline->second->vertex_shader; data->fragment_shader = base_pipeline->second->fragment_shader; data->prog = base_pipeline->second->prog; @@ -308,42 +274,6 @@ namespace gl void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data) { - // Allocate TIUs - auto& allocator = prog_data.allocator; - if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES) - { - allocator.create(::glsl::program_domain::glsl_fragment_program); - if (allocator.max_image_units >= 32) - { - // 16 + 4 + 4 + 4 - allocator.allocate(4); // 1D - allocator.allocate(16); // 2D - allocator.allocate(4); // CUBE - allocator.allocate(4); // 3D - } - else if (allocator.max_image_units >= 24) - { - // 16 + 4 + 2 + 2 - allocator.allocate(2); // 1D - allocator.allocate(16); // 2D - allocator.allocate(2); // CUBE - allocator.allocate(4); // 3D - } - else if (allocator.max_image_units >= 16) - { - // 10 + 2 + 2 + 2 - allocator.allocate(2); // 1D - allocator.allocate(10); // 2D - allocator.allocate(2); // CUBE - allocator.allocate(2); // 3D - } - else - { - // Unusable - rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter."); - } - } - // Cache lookup compiler_options &= COMPILER_OPT_ALL_FS_MASK; { @@ -366,7 +296,7 @@ namespace gl std::stringstream builder; builder << "#version 450\n" - "//#extension GL_ARB_bindless_texture : require\n\n"; + "#extension GL_ARB_bindless_texture : require\n\n"; ::glsl::insert_subheader_block(builder); comp.insertConstants(builder); @@ -438,17 +368,17 @@ namespace gl const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" }; for (int i = 0; i < 4; ++i) { - builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n"; + builder << "layout(bindless_sampler) uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; } builder << "\n" "#undef TEX_PARAM\n" "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n" - "#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n" - "#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n" - "#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n" - "#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n" - "#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n"; + "#define IS_TEXTURE_RESIDENT(index) TEST_BIT(textures_resident, int(index))\n" + "#define SAMPLER1D(index) sampler1D_array[index]\n" + "#define SAMPLER2D(index) sampler2D_array[index]\n" + "#define SAMPLER3D(index) sampler3D_array[index]\n" + "#define SAMPLERCUBE(index) samplerCube_array[index]\n\n"; } else if (compiler_options) { @@ -460,9 +390,8 @@ namespace gl "{\n" " uint shader_control;\n" " uint texture_control;\n" - " uint reserved1;\n" + " uint textures_resident;\n" " uint reserved2;\n" - " uint texture_handles[16];\n" " uvec4 fp_instructions[];\n" "};\n\n"; @@ -547,19 +476,7 @@ namespace gl if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES) { // Initialize texture bindings - int assigned = 0; - auto& allocator = data->allocator; - const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" }; - - for (int i = 0; i < 4; ++i) - { - for (int j = 0; j < allocator.pools[i].pool_size; ++j) - { - allocator.pools[i].allocate(assigned++); - } - - data->prog->uniforms[type_names[i]] = allocator.pools[i].allocated; - } + flush_texture_bindings(data->prog.get()); } data->flags &= ~CACHED_PIPE_UNINITIALIZED; @@ -578,99 +495,27 @@ namespace gl return (m_current_interpreter && program == m_current_interpreter->prog.get()); } - void shader_interpreter::update_fragment_textures( - const std::array, 16>& descriptors, - u16 reference_mask, u32* out) + void shader_interpreter::bind_fragment_texture(int i, handle64_t handle, const rsx::sampled_image_descriptor_base& descriptor) { - if (reference_mask == 0 || !m_current_interpreter) + m_texture_bindings.get(descriptor.image_type)[i] = handle; + } + + void shader_interpreter::flush_texture_bindings(glsl::program* program) + { + using enum rsx::texture_dimension_extended; + + if (!program) { - return; + ensure(m_current_interpreter); + program = m_current_interpreter->prog.get(); } - // Reset allocation - auto& allocator = m_current_interpreter->allocator; - for (unsigned i = 0; i < 4; ++i) + const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" }; + const rsx::texture_dimension_extended types[] = { texture_dimension_1d, texture_dimension_2d, texture_dimension_cubemap, texture_dimension_3d }; + + for (int i = 0; i < 4; ++i) { - allocator.pools[i].num_used = 0; - allocator.pools[i].flags = 0; + program->uniforms[type_names[i]] = m_texture_bindings.get(types[i]); } - - rsx::simple_array> replacement_map; - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) - { - if (reference_mask & (1 << i)) - { - auto sampler_state = static_cast(descriptors[i].get()); - ensure(sampler_state); - - int pool_id = static_cast(sampler_state->image_type); - auto& pool = allocator.pools[pool_id]; - - const int old = pool.allocated[pool.num_used]; - if (!pool.allocate(i)) - { - rsx_log.error("Could not allocate texture resource for shader interpreter."); - break; - } - - out[i] = (pool.num_used - 1); - if (old != i) - { - // Check if the candidate target has also been replaced - bool found = false; - for (auto& e : replacement_map) - { - if (e.second == old) - { - // This replacement consumed this 'old' value - e.second = i; - found = true; - break; - } - } - - if (!found) - { - replacement_map.push_back({ old, i }); - } - } - } - else - { - out[i] = 0xFF; - } - } - - // Bind TIU locations - if (replacement_map.empty()) [[likely]] - { - return; - } - - // Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously - for (unsigned i = 0; i < replacement_map.size(); ++i) - { - for (int j = 0; j < 4; ++j) - { - auto& pool = allocator.pools[j]; - for (int k = pool.num_used; k < pool.pool_size; ++k) - { - if (pool.allocated[k] == replacement_map[i].second) - { - pool.allocated[k] = replacement_map[i].first; - pool.flags |= static_cast(interpreter::texture_pool_flags::dirty); - - // Exit nested loop - j = 4; - break; - } - } - } - } - - if (allocator.pools[0].flags) m_current_interpreter->prog->uniforms["sampler1D_array"] = allocator.pools[0].allocated; - if (allocator.pools[1].flags) m_current_interpreter->prog->uniforms["sampler2D_array"] = allocator.pools[1].allocated; - if (allocator.pools[2].flags) m_current_interpreter->prog->uniforms["samplerCube_array"] = allocator.pools[2].allocated; - if (allocator.pools[3].flags) m_current_interpreter->prog->uniforms["sampler3D_array"] = allocator.pools[3].allocated; } } diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h index ccfcde30e1..437cbb43cf 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h @@ -53,14 +53,29 @@ namespace gl } }; - struct texture_pool_allocator + struct bindless_textures_t { - int max_image_units = 0; - int used = 0; - std::vector pools; + std::array sampler1D; + std::array sampler2D; + std::array sampler3D; + std::array samplerCUBE; - void create(::glsl::program_domain domain); - void allocate(int size); + std::span get(rsx::texture_dimension_extended type) + { + using enum rsx::texture_dimension_extended; + switch (type) + { + default: + case texture_dimension_2d: + return sampler2D; + case texture_dimension_cubemap: + return samplerCUBE; + case texture_dimension_1d: + return sampler1D; + case texture_dimension_3d: + return sampler3D; + } + } }; struct cached_program @@ -69,12 +84,11 @@ namespace gl // Compiler options mask - May not always match the storage compiler options in case of compatible pipelines // However the storage mask must be a subset of this options mask - u32 build_compiler_options = 0; + u64 build_compiler_options = 0; std::shared_ptr vertex_shader; std::shared_ptr fragment_shader; std::shared_ptr prog; - texture_pool_allocator allocator; }; } @@ -92,6 +106,9 @@ namespace gl shader_cache_t m_fs_cache; pipeline_cache_t m_program_cache; + // Texture binding information. + interpreter::bindless_textures_t m_texture_bindings{}; + void build_vs(u64 compiler_options, interpreter::cached_program& prog_data); void build_fs(u64 compiler_options, interpreter::cached_program& prog_data); @@ -103,10 +120,17 @@ namespace gl std::shared_ptr m_current_interpreter; public: + shader_interpreter() + { + std::memset(&m_texture_bindings, 0, sizeof(m_texture_bindings)); + } + void create(rsx::shader_loading_dialog* dlg); void destroy(); - void update_fragment_textures(const std::array, 16>& descriptors, u16 reference_mask, u32* out); + // Update texture bindings based on the incoming descriptor structures + void bind_fragment_texture(int i, handle64_t handle, const rsx::sampled_image_descriptor_base& descriptor); + void flush_texture_bindings(glsl::program* program = nullptr); glsl::program* get(const interpreter::program_metadata& fp_metadata, u32 vp_ctrl, u32 fp_ctrl); bool is_interpreter(const glsl::program* program) const;