From 6e1436f1cd91beb9c4574bb954c587e7a491511f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 18 Jul 2025 02:28:26 +0300 Subject: [PATCH] vk: Start converting UBO bindings to SSBO indexed arrays --- rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp | 34 +++++++++---- rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 7 +++ .../GLSLSnippets/RSXProg/RSXDefines2.glsl | 11 ++++ rpcs3/Emu/RSX/Program/GLSLTypes.h | 1 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 20 ++++---- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 50 +++++++++---------- 6 files changed, 76 insertions(+), 47 deletions(-) diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 97cf918e57..e026f9813d 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -587,6 +587,12 @@ namespace rsx void draw_command_processor::fill_user_clip_data(void* buffer) const { + if (REGS(m_ctx)->clip_planes_mask() == 0) [[ likely ]] + { + *reinterpret_cast(buffer) = 0x555u; /* six planes x 0b01 = all disabled */ + return; + } + const rsx::user_clip_plane_op clip_plane_control[6] = { REGS(m_ctx)->clip_plane_0_enabled(), @@ -597,11 +603,18 @@ namespace rsx REGS(m_ctx)->clip_plane_5_enabled(), }; - u8 data_block[64]; - s32* clip_enabled_flags = reinterpret_cast(data_block); - f32* clip_distance_factors = reinterpret_cast(data_block + 32); + /** + * We encode the clip configuration as (distance factor + 1); the shader decodes factor = value - 1 + * For each plane, we have 2 bits, encoding 0, 1, 2 + * 0 = LT (factor -1) + * 1 = EQ (Disabled, factor 0) + * 2 = GE (factor +1) + */ + s32 clip_configuration_field = 0; - for (int index = 0; index < 6; ++index) +#define CLIP_DISTANCE_FACTOR(x) ((x) + 1) + + for (int index = 0, shift_offset = 0; index < 6; ++index, shift_offset += 2) { switch (clip_plane_control[index]) { @@ -610,23 +623,22 @@ namespace rsx [[fallthrough]]; case rsx::user_clip_plane_op::disable: - clip_enabled_flags[index] = 0; - clip_distance_factors[index] = 0.f; + clip_configuration_field |= CLIP_DISTANCE_FACTOR(0) << shift_offset; break; case rsx::user_clip_plane_op::greater_or_equal: - 
clip_enabled_flags[index] = 1; - clip_distance_factors[index] = 1.f; + clip_configuration_field |= CLIP_DISTANCE_FACTOR(1) << shift_offset; break; case rsx::user_clip_plane_op::less_than: - clip_enabled_flags[index] = 1; - clip_distance_factors[index] = -1.f; + clip_configuration_field |= CLIP_DISTANCE_FACTOR(-1) << shift_offset; break; } } - memcpy(buffer, data_block, 2 * 8 * sizeof(u32)); +#undef CLIP_DISTANCE_FACTOR + + *reinterpret_cast(buffer) = clip_configuration_field; } /** diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 0a1005eb46..f751711c43 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -184,6 +184,13 @@ namespace glsl OS << "#define _test_bit(x, y) (_get_bits(x, y, 1) != 0)\n"; OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; + if (props.require_clip_functions) + { + OS << + "#define CLIP_PLANE_DISABLED 1\n" + "#define is_user_clip_enabled(idx) (_get_bits(get_user_clip_config(), (idx) * 2, 2) != CLIP_PLANE_DISABLED)\n" + "#define user_clip_factor(idx) float(int(_get_bits(get_user_clip_config(), (idx) * 2, 2)) - 1)\n\n"; + } if (props.domain == glsl::program_domain::glsl_fragment_program) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl index 2b07c25f6f..930ec553c7 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl @@ -11,4 +11,15 @@ struct sampler_info uint flags; // 48 }; +struct vertex_context_t +{ + mat4 scale_offset_mat; + uint user_clip_configuration_bits; + uint transform_branch_bits; + float point_size; + float z_near; + float z_far; + // float reserved[3]; +}; + )" diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 27a1c1dec3..2e5fd49cd7 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ 
b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -26,6 +26,7 @@ namespace glsl bool require_lit_emulation : 1; bool require_explicit_invariance : 1; bool require_instanced_render : 1; + bool require_clip_functions : 1; bool emulate_zclip_transform : 1; bool emulate_depth_clip_only : 1; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b2003f645c..ff60d3d3d1 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1924,6 +1924,8 @@ void VKGSRender::load_program_env() fmt::throw_exception("Unreachable right now"); } + const auto& ctx = REGS(m_ctx); + const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); @@ -1933,8 +1935,8 @@ void VKGSRender::load_program_env() const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); const bool update_instruction_buffers = (!!m_interpreter_state && is_interpreter); - const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); - const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw; + const bool update_raster_env = (ctx->polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); + const bool update_instancing_data = ctx->current_draw_clause.is_trivial_instanced_draw; if (update_vertex_env) { @@ -1944,10 +1946,10 @@ void VKGSRender::load_program_env() m_draw_processor.fill_scale_offset_data(buf, false); m_draw_processor.fill_user_clip_data(buf + 64); - *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = rsx::method_registers.point_size() * 
rsx::get_resolution_scale(); - *(reinterpret_cast(buf + 136)) = rsx::method_registers.clip_min(); - *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_max(); + *(reinterpret_cast(buf + 68)) = ctx->transform_branch_bits(); + *(reinterpret_cast(buf + 72)) = ctx->point_size() * rsx::get_resolution_scale(); + *(reinterpret_cast(buf + 76)) = ctx->clip_min(); + *(reinterpret_cast(buf + 80)) = ctx->clip_max(); m_vertex_env_ring_info.unmap(); m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 }; @@ -2046,7 +2048,7 @@ void VKGSRender::load_program_env() auto mem = m_raster_env_ring_info.static_alloc<256>(); auto buf = m_raster_env_ring_info.map(mem, 128); - std::memcpy(buf, rsx::method_registers.polygon_stipple_pattern(), 128); + std::memcpy(buf, ctx->polygon_stipple_pattern(), 128); m_raster_env_ring_info.unmap(); m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, mem, 128 }; @@ -2066,7 +2068,7 @@ void VKGSRender::load_program_env() vp_config[0] = current_vertex_program.base_address; vp_config[1] = current_vertex_program.entry; vp_config[2] = current_vertex_program.output_mask; - vp_config[3] = rsx::method_registers.two_side_light_en()? 1u: 0u; + vp_config[3] = ctx->two_side_light_en()? 
1u: 0u; std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length); m_vertex_instructions_buffer.unmap(); @@ -2083,7 +2085,7 @@ void VKGSRender::load_program_env() // Control mask const auto control_masks = reinterpret_cast(fp_buf); - control_masks[0] = rsx::method_registers.shader_control(); + control_masks[0] = ctx->shader_control(); control_masks[1] = current_fragment_program.texture_state.texture_dimensions; std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length); diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 8553b9bba0..256f2c5b71 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -77,17 +77,16 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) "#version 450\n\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; + glsl::insert_subheader_block(OS); + OS << "layout(std140, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform VertexContextBuffer\n" "{\n" - " mat4 scale_offset_mat;\n" - " ivec4 user_clip_enabled[2];\n" - " vec4 user_clip_factor[2];\n" - " uint transform_branch_bits;\n" - " float point_size;\n" - " float z_near;\n" - " float z_far;\n" - "};\n\n"; + " vertex_context_t vertex_contexts[];\n" + "};\n\n" + "" + "#define get_vertex_context() vertex_contexts[vtx_context_id]\n" + "#define get_user_clip_config() get_vertex_context().user_clip_configuration_bits\n\n"; const vk::glsl::program_input context_input { @@ -104,8 +103,10 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" "{\n" - " uint conditional_rendering_predicate;\n" - "};\n\n"; + " uint cr_predicates[];\n" + "};\n\n" + "" + "#define get_cr_predicate() cr_predicates[cr_predicate_id]\n\n"; const 
vk::glsl::program_input predicate_input { @@ -125,22 +126,16 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " uint vertex_index_offset;\n" " uint draw_id;\n" " uint layout_ptr_offset;\n" - " uint xform_constants_offset;\n"; - - u32 push_constants_size = 5 * sizeof(u32); - if (m_device_props.emulate_conditional_rendering) - { - push_constants_size += sizeof(u32); - OS << " uint conditional_rendering_enabled;\n"; - } - - OS << "};\n\n"; + " uint vtx_constants_offset;\n" + " uint vtx_context_id;\n" + " uint cr_predicate_id;\n" + "};\n\n"; const vk::glsl::program_input push_constants { .domain = glsl::glsl_vertex_program, .type = vk::glsl::input_type_push_constant, - .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = push_constants_size }, + .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 28 }, .set = vk::glsl::binding_set_index_vertex, .location = umax, .name = "push_constants_block" @@ -274,13 +269,13 @@ static const vertex_reg_info reg_table[] = { "spec_color1", true, "dst_reg4", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR | CELL_GCM_ATTRIB_OUTPUT_MASK_BACKSPECULAR }, { "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG }, //Warning: With spir-v if you declare clip distance var, you must assign a value even when its disabled! 
Runtime does not assign a default value - { "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor[0].x", false, "user_clip_enabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 }, - { "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor[0].y", false, "user_clip_enabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 }, - { "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor[0].z", false, "user_clip_enabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, + { "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor(0)", false, "is_user_clip_enabled(0)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 }, + { "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor(1)", false, "is_user_clip_enabled(1)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 }, + { "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor(2)", false, "is_user_clip_enabled(2)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, { "gl_PointSize", false, "dst_reg6", ".x", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_POINTSIZE }, - { "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor[0].w", false, "user_clip_enabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, - { "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor[1].x", false, "user_clip_enabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, - { "gl_ClipDistance[5]", false, "dst_reg6", ".w * user_clip_factor[1].y", false, "user_clip_enabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, + { "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor(3)", false, "is_user_clip_enabled(3)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, + { "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor(4)", false, "is_user_clip_enabled(4)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, + { "gl_ClipDistance[5]", false, "dst_reg6", ".w * 
user_clip_factor(5)", false, "is_user_clip_enabled(5)", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, { "tc0", true, "dst_reg7", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 }, { "tc1", true, "dst_reg8", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 }, { "tc2", true, "dst_reg9", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 }, @@ -310,6 +305,7 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) glsl::shader_properties properties2{}; properties2.domain = glsl::glsl_vertex_program; properties2.require_lit_emulation = properties.has_lit_op; + properties2.require_clip_functions = true; properties2.emulate_zclip_transform = true; properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64; properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor());