rsx/vk: Add post-cyclic-z barriers allowing us to keep early Z optimizations

This commit is contained in:
kd-11 2025-12-26 15:23:54 +03:00 committed by kd-11
parent fd3ecea21e
commit ba673d1407
3 changed files with 48 additions and 5 deletions

View file

@ -40,6 +40,9 @@ namespace rsx
xform_instancing_state_dirty = (1 << 25), // Transform instancing state has changed
zeta_address_is_cyclic = (1 << 26), // The currently bound Z buffer is active for R/W in a cyclic manner
zeta_address_cyclic_barrier = (1 << 27), // A memory barrier is required to "end" the Z buffer cyclic state
// TODO - Should signal that we simply need to do a FP compare before the next draw call and invalidate the ucode if the content has changed.
// Marking as dirty to invalidate hot cache also works, it's not like there's tons of barriers per frame anyway.
fragment_program_needs_rehash = fragment_program_ucode_dirty,

View file

@ -1681,10 +1681,24 @@ namespace rsx
return;
}
// Helper that updates m_framebuffer_layout.zeta_write_enabled to `state`.
// Does nothing when the requested state matches the current one.
auto set_zeta_write_enabled = [&](bool state)
{
if (state == m_framebuffer_layout.zeta_write_enabled)
{
// No change in the write-enable state, nothing to invalidate.
return;
}
if (m_graphics_state & rsx::zeta_address_is_cyclic)
{
// The Z buffer is currently flagged as cyclic, so a change in write-enable also flags the fragment program state as dirty.
// NOTE(review): presumably this forces the early-Z control flags to be re-evaluated before the next draw - confirm.
m_graphics_state |= rsx::fragment_program_state_dirty;
}
m_framebuffer_layout.zeta_write_enabled = state;
};
auto evaluate_depth_buffer_state = [&]()
{
m_framebuffer_layout.zeta_write_enabled =
(rsx::method_registers.depth_test_enabled() && rsx::method_registers.depth_write_enabled());
const bool zeta_write_en = (rsx::method_registers.depth_test_enabled() && rsx::method_registers.depth_write_enabled());
set_zeta_write_enabled(zeta_write_en);
};
auto evaluate_stencil_buffer_state = [&]()
@ -1707,7 +1721,7 @@ namespace rsx
rsx::method_registers.back_stencil_op_zfail() != rsx::stencil_op::keep);
}
m_framebuffer_layout.zeta_write_enabled = (mask && active_write_op);
set_zeta_write_enabled(mask && active_write_op);
}
};
@ -2110,6 +2124,9 @@ namespace rsx
break;
}
const bool zeta_was_cyclic = m_graphics_state & rsx::zeta_address_is_cyclic;
m_graphics_state.clear(rsx::zeta_address_is_cyclic);
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1)) continue;
@ -2242,11 +2259,17 @@ namespace rsx
}
if (sampler_descriptors[i]->is_cyclic_reference &&
!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) &&
m_framebuffer_layout.zeta_address != 0 &&
!g_cfg.video.strict_rendering_mode &&
g_cfg.video.shader_precision != gpu_preset_level::low)
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_DISABLE_EARLY_Z;
m_graphics_state |= rsx::zeta_address_is_cyclic;
if (!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) &&
m_framebuffer_layout.zeta_write_enabled)
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_DISABLE_EARLY_Z;
}
}
}
else if (!backend_config.supports_hw_renormalization /* &&
@ -2340,6 +2363,13 @@ namespace rsx
}
m_program_cache_hint.invalidate_fragment_program(current_fragment_program);
if (zeta_was_cyclic && zeta_was_cyclic != m_graphics_state.test(rsx::zeta_address_is_cyclic))
{
// Forced "fall-out" barrier. This is a special case for Z buffers because they can be cyclic without writes.
// That condition can cause early-Z in a later call to introduce a data hazard in previous cyclic draws.
m_graphics_state |= rsx::zeta_address_cyclic_barrier;
}
}
bool thread::invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size)

View file

@ -1044,6 +1044,14 @@ void VKGSRender::end()
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil))
{
ds->write_barrier(*m_current_command_buffer);
if (m_graphics_state.test(rsx::zeta_address_cyclic_barrier) &&
ds->current_layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
{
// We actually need to end the subpass as a minimum. Without this, early-Z optimizations in following draws will clobber reads from previous draws and cause flickering.
// Since we're ending the subpass, might as well restore DCC/HiZ for extra performance
ds->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
}
}
for (auto &rtt : m_rtts.m_bound_render_targets)
@ -1054,6 +1062,8 @@ void VKGSRender::end()
}
}
m_graphics_state.clear(rsx::zeta_address_cyclic_barrier);
m_frame_stats.setup_time += m_profiler.duration();
// Now bind the shader resources. It is important that this takes place after the barriers so that we don't end up with stale descriptors