mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-01-02 06:40:20 +01:00
rsx/vk: Add post-cyclic-z barriers allowing us to keep early Z optimizations
This commit is contained in:
parent
fd3ecea21e
commit
ba673d1407
|
|
@ -40,6 +40,9 @@ namespace rsx
|
|||
|
||||
xform_instancing_state_dirty = (1 << 25), // Transform instancing state has changed
|
||||
|
||||
zeta_address_is_cyclic = (1 << 26), // The currently bound Z buffer is active for R/W in a cyclic manner
|
||||
zeta_address_cyclic_barrier = (1 << 27), // A memory barrier is required to "end" the Z buffer cyclic state
|
||||
|
||||
// TODO - Should signal that we simply need to do a FP compare before the next draw call and invalidate the ucode if the content has changed.
|
||||
// Marking as dirty to invalidate hot cache also works, it's not like there's tons of barriers per frame anyway.
|
||||
fragment_program_needs_rehash = fragment_program_ucode_dirty,
|
||||
|
|
|
|||
|
|
@ -1681,10 +1681,24 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
// Helper: store a new depth/stencil write-enable state into m_framebuffer_layout.
// Does nothing when the requested state equals the value already stored.
auto set_zeta_write_enabled = [&](bool state)
|
||||
{
|
||||
if (state == m_framebuffer_layout.zeta_write_enabled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// While the Z buffer is flagged as cyclic, a change in write-enable marks the fragment program state dirty.
// NOTE(review): presumably because the early-Z disable decision for cyclic Z reads zeta_write_enabled - confirm.
if (m_graphics_state & rsx::zeta_address_is_cyclic)
|
||||
{
|
||||
m_graphics_state |= rsx::fragment_program_state_dirty;
|
||||
}
|
||||
m_framebuffer_layout.zeta_write_enabled = state;
|
||||
};
|
||||
|
||||
auto evaluate_depth_buffer_state = [&]()
|
||||
{
|
||||
m_framebuffer_layout.zeta_write_enabled =
|
||||
(rsx::method_registers.depth_test_enabled() && rsx::method_registers.depth_write_enabled());
|
||||
const bool zeta_write_en = (rsx::method_registers.depth_test_enabled() && rsx::method_registers.depth_write_enabled());
|
||||
set_zeta_write_enabled(zeta_write_en);
|
||||
};
|
||||
|
||||
auto evaluate_stencil_buffer_state = [&]()
|
||||
|
|
@ -1707,7 +1721,7 @@ namespace rsx
|
|||
rsx::method_registers.back_stencil_op_zfail() != rsx::stencil_op::keep);
|
||||
}
|
||||
|
||||
m_framebuffer_layout.zeta_write_enabled = (mask && active_write_op);
|
||||
set_zeta_write_enabled(mask && active_write_op);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -2110,6 +2124,9 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
|
||||
const bool zeta_was_cyclic = m_graphics_state & rsx::zeta_address_is_cyclic;
|
||||
m_graphics_state.clear(rsx::zeta_address_is_cyclic);
|
||||
|
||||
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
|
||||
{
|
||||
if (!(textures_ref & 1)) continue;
|
||||
|
|
@ -2242,11 +2259,17 @@ namespace rsx
|
|||
}
|
||||
|
||||
if (sampler_descriptors[i]->is_cyclic_reference &&
|
||||
!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) &&
|
||||
m_framebuffer_layout.zeta_address != 0 &&
|
||||
!g_cfg.video.strict_rendering_mode &&
|
||||
g_cfg.video.shader_precision != gpu_preset_level::low)
|
||||
{
|
||||
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_DISABLE_EARLY_Z;
|
||||
m_graphics_state |= rsx::zeta_address_is_cyclic;
|
||||
|
||||
if (!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) &&
|
||||
m_framebuffer_layout.zeta_write_enabled)
|
||||
{
|
||||
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_DISABLE_EARLY_Z;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!backend_config.supports_hw_renormalization /* &&
|
||||
|
|
@ -2340,6 +2363,13 @@ namespace rsx
|
|||
}
|
||||
|
||||
m_program_cache_hint.invalidate_fragment_program(current_fragment_program);
|
||||
|
||||
if (zeta_was_cyclic && zeta_was_cyclic != m_graphics_state.test(rsx::zeta_address_is_cyclic))
|
||||
{
|
||||
// Forced "fall-out" barrier. This is a special case for Z buffers because they can be cyclic without writes.
|
||||
// That condition can cause early-Z in a later call to introduce a data hazard against previous cyclic draws.
|
||||
m_graphics_state |= rsx::zeta_address_cyclic_barrier;
|
||||
}
|
||||
}
|
||||
|
||||
bool thread::invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size)
|
||||
|
|
|
|||
|
|
@ -1044,6 +1044,14 @@ void VKGSRender::end()
|
|||
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil))
|
||||
{
|
||||
ds->write_barrier(*m_current_command_buffer);
|
||||
|
||||
if (m_graphics_state.test(rsx::zeta_address_cyclic_barrier) &&
|
||||
ds->current_layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
|
||||
{
|
||||
// We actually need to end the subpass as a minimum. Without this, early-Z optimizations in following draws will clobber reads from previous draws and cause flickering.
|
||||
// Since we're ending the subpass, might as well restore DCC/HiZ for extra performance
|
||||
ds->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
|
|
@ -1054,6 +1062,8 @@ void VKGSRender::end()
|
|||
}
|
||||
}
|
||||
|
||||
m_graphics_state.clear(rsx::zeta_address_cyclic_barrier);
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
// Now bind the shader resources. It is important that this takes place after the barriers so that we don't end up with stale descriptors
|
||||
|
|
|
|||
Loading…
Reference in a new issue