diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 385aa0279e..f4933c7fc8 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -34,6 +34,7 @@ namespace rsx texture_upload_context upload_context = texture_upload_context::shader_read; rsx::texture_dimension_extended image_type = texture_dimension_extended::texture_dimension_2d; bool is_depth_texture = false; + bool is_cyclic_reference = false; f32 scale_x = 1.f; f32 scale_y = 1.f; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 36a615154c..fb26a14478 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -213,11 +213,13 @@ namespace rsx sampled_image_descriptor() {} - sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, bool is_depth, f32 x_scale, f32 y_scale, rsx::texture_dimension_extended type) + sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, bool is_depth, + f32 x_scale, f32 y_scale, rsx::texture_dimension_extended type, bool cyclic_reference = false) { image_handle = handle; upload_context = ctx; is_depth_texture = is_depth; + is_cyclic_reference = cyclic_reference; scale_x = x_scale; scale_y = y_scale; image_type = type; @@ -1981,7 +1983,7 @@ namespace rsx } return{ texptr->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, - is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d }; + is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, assume_bound }; } const auto scaled_w = rsx::apply_resolution_scale(internal_width, true); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 2ded343082..f2f1ed60f9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1267,7 +1267,7 @@ void VKGSRender::update_draw_state() void VKGSRender::begin_render_pass() { - if (render_pass_open) + if (m_render_pass_open) return; VkRenderPassBeginInfo rp_begin = {}; @@ -1280,16 +1280,16 @@ void VKGSRender::begin_render_pass() rp_begin.renderArea.extent.height = m_draw_fbo->height(); vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - render_pass_open = true; + m_render_pass_open = true; } void VKGSRender::close_render_pass() { - if (!render_pass_open) + if (!m_render_pass_open) return; vkCmdEndRenderPass(*m_current_command_buffer); - render_pass_open = false; + m_render_pass_open = false; } void VKGSRender::emit_geometry(u32 sub_index) @@ -1497,6 +1497,9 @@ void VKGSRender::end() surface_store_tag = m_rtts.cache_tag; } + const bool check_for_cyclic_refs = m_render_pass_is_cyclic; + m_render_pass_is_cyclic = false; + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { if (!fs_sampler_state[i]) @@ -1608,6 +1611,7 @@ void VKGSRender::end() } m_textures_dirty[i] = false; + m_render_pass_is_cyclic |= sampler_state->is_cyclic_reference; } } @@ -1656,10 +1660,32 @@ void VKGSRender::end() *sampler_state = {}; m_vertex_textures_dirty[i] = false; + m_render_pass_is_cyclic |= sampler_state->is_cyclic_reference; } } m_samplers_dirty.store(false); + + if (check_for_cyclic_refs && !m_render_pass_is_cyclic) + { + // Reverse texture barriers for optimal performance + for (unsigned i = m_rtts.m_bound_render_targets_config.first, count = 0; + count < m_rtts.m_bound_render_targets_config.second; + ++i, ++count) + { + if (auto surface = m_rtts.m_bound_render_targets[i].second; + surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) + { + surface->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } + } + + if (auto surface = m_rtts.m_bound_depth_stencil.second; + surface && surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) + { + surface->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + } } std::chrono::time_point textures_end = steady_clock::now(); @@ -1707,7 +1733,7 @@ void VKGSRender::end() if (LIKELY(view)) { - m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, i, ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set); @@ -1729,7 +1755,7 @@ void VKGSRender::end() VK_BORDER_COLOR_INT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, i, ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set, @@ -1785,7 +1811,7 @@ void VKGSRender::end() continue; } - m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, i, ::glsl::program_domain::glsl_vertex_program, m_current_frame->descriptor_set); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 58d521cf30..9aba5bd3b6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -396,7 +396,8 @@ private: std::atomic m_last_sync_event = { 0 }; - bool render_pass_open = false; + bool m_render_pass_open = false; + bool m_render_pass_is_cyclic = false; size_t m_current_renderpass_id = 0; //Vertex layout diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 1b8d8aea83..fda79477fa 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -479,22 +479,46 @@ namespace vk { case VK_IMAGE_LAYOUT_GENERAL: // Avoid this layout as it is unoptimized - barrier.srcAccessMask = + if (new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL || + new_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { - VK_ACCESS_TRANSFER_READ_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_SHADER_READ_BIT | - VK_ACCESS_INPUT_ATTACHMENT_READ_BIT - }; - src_stage = + if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) + { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + } + else + { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + } + } + else if (new_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL || + new_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT - }; + // Finish reading before writing + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT; + src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + else + { + barrier.srcAccessMask = + { + VK_ACCESS_TRANSFER_READ_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT + }; + src_stage = + { + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + }; + } break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -540,43 +564,54 @@ namespace vk void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range) { + // NOTE: Sampling from an attachment in ATTACHMENT_OPTIMAL layout on some hw ends up with garbage output + // Transition to GENERAL if this resource is both input and output + // TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround + // TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully + + VkAccessFlags src_access; + VkPipelineStageFlags src_stage; + if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) + { + if (!rsx::method_registers.color_write_enabled() && layout == VK_IMAGE_LAYOUT_GENERAL) + { + // Nothing to do + return; + } + + src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + } + else + { + if (!rsx::method_registers.depth_write_enabled() && layout == VK_IMAGE_LAYOUT_GENERAL) + { + // Nothing to do + return; + } + + src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + } + VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.newLayout = layout; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; barrier.oldLayout = layout; barrier.image = image; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.subresourceRange = range; + barrier.srcAccessMask = src_access; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - VkPipelineStageFlags src_stage; - if (range.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) - { - barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - } - else - { - barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - } - vkCmdPipelineBarrier(cmd, src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); } void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image) { - if (image->info.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) - { - VkImageAspectFlags aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - if (image->info.format != VK_FORMAT_D16_UNORM) aspect |= VK_IMAGE_ASPECT_STENCIL_BIT; - insert_texture_barrier(cmd, image->value, image->current_layout, { aspect, 0, 1, 0, 1 }); - } - else - { - insert_texture_barrier(cmd, image->value, image->current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - } + insert_texture_barrier(cmd, image->value, image->current_layout, { image->aspect(), 0, 1, 0, 1 }); + image->current_layout = VK_IMAGE_LAYOUT_GENERAL; } void enter_uninterruptible() diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index d147ecaf9e..7b0db2cae1 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -69,23 +69,21 @@ namespace vk // Helper to optionally clear/initialize memory contents depending on barrier type auto clear_surface_impl = [&]() { + push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VkImageSubresourceRange range{ aspect(), 0, 1, 0, 1 }; - const auto old_layout = current_layout; - - change_image_layout(cmd, this, VK_IMAGE_LAYOUT_GENERAL, range); if (aspect() & VK_IMAGE_ASPECT_COLOR_BIT) { VkClearColorValue color{}; - vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &color, 1, &range); + vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &color, 1, &range); } else { VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range); + vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); } - change_image_layout(cmd, this, old_layout, range); + pop_layout(cmd); state_flags &= ~rsx::surface_state_flags::erase_bkgnd; };