// NOTE: Use CONCURRENT queue access instead of fighting with queue acquire/release via submit chains.
// The minor benefits of forcing EXCLUSIVE mode are buried under the huge penalty of multiple vkQueueSubmit calls.
// Batching submits does not help alleviate this; we simply must avoid interrupting execution.
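//
// Minimal illustrative sketch (not code from this file) of what CONCURRENT sharing means for a
// resource used by both the graphics and transfer/compute queue families: no ownership-transfer
// barriers and no extra vkQueueSubmit just to hand the resource over. The variable names below
// are placeholders, not identifiers from this renderer.
//
//   VkBufferCreateInfo info { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
//   const u32 queue_families[] = { graphics_queue_family, transfer_queue_family };
//   info.size                  = buffer_size;
//   info.usage                 = usage_flags;
//   info.sharingMode           = VK_SHARING_MODE_CONCURRENT;   // vs. VK_SHARING_MODE_EXCLUSIVE
//   info.queueFamilyIndexCount = 2;
//   info.pQueueFamilyIndices   = queue_families;
//   vkCreateBuffer(dev, &info, nullptr, &buffer);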
#include "stdafx.h"
|
|
#include "../Overlays/overlay_shader_compile_notification.h"
|
|
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
|
|
|
|
#include "VKAsyncScheduler.h"
|
|
#include "VKCommandStream.h"
|
|
#include "VKCommonDecompiler.h"
|
|
#include "VKCompute.h"
|
|
#include "VKGSRender.h"
|
|
#include "VKHelpers.h"
|
|
#include "VKRenderPass.h"
|
|
#include "VKResourceManager.h"
|
|
|
|
#include "vkutils/buffer_object.h"
|
|
#include "vkutils/scratch.h"
|
|
|
|
#include "Emu/RSX/rsx_methods.h"
|
|
#include "Emu/Memory/vm_locking.h"
|
|
|
|
#include "../Program/program_state_cache2.hpp"
|
|
|
|
#include "util/asm.hpp"
|
|
|
|
namespace vk
|
|
{
|
|
VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false);
|
|
|
|
std::pair<VkFormat, VkComponentMapping> get_compatible_surface_format(rsx::surface_color_format color_format)
|
|
{
|
|
const VkComponentMapping o_rgb = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE };
|
|
const VkComponentMapping z_rgb = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ZERO };
|
|
|
|
switch (color_format)
|
|
{
|
|
#ifndef __APPLE__
|
|
case rsx::surface_color_format::r5g6b5:
|
|
return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
|
|
return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, o_rgb);
|
|
|
|
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
|
|
return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, z_rgb);
|
|
#else
|
|
// assign B8G8R8A8_UNORM to formats that are not supported by Metal
|
|
case rsx::surface_color_format::r5g6b5:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, o_rgb);
|
|
|
|
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, z_rgb);
|
|
#endif
|
|
case rsx::surface_color_format::a8r8g8b8:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::a8b8g8r8:
|
|
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
|
|
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, o_rgb);
|
|
|
|
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
|
|
return std::make_pair(VK_FORMAT_R8G8B8A8_UNORM, z_rgb);
|
|
|
|
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, z_rgb);
|
|
|
|
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, o_rgb);
|
|
|
|
case rsx::surface_color_format::w16z16y16x16:
|
|
return std::make_pair(VK_FORMAT_R16G16B16A16_SFLOAT, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::w32z32y32x32:
|
|
return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map);
|
|
|
|
case rsx::surface_color_format::b8:
|
|
{
|
|
const VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE };
|
|
return std::make_pair(VK_FORMAT_R8_UNORM, no_alpha);
|
|
}
|
|
|
|
case rsx::surface_color_format::g8b8:
|
|
{
|
|
const VkComponentMapping gb_rg = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G };
|
|
return std::make_pair(VK_FORMAT_R8G8_UNORM, gb_rg);
|
|
}
|
|
|
|
case rsx::surface_color_format::x32:
|
|
{
|
|
const VkComponentMapping rrrr = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R };
|
|
return std::make_pair(VK_FORMAT_R32_SFLOAT, rrrr);
|
|
}
|
|
|
|
default:
|
|
rsx_log.error("Surface color buffer: Unsupported surface color format (0x%x)", static_cast<u32>(color_format));
|
|
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map);
|
|
}
|
|
}
|
|
|
|
VkLogicOp get_logic_op(rsx::logic_op op)
|
|
{
|
|
switch (op)
|
|
{
|
|
case rsx::logic_op::logic_clear: return VK_LOGIC_OP_CLEAR;
|
|
case rsx::logic_op::logic_and: return VK_LOGIC_OP_AND;
|
|
case rsx::logic_op::logic_and_reverse: return VK_LOGIC_OP_AND_REVERSE;
|
|
case rsx::logic_op::logic_copy: return VK_LOGIC_OP_COPY;
|
|
case rsx::logic_op::logic_and_inverted: return VK_LOGIC_OP_AND_INVERTED;
|
|
case rsx::logic_op::logic_noop: return VK_LOGIC_OP_NO_OP;
|
|
case rsx::logic_op::logic_xor: return VK_LOGIC_OP_XOR;
|
|
case rsx::logic_op::logic_or : return VK_LOGIC_OP_OR;
|
|
case rsx::logic_op::logic_nor: return VK_LOGIC_OP_NOR;
|
|
case rsx::logic_op::logic_equiv: return VK_LOGIC_OP_EQUIVALENT;
|
|
case rsx::logic_op::logic_invert: return VK_LOGIC_OP_INVERT;
|
|
case rsx::logic_op::logic_or_reverse: return VK_LOGIC_OP_OR_REVERSE;
|
|
case rsx::logic_op::logic_copy_inverted: return VK_LOGIC_OP_COPY_INVERTED;
|
|
case rsx::logic_op::logic_or_inverted: return VK_LOGIC_OP_OR_INVERTED;
|
|
case rsx::logic_op::logic_nand: return VK_LOGIC_OP_NAND;
|
|
case rsx::logic_op::logic_set: return VK_LOGIC_OP_SET;
|
|
default:
|
|
fmt::throw_exception("Unknown logic op 0x%x", static_cast<u32>(op));
|
|
}
|
|
}
|
|
|
|
VkBlendFactor get_blend_factor(rsx::blend_factor factor)
|
|
{
|
|
switch (factor)
|
|
{
|
|
case rsx::blend_factor::one: return VK_BLEND_FACTOR_ONE;
|
|
case rsx::blend_factor::zero: return VK_BLEND_FACTOR_ZERO;
|
|
case rsx::blend_factor::src_alpha: return VK_BLEND_FACTOR_SRC_ALPHA;
|
|
case rsx::blend_factor::dst_alpha: return VK_BLEND_FACTOR_DST_ALPHA;
|
|
case rsx::blend_factor::src_color: return VK_BLEND_FACTOR_SRC_COLOR;
|
|
case rsx::blend_factor::dst_color: return VK_BLEND_FACTOR_DST_COLOR;
|
|
case rsx::blend_factor::constant_color: return VK_BLEND_FACTOR_CONSTANT_COLOR;
|
|
case rsx::blend_factor::constant_alpha: return VK_BLEND_FACTOR_CONSTANT_ALPHA;
|
|
case rsx::blend_factor::one_minus_src_color: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
|
|
case rsx::blend_factor::one_minus_dst_color: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
|
|
case rsx::blend_factor::one_minus_src_alpha: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
|
|
case rsx::blend_factor::one_minus_dst_alpha: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
|
|
case rsx::blend_factor::one_minus_constant_alpha: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
|
|
case rsx::blend_factor::one_minus_constant_color: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;
|
|
case rsx::blend_factor::src_alpha_saturate: return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE;
|
|
default:
|
|
fmt::throw_exception("Unknown blend factor 0x%x", static_cast<u32>(factor));
|
|
}
|
|
}
|
|
|
|
VkBlendOp get_blend_op(rsx::blend_equation op)
|
|
{
|
|
switch (op)
|
|
{
|
|
case rsx::blend_equation::add_signed:
|
|
rsx_log.trace("blend equation add_signed used. Emulating using FUNC_ADD");
|
|
[[fallthrough]];
|
|
case rsx::blend_equation::add:
|
|
return VK_BLEND_OP_ADD;
|
|
case rsx::blend_equation::substract: return VK_BLEND_OP_SUBTRACT;
|
|
case rsx::blend_equation::reverse_substract_signed:
|
|
rsx_log.trace("blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
|
|
[[fallthrough]];
|
|
case rsx::blend_equation::reverse_substract: return VK_BLEND_OP_REVERSE_SUBTRACT;
|
|
case rsx::blend_equation::min: return VK_BLEND_OP_MIN;
|
|
case rsx::blend_equation::max: return VK_BLEND_OP_MAX;
|
|
default:
|
|
fmt::throw_exception("Unknown blend op: 0x%x", static_cast<u32>(op));
|
|
}
|
|
}
|
|
|
|
VkStencilOp get_stencil_op(rsx::stencil_op op)
|
|
{
|
|
switch (op)
|
|
{
|
|
case rsx::stencil_op::keep: return VK_STENCIL_OP_KEEP;
|
|
case rsx::stencil_op::zero: return VK_STENCIL_OP_ZERO;
|
|
case rsx::stencil_op::replace: return VK_STENCIL_OP_REPLACE;
|
|
case rsx::stencil_op::incr: return VK_STENCIL_OP_INCREMENT_AND_CLAMP;
|
|
case rsx::stencil_op::decr: return VK_STENCIL_OP_DECREMENT_AND_CLAMP;
|
|
case rsx::stencil_op::invert: return VK_STENCIL_OP_INVERT;
|
|
case rsx::stencil_op::incr_wrap: return VK_STENCIL_OP_INCREMENT_AND_WRAP;
|
|
case rsx::stencil_op::decr_wrap: return VK_STENCIL_OP_DECREMENT_AND_WRAP;
|
|
default:
|
|
fmt::throw_exception("Unknown stencil op: 0x%x", static_cast<u32>(op));
|
|
}
|
|
}
|
|
|
|
VkFrontFace get_front_face(rsx::front_face ffv)
|
|
{
|
|
switch (ffv)
|
|
{
|
|
case rsx::front_face::cw: return VK_FRONT_FACE_CLOCKWISE;
|
|
case rsx::front_face::ccw: return VK_FRONT_FACE_COUNTER_CLOCKWISE;
|
|
default:
|
|
fmt::throw_exception("Unknown front face value: 0x%x", static_cast<u32>(ffv));
|
|
}
|
|
}
|
|
|
|
VkCullModeFlags get_cull_face(rsx::cull_face cfv)
|
|
{
|
|
switch (cfv)
|
|
{
|
|
case rsx::cull_face::back: return VK_CULL_MODE_BACK_BIT;
|
|
case rsx::cull_face::front: return VK_CULL_MODE_FRONT_BIT;
|
|
case rsx::cull_face::front_and_back: return VK_CULL_MODE_FRONT_AND_BACK;
|
|
default:
|
|
fmt::throw_exception("Unknown cull face value: 0x%x", static_cast<u32>(cfv));
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace
|
|
{
|
|
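// Builds the descriptor set layout shared by every graphics program (vertex/attribute streams,
// constant and state buffers, fragment/vertex textures and the conditional-render predicate),
// plus the matching pipeline layout. The caller owns both returned handles.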
std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev)
|
|
{
|
|
const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
|
|
std::vector<VkDescriptorSetLayoutBinding> bindings(binding_table.total_descriptor_bindings);
|
|
|
|
usz idx = 0;
|
|
|
|
// Vertex data streams: one for cacheable (persistent) data, one for transient (volatile) data, plus the vertex layout stream
|
|
for (int i = 0; i < 3; i++)
|
|
{
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i;
|
|
idx++;
|
|
}
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
bindings[idx].binding = binding_table.fragment_state_bind_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
bindings[idx].binding = binding_table.fragment_texture_params_bind_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
|
|
bindings[idx].binding = binding_table.vertex_params_bind_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
bindings[idx].binding = binding_table.conditional_render_predicate_slot;
|
|
|
|
idx++;
|
|
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
bindings[idx].binding = binding_table.rasterizer_env_bind_slot;
|
|
|
|
idx++;
|
|
|
|
for (auto binding = binding_table.textures_first_bind_slot;
|
|
binding < binding_table.vertex_textures_first_bind_slot;
|
|
binding++)
|
|
{
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
bindings[idx].binding = binding;
|
|
idx++;
|
|
}
|
|
|
|
for (int i = 0; i < rsx::limits::vertex_textures_count; i++)
|
|
{
|
|
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
bindings[idx].descriptorCount = 1;
|
|
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i;
|
|
idx++;
|
|
}
|
|
|
|
ensure(idx == binding_table.total_descriptor_bindings);
|
|
|
|
std::array<VkPushConstantRange, 1> push_constants;
|
|
push_constants[0].offset = 0;
|
|
push_constants[0].size = 16;
|
|
push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
|
|
if (vk::emulate_conditional_rendering())
|
|
{
|
|
// Conditional render toggle
|
|
push_constants[0].size = 20;
|
|
}
|
|
|
|
const auto set_layout = vk::descriptors::create_layout(bindings);
|
|
|
|
VkPipelineLayoutCreateInfo layout_info = {};
|
|
layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
|
layout_info.setLayoutCount = 1;
|
|
layout_info.pSetLayouts = &set_layout;
|
|
layout_info.pushConstantRangeCount = 1;
|
|
layout_info.pPushConstantRanges = push_constants.data();
|
|
|
|
VkPipelineLayout result;
|
|
CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result));
|
|
return std::make_tuple(result, set_layout);
|
|
}
|
|
}
|
|
|
|
u64 VKGSRender::get_cycles()
|
|
{
|
|
return thread_ctrl::get_cycles(static_cast<named_thread<VKGSRender>&>(*this));
|
|
}
|
|
|
|
VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
|
|
{
|
|
if (m_instance.create("RPCS3"))
|
|
{
|
|
m_instance.bind();
|
|
}
|
|
else
|
|
{
|
|
rsx_log.fatal("Could not find a vulkan compatible GPU driver. Your GPU(s) may not support Vulkan, or you need to install the vulkan runtime and drivers");
|
|
m_device = VK_NULL_HANDLE;
|
|
return;
|
|
}
|
|
|
|
std::vector<vk::physical_device>& gpus = m_instance.enumerate_devices();
|
|
|
|
//Actually confirm that the loader found at least one compatible device
|
|
//This should not happen unless something is wrong with the driver setup on the target system
|
|
if (gpus.empty())
|
|
{
|
|
//We can't throw in Emulator::Load, so we show error and return
|
|
rsx_log.fatal("No compatible GPU devices found");
|
|
m_device = VK_NULL_HANDLE;
|
|
return;
|
|
}
|
|
|
|
bool gpu_found = false;
|
|
std::string adapter_name = g_cfg.video.vk.adapter;
|
|
|
|
display_handle_t display = m_frame->handle();
|
|
|
|
#ifdef HAVE_X11
|
|
std::visit([this](auto&& p) {
|
|
using T = std::decay_t<decltype(p)>;
|
|
if constexpr (std::is_same_v<T, std::pair<Display*, Window>>)
|
|
{
|
|
m_display_handle = p.first; XFlush(m_display_handle);
|
|
}
|
|
}, display);
|
|
#endif
|
|
|
|
for (auto &gpu : gpus)
|
|
{
|
|
if (gpu.get_name() == adapter_name)
|
|
{
|
|
m_swapchain.reset(m_instance.create_swapchain(display, gpu));
|
|
gpu_found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!gpu_found || adapter_name.empty())
|
|
{
|
|
m_swapchain.reset(m_instance.create_swapchain(display, gpus[0]));
|
|
}
|
|
|
|
if (!m_swapchain)
|
|
{
|
|
m_device = VK_NULL_HANDLE;
|
|
rsx_log.fatal("Could not successfully initialize a swapchain");
|
|
return;
|
|
}
|
|
|
|
m_device = const_cast<vk::render_device*>(&m_swapchain->get_device());
|
|
vk::set_current_renderer(m_swapchain->get_device());
|
|
|
|
m_swapchain_dims.width = m_frame->client_width();
|
|
m_swapchain_dims.height = m_frame->client_height();
|
|
|
|
if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height))
|
|
{
|
|
swapchain_unavailable = true;
|
|
}
|
|
|
|
//create command buffer...
|
|
m_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family());
|
|
m_primary_cb_list.create(m_command_buffer_pool, vk::command_buffer::access_type_hint::flush_only);
|
|
m_current_command_buffer = m_primary_cb_list.get();
|
|
m_current_command_buffer->begin();
|
|
|
|
//Create secondary command_buffer for parallel operations
|
|
m_secondary_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family());
|
|
m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all);
|
|
|
|
//Precalculated stuff
|
|
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
|
|
|
|
//Occlusion
|
|
m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
|
|
m_occlusion_map.resize(occlusion_query_count);
|
|
|
|
for (u32 n = 0; n < occlusion_query_count; ++n)
|
|
m_occlusion_query_data[n].driver_handle = n;
|
|
|
|
if (g_cfg.video.precise_zpass_count)
|
|
{
|
|
m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0);
|
|
}
|
|
|
|
// Generate frame contexts
|
|
const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls();
|
|
const auto& binding_table = m_device->get_pipeline_binding_table();
|
|
const u32 num_fs_samplers = binding_table.vertex_textures_first_bind_slot - binding_table.textures_first_bind_slot;
|
|
|
|
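// Descriptor pool sizing: each type below must cover the worst case for one frame context,
// i.e. the number of descriptors of that type in the shared layout multiplied by the maximum
// number of draw calls allowed per frame.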
std::vector<VkDescriptorPoolSize> sizes;
|
|
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
|
|
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
|
|
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * max_draw_calls });
|
|
|
|
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
|
|
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * max_draw_calls });
|
|
|
|
VkSemaphoreCreateInfo semaphore_info = {};
|
|
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
|
|
|
// VRAM allocation
|
|
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
|
|
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
|
|
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
|
|
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
|
|
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
|
|
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
|
|
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
|
|
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
|
|
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
|
|
m_raster_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "raster env buffer");
|
|
|
|
const auto shadermode = g_cfg.video.shadermode.get();
|
|
|
|
if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only)
|
|
{
|
|
m_vertex_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "vertex instructions buffer", 512 * 16);
|
|
m_fragment_instructions_buffer.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 64 * 0x100000, "fragment instructions buffer", 2048);
|
|
}
|
|
|
|
// Initialize optional allocation information with placeholders
|
|
m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 };
|
|
|
|
const auto limits = m_device->gpu().get_limits();
|
|
m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u);
|
|
|
|
if (m_texbuffer_view_size < 0x800000)
|
|
{
|
|
// Warn; this is really only expected on macOS
|
|
rsx_log.warning("Current driver may crash due to memory limitations (%uk)", m_texbuffer_view_size / 1024);
|
|
}
|
|
|
|
for (auto &ctx : frame_context_storage)
|
|
{
|
|
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore);
|
|
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore);
|
|
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), max_draw_calls, 1);
|
|
}
|
|
|
|
const auto& memory_map = m_device->get_memory_mapping();
|
|
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0, VMM_ALLOCATION_POOL_UNDEFINED);
|
|
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);
|
|
|
|
vk::initialize_compiler_context();
|
|
vk::initialize_pipe_compiler(g_cfg.video.shader_compiler_threads_count);
|
|
|
|
m_prog_buffer = std::make_unique<vk::program_cache>
|
|
(
|
|
[this](const vk::pipeline_props& props, const RSXVertexProgram& vp, const RSXFragmentProgram& fp)
|
|
{
|
|
// Program was linked or queued for linking
|
|
m_shaders_cache->store(props, vp, fp);
|
|
}
|
|
);
|
|
|
|
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
|
|
m_vertex_cache = std::make_unique<vk::null_vertex_cache>();
|
|
else
|
|
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
|
|
|
|
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.93");
|
|
|
|
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
|
|
{
|
|
const auto target_layout = m_swapchain->get_optimal_present_layout();
|
|
const auto target_image = m_swapchain->get_image(i);
|
|
VkClearColorValue clear_color{};
|
|
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
|
|
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
|
|
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
|
|
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range);
|
|
|
|
}
|
|
|
|
m_current_frame = &frame_context_storage[0];
|
|
|
|
m_texture_cache.initialize((*m_device), m_device->get_graphics_queue(),
|
|
m_texture_upload_buffer_ring_info);
|
|
|
|
vk::get_overlay_pass<vk::ui_overlay_renderer>()->init(*m_current_command_buffer, m_texture_upload_buffer_ring_info);
|
|
|
|
if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only)
|
|
{
|
|
m_shader_interpreter.init(*m_device);
|
|
}
|
|
|
|
backend_config.supports_multidraw = true;
|
|
|
|
// NOTE: We do not actually need multiple sample support for A2C to work
|
|
// This is here for visual consistency - will be removed when AA problems due to mipmaps are fixed
|
|
if (g_cfg.video.antialiasing_level != msaa_level::none)
|
|
{
|
|
backend_config.supports_hw_msaa = true;
|
|
backend_config.supports_hw_a2c = true;
|
|
backend_config.supports_hw_a2one = m_device->get_alpha_to_one_support();
|
|
}
|
|
|
|
// NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats.
|
|
// Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data.
|
|
backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA);
|
|
|
|
// Conditional rendering support
|
|
backend_config.supports_hw_conditional_render = true;
|
|
|
|
// Passthrough DMA
|
|
backend_config.supports_passthrough_dma = m_device->get_external_memory_host_support();
|
|
|
|
// Host sync
|
|
backend_config.supports_host_gpu_labels = !!g_cfg.video.host_label_synchronization;
|
|
|
|
// Async compute and related operations
|
|
if (g_cfg.video.vk.asynchronous_texture_streaming)
|
|
{
|
|
// Optimistic, enable async compute
|
|
backend_config.supports_asynchronous_compute = true;
|
|
|
|
if (m_device->get_graphics_queue() == m_device->get_transfer_queue())
|
|
{
|
|
rsx_log.error("Cannot run graphics and async transfer in the same queue. Async uploads are disabled. This is a limitation of your GPU");
|
|
backend_config.supports_asynchronous_compute = false;
|
|
}
|
|
}
|
|
|
|
// Sanity checks
|
|
switch (vk::get_driver_vendor())
|
|
{
|
|
case vk::driver_vendor::NVIDIA:
|
|
if (backend_config.supports_asynchronous_compute)
|
|
{
|
|
if (auto chip_family = vk::get_chip_family();
|
|
chip_family == vk::chip_class::NV_kepler || chip_family == vk::chip_class::NV_maxwell)
|
|
{
|
|
rsx_log.warning("Older NVIDIA cards do not meet requirements for true asynchronous compute due to some driver fakery.");
|
|
}
|
|
|
|
rsx_log.notice("Forcing safe async compute for NVIDIA device to avoid crashing.");
|
|
g_cfg.video.vk.asynchronous_scheduler.set(vk_gpu_scheduler_mode::safe);
|
|
}
|
|
break;
|
|
#if !defined(_WIN32)
|
|
// Anything running on the AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
|
|
case vk::driver_vendor::RADV:
|
|
case vk::driver_vendor::AMD:
|
|
#if !defined(__linux__)
|
|
// Intel chipsets would fail on BSD in most cases, and DRM_IOCTL_i915_GEM_USERPTR is unimplemented there
|
|
case vk::driver_vendor::ANV:
|
|
#endif
|
|
if (backend_config.supports_passthrough_dma)
|
|
{
|
|
rsx_log.error("AMDGPU kernel driver on linux and INTEL driver on some platforms cannot support passthrough DMA buffers.");
|
|
backend_config.supports_passthrough_dma = false;
|
|
}
|
|
break;
|
|
#endif
|
|
case vk::driver_vendor::MVK:
|
|
// Async compute crashes immediately on Apple GPUs
|
|
rsx_log.error("Apple GPUs are incompatible with the current implementation of asynchronous texture decoding.");
|
|
backend_config.supports_asynchronous_compute = false;
|
|
break;
|
|
case vk::driver_vendor::INTEL:
|
|
// As expected, host allocations do not work on INTEL despite the extension being present
|
|
if (backend_config.supports_passthrough_dma)
|
|
{
|
|
rsx_log.error("INTEL driver does not support passthrough DMA buffers");
|
|
backend_config.supports_passthrough_dma = false;
|
|
}
|
|
break;
|
|
default: break;
|
|
}
|
|
|
|
if (backend_config.supports_asynchronous_compute)
|
|
{
|
|
// Run only if async compute can be used.
|
|
g_fxo->init<vk::AsyncTaskScheduler>(g_cfg.video.vk.asynchronous_scheduler);
|
|
}
|
|
|
|
if (backend_config.supports_host_gpu_labels)
|
|
{
|
|
if (backend_config.supports_passthrough_dma)
|
|
{
|
|
m_host_object_data = std::make_unique<vk::buffer>(*m_device,
|
|
0x10000,
|
|
memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0,
|
|
VMM_ALLOCATION_POOL_SYSTEM);
|
|
|
|
m_host_data_ptr = new (m_host_object_data->map(0, 0x100000)) vk::host_data_t();
|
|
ensure(m_host_data_ptr->magic == 0xCAFEBABE);
|
|
}
|
|
else
|
|
{
|
|
rsx_log.error("Your GPU/driver does not support extensions required to enable passthrough DMA emulation. Host GPU labels will be disabled.");
|
|
backend_config.supports_host_gpu_labels = false;
|
|
}
|
|
}
|
|
|
|
if (!backend_config.supports_host_gpu_labels &&
|
|
!backend_config.supports_asynchronous_compute)
|
|
{
|
|
// Disable passthrough DMA unless we enable a feature that requires it.
|
|
// I'm avoiding an explicit checkbox for this until I figure out why host labels don't fix all problems with passthrough.
|
|
backend_config.supports_passthrough_dma = false;
|
|
}
|
|
}
|
|
|
|
VKGSRender::~VKGSRender()
|
|
{
|
|
if (m_device == VK_NULL_HANDLE)
|
|
{
|
|
//Initialization failed
|
|
return;
|
|
}
|
|
|
|
// Flush DMA queue
|
|
while (!g_fxo->get<rsx::dma_manager>().sync())
|
|
{
|
|
do_local_task(rsx::FIFO_state::lock_wait);
|
|
}
|
|
|
|
//Wait for device to finish up with resources
|
|
vkDeviceWaitIdle(*m_device);
|
|
|
|
// Globals. TODO: Refactor lifetime management
|
|
if (backend_config.supports_asynchronous_compute)
|
|
{
|
|
g_fxo->get<vk::AsyncTaskScheduler>().destroy();
|
|
}
|
|
|
|
// Host data
|
|
if (m_host_object_data)
|
|
{
|
|
m_host_object_data->unmap();
|
|
m_host_object_data.reset();
|
|
}
|
|
|
|
// Clear flush requests
|
|
m_flush_requests.clear_pending_flag();
|
|
|
|
// Shaders
|
|
vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled
|
|
vk::finalize_compiler_context(); // Shut down the glslang compiler
|
|
m_prog_buffer->clear(); // Delete shader objects
|
|
m_shader_interpreter.destroy();
|
|
|
|
m_persistent_attribute_storage.reset();
|
|
m_volatile_attribute_storage.reset();
|
|
m_vertex_layout_storage.reset();
|
|
|
|
// Upscaler (references some global resources)
|
|
m_upscaler.reset();
|
|
|
|
// Heaps
|
|
m_attrib_ring_info.destroy();
|
|
m_fragment_env_ring_info.destroy();
|
|
m_vertex_env_ring_info.destroy();
|
|
m_fragment_texture_params_ring_info.destroy();
|
|
m_vertex_layout_ring_info.destroy();
|
|
m_fragment_constants_ring_info.destroy();
|
|
m_transform_constants_ring_info.destroy();
|
|
m_index_buffer_ring_info.destroy();
|
|
m_texture_upload_buffer_ring_info.destroy();
|
|
m_vertex_instructions_buffer.destroy();
|
|
m_fragment_instructions_buffer.destroy();
|
|
m_raster_env_ring_info.destroy();
|
|
|
|
// Fallback bindables
|
|
null_buffer.reset();
|
|
null_buffer_view.reset();
|
|
|
|
if (m_current_frame == &m_aux_frame_context)
|
|
{
|
|
// Return resources back to the owner
|
|
m_current_frame = &frame_context_storage[m_current_queue_index];
|
|
m_current_frame->swap_storage(m_aux_frame_context);
|
|
m_current_frame->grab_resources(m_aux_frame_context);
|
|
}
|
|
|
|
m_aux_frame_context.buffer_views_to_clean.clear();
|
|
|
|
// NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when the pool is destroyed
|
|
for (auto &ctx : frame_context_storage)
|
|
{
|
|
vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr);
|
|
vkDestroySemaphore((*m_device), ctx.acquire_signal_semaphore, nullptr);
|
|
ctx.descriptor_pool.destroy();
|
|
|
|
ctx.buffer_views_to_clean.clear();
|
|
}
|
|
|
|
// Textures
|
|
m_rtts.destroy();
|
|
m_texture_cache.destroy();
|
|
|
|
m_stencil_mirror_sampler.reset();
|
|
|
|
// Overlay text handler
|
|
m_text_writer.reset();
|
|
|
|
//Pipeline descriptors
|
|
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
|
|
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
|
|
|
|
// Queries
|
|
m_occlusion_query_manager.reset();
|
|
m_cond_render_buffer.reset();
|
|
|
|
// Command buffer
|
|
m_primary_cb_list.destroy();
|
|
m_secondary_cb_list.destroy();
|
|
|
|
m_command_buffer_pool.destroy();
|
|
m_secondary_command_buffer_pool.destroy();
|
|
|
|
// Global resources
|
|
vk::destroy_global_resources();
|
|
|
|
// Device handles/contexts
|
|
m_swapchain->destroy();
|
|
m_instance.destroy();
|
|
|
|
#if defined(HAVE_X11) && defined(HAVE_VULKAN)
|
|
if (m_display_handle)
|
|
XCloseDisplay(m_display_handle);
|
|
#endif
|
|
}
|
|
|
|
bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|
{
|
|
vk::texture_cache::thrashed_set result;
|
|
{
|
|
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
|
|
result = m_texture_cache.invalidate_address(*m_secondary_cb_list.get(), address, cause);
|
|
}
|
|
|
|
if (result.invalidate_samplers)
|
|
{
|
|
std::lock_guard lock(m_sampler_mutex);
|
|
m_samplers_dirty.store(true);
|
|
}
|
|
|
|
if (!result.violation_handled)
|
|
{
|
|
return zcull_ctrl->on_access_violation(address);
|
|
}
|
|
|
|
if (result.num_flushable > 0)
|
|
{
|
|
if (g_fxo->get<rsx::dma_manager>().is_current_thread())
|
|
{
|
|
// The offloader thread cannot handle flush requests
|
|
ensure(!(m_queue_status & flush_queue_state::deadlock));
|
|
|
|
m_offloader_fault_range = g_fxo->get<rsx::dma_manager>().get_fault_range(is_writing);
|
|
m_offloader_fault_cause = (is_writing) ? rsx::invalidation_cause::write : rsx::invalidation_cause::read;
|
|
|
|
g_fxo->get<rsx::dma_manager>().set_mem_fault_flag();
|
|
m_queue_status |= flush_queue_state::deadlock;
|
|
m_eng_interrupt_mask |= rsx::backend_interrupt;
|
|
|
|
// Wait for deadlock to clear
|
|
while (m_queue_status & flush_queue_state::deadlock)
|
|
{
|
|
utils::pause();
|
|
}
|
|
|
|
g_fxo->get<rsx::dma_manager>().clear_mem_fault_flag();
|
|
return true;
|
|
}
|
|
|
|
bool has_queue_ref = false;
|
|
if (!is_current_thread()) [[likely]]
|
|
{
|
|
// Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
|
|
vm::temporary_unlock();
|
|
|
|
std::lock_guard lock(m_flush_queue_mutex);
|
|
|
|
m_flush_requests.post(false);
|
|
m_eng_interrupt_mask |= rsx::backend_interrupt;
|
|
has_queue_ref = true;
|
|
}
|
|
else
|
|
{
|
|
if (vk::is_uninterruptible())
|
|
{
|
|
rsx_log.error("Fault in uninterruptible code!");
|
|
}
|
|
|
|
// Flush primary cb queue to sync pending changes (e.g. image transitions)
|
|
flush_command_queue();
|
|
}
|
|
|
|
if (has_queue_ref)
|
|
{
|
|
// Wait for the RSX thread to process request if it hasn't already
|
|
m_flush_requests.producer_wait();
|
|
}
|
|
|
|
m_texture_cache.flush_all(*m_secondary_cb_list.next(), result);
|
|
|
|
if (has_queue_ref)
|
|
{
|
|
// Release RSX thread
|
|
m_flush_requests.remove_one();
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
|
|
{
|
|
std::lock_guard lock(m_secondary_cb_guard);
|
|
|
|
auto data = m_texture_cache.invalidate_range(*m_secondary_cb_list.next(), range, cause);
|
|
AUDIT(data.empty());
|
|
|
|
if (cause == rsx::invalidation_cause::unmap)
|
|
{
|
|
if (data.violation_handled)
|
|
{
|
|
m_texture_cache.purge_unreleased_sections();
|
|
{
|
|
std::lock_guard lock(m_sampler_mutex);
|
|
m_samplers_dirty.store(true);
|
|
}
|
|
}
|
|
|
|
vk::unmap_dma(range.start, range.length());
|
|
}
|
|
}
|
|
|
|
void VKGSRender::on_semaphore_acquire_wait()
|
|
{
|
|
if (m_flush_requests.pending() ||
|
|
(async_flip_requested & flip_request::emu_requested) ||
|
|
(m_queue_status & flush_queue_state::deadlock))
|
|
{
|
|
do_local_task(rsx::FIFO_state::lock_wait);
|
|
}
|
|
}
|
|
|
|
bool VKGSRender::on_vram_exhausted(rsx::problem_severity severity)
|
|
{
|
|
ensure(!vk::is_uninterruptible() && rsx::get_current_renderer()->is_current_thread());
|
|
|
|
bool texture_cache_relieved = false;
|
|
if (severity >= rsx::problem_severity::fatal && m_texture_cache.is_overallocated())
|
|
{
|
|
// Evict some unused textures. Do not evict any active references
|
|
std::set<u32> exclusion_list;
|
|
auto scan_array = [&](const auto& texture_array)
|
|
{
|
|
for (auto i = 0ull; i < texture_array.size(); ++i)
|
|
{
|
|
const auto& tex = texture_array[i];
|
|
const auto addr = rsx::get_address(tex.offset(), tex.location());
|
|
exclusion_list.insert(addr);
|
|
}
|
|
};
|
|
|
|
scan_array(rsx::method_registers.fragment_textures);
|
|
scan_array(rsx::method_registers.vertex_textures);
|
|
|
|
// Hold the secondary lock guard to prevent threads from trying to touch access violation handler stuff
|
|
std::lock_guard lock(m_secondary_cb_guard);
|
|
|
|
rsx_log.warning("Texture cache is overallocated. Will evict unnecessary textures.");
|
|
texture_cache_relieved = m_texture_cache.evict_unused(exclusion_list);
|
|
}
|
|
|
|
texture_cache_relieved |= m_texture_cache.handle_memory_pressure(severity);
|
|
if (severity == rsx::problem_severity::low)
|
|
{
|
|
// Low severity only handles invalidating unused textures
|
|
return texture_cache_relieved;
|
|
}
|
|
|
|
bool surface_cache_relieved = false;
|
|
if (severity >= rsx::problem_severity::moderate)
|
|
{
|
|
// Check if we need to spill
|
|
const auto mem_info = m_device->get_memory_mapping();
|
|
if (severity >= rsx::problem_severity::fatal && // Only spill for fatal errors
|
|
mem_info.device_local != mem_info.host_visible_coherent && // Do not spill if it is an IGP; there is nowhere to spill to
|
|
m_rtts.is_overallocated()) // Surface cache must be over-allocated by the design quota
|
|
{
|
|
// Queue a VRAM spill operation.
|
|
m_rtts.spill_unused_memory();
|
|
}
|
|
|
|
// Moderate severity and higher also starts removing stale render target objects
|
|
if (m_rtts.handle_memory_pressure(*m_current_command_buffer, severity))
|
|
{
|
|
surface_cache_relieved = true;
|
|
m_rtts.free_invalidated(*m_current_command_buffer, severity);
|
|
}
|
|
|
|
if (severity >= rsx::problem_severity::fatal && surface_cache_relieved && !m_samplers_dirty)
|
|
{
|
|
// If surface cache was modified destructively, then we must reload samplers touching the surface cache.
|
|
bool invalidate_samplers = false;
|
|
auto scan_array = [&](const auto& texture_array, const auto& sampler_states)
|
|
{
|
|
for (auto i = 0ull; i < texture_array.size() && !invalidate_samplers; ++i)
|
|
{
|
|
if (texture_array[i].enabled() && sampler_states[i])
|
|
{
|
|
invalidate_samplers = (sampler_states[i]->upload_context == rsx::texture_upload_context::framebuffer_storage);
|
|
}
|
|
}
|
|
};
|
|
|
|
scan_array(rsx::method_registers.fragment_textures, fs_sampler_state);
|
|
scan_array(rsx::method_registers.vertex_textures, vs_sampler_state);
|
|
|
|
if (invalidate_samplers)
|
|
{
|
|
m_samplers_dirty.store(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
const bool any_cache_relieved = (texture_cache_relieved || surface_cache_relieved);
|
|
if (any_cache_relieved && severity >= rsx::problem_severity::fatal)
|
|
{
|
|
// Imminent crash; a full GPU sync is the least of our problems
|
|
flush_command_queue(true, true);
|
|
}
|
|
|
|
return any_cache_relieved;
|
|
}
|
|
|
|
void VKGSRender::notify_tile_unbound(u32 tile)
|
|
{
|
|
//TODO: Handle texture writeback
|
|
if (false)
|
|
{
|
|
u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
|
|
on_notify_memory_unmapped(addr, tiles[tile].size);
|
|
m_rtts.invalidate_surface_address(addr, false);
|
|
}
|
|
|
|
{
|
|
std::lock_guard lock(m_sampler_mutex);
|
|
m_samplers_dirty.store(true);
|
|
}
|
|
}
|
|
|
|
void VKGSRender::check_heap_status(u32 flags)
|
|
{
|
|
ensure(flags);
|
|
|
|
bool heap_critical;
|
|
if (flags == VK_HEAP_CHECK_ALL)
|
|
{
|
|
heap_critical = m_attrib_ring_info.is_critical() ||
|
|
m_texture_upload_buffer_ring_info.is_critical() ||
|
|
m_fragment_env_ring_info.is_critical() ||
|
|
m_vertex_env_ring_info.is_critical() ||
|
|
m_fragment_texture_params_ring_info.is_critical() ||
|
|
m_vertex_layout_ring_info.is_critical() ||
|
|
m_fragment_constants_ring_info.is_critical() ||
|
|
m_transform_constants_ring_info.is_critical() ||
|
|
m_index_buffer_ring_info.is_critical() ||
|
|
m_raster_env_ring_info.is_critical();
|
|
}
|
|
else
|
|
{
|
|
heap_critical = false;
|
|
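// Walk the requested flag bits from lowest to highest, stopping as soon as any heap is found to be critical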
u32 test = 1u << std::countr_zero(flags);
|
|
|
|
do
|
|
{
|
|
switch (flags & test)
|
|
{
|
|
case 0:
|
|
break;
|
|
case VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE:
|
|
heap_critical = m_texture_upload_buffer_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_VERTEX_STORAGE:
|
|
heap_critical = m_attrib_ring_info.is_critical() || m_index_buffer_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_VERTEX_ENV_STORAGE:
|
|
heap_critical = m_vertex_env_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE:
|
|
heap_critical = m_fragment_env_ring_info.is_critical() || m_raster_env_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_TEXTURE_ENV_STORAGE:
|
|
heap_critical = m_fragment_texture_params_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE:
|
|
heap_critical = m_vertex_layout_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE:
|
|
heap_critical = m_transform_constants_ring_info.is_critical();
|
|
break;
|
|
case VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE:
|
|
heap_critical = m_fragment_constants_ring_info.is_critical();
|
|
break;
|
|
default:
|
|
fmt::throw_exception("Unexpected heap flag set! (0x%X)", test);
|
|
}
|
|
|
|
flags &= ~test;
|
|
test <<= 1;
|
|
}
|
|
while (flags && !heap_critical);
|
|
}
|
|
|
|
if (heap_critical)
|
|
{
|
|
m_profiler.start();
|
|
|
|
vk::frame_context_t *target_frame = nullptr;
|
|
if (!m_queued_frames.empty())
|
|
{
|
|
if (m_current_frame != &m_aux_frame_context)
|
|
{
|
|
target_frame = m_queued_frames.front();
|
|
}
|
|
}
|
|
|
|
if (target_frame == nullptr)
|
|
{
|
|
flush_command_queue(true);
|
|
m_vertex_cache->purge();
|
|
|
|
m_index_buffer_ring_info.reset_allocation_stats();
|
|
m_fragment_env_ring_info.reset_allocation_stats();
|
|
m_vertex_env_ring_info.reset_allocation_stats();
|
|
m_fragment_texture_params_ring_info.reset_allocation_stats();
|
|
m_vertex_layout_ring_info.reset_allocation_stats();
|
|
m_fragment_constants_ring_info.reset_allocation_stats();
|
|
m_transform_constants_ring_info.reset_allocation_stats();
|
|
m_attrib_ring_info.reset_allocation_stats();
|
|
m_texture_upload_buffer_ring_info.reset_allocation_stats();
|
|
m_raster_env_ring_info.reset_allocation_stats();
|
|
m_current_frame->reset_heap_ptrs();
|
|
m_last_heap_sync_time = rsx::get_shared_tag();
|
|
}
|
|
else
|
|
{
|
|
// Flush the frame context
|
|
frame_context_cleanup(target_frame);
|
|
}
|
|
|
|
m_frame_stats.flip_time += m_profiler.duration();
|
|
}
|
|
}
|
|
|
|
void VKGSRender::check_present_status()
|
|
{
|
|
while (!m_queued_frames.empty())
|
|
{
|
|
auto ctx = m_queued_frames.front();
|
|
if (!ctx->swap_command_buffer->poke())
|
|
{
|
|
return;
|
|
}
|
|
|
|
frame_context_cleanup(ctx);
|
|
}
|
|
}
|
|
|
|
void VKGSRender::check_descriptors()
|
|
{
|
|
// Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
|
|
const auto required_descriptors = rsx::method_registers.current_draw_clause.pass_count();
|
|
if (!m_current_frame->descriptor_pool.can_allocate(required_descriptors, m_current_frame->used_descriptors))
|
|
{
|
|
// Should hard sync before resetting descriptors for spec compliance
|
|
flush_command_queue(true);
|
|
|
|
m_current_frame->descriptor_pool.reset(0);
|
|
m_current_frame->used_descriptors = 0;
|
|
}
|
|
}
|
|
|
|
VkDescriptorSet VKGSRender::allocate_descriptor_set()
|
|
{
|
|
if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
|
|
{
|
|
return m_current_frame->descriptor_pool.allocate(descriptor_layouts, VK_TRUE, m_current_frame->used_descriptors++);
|
|
}
|
|
else
|
|
{
|
|
return m_shader_interpreter.allocate_descriptor_set();
|
|
}
|
|
}
|
|
|
|
void VKGSRender::set_viewport()
|
|
{
|
|
const auto [clip_width, clip_height] = rsx::apply_resolution_scale<true>(
|
|
rsx::method_registers.surface_clip_width(), rsx::method_registers.surface_clip_height());
|
|
|
|
const auto zclip_near = rsx::method_registers.clip_min();
|
|
const auto zclip_far = rsx::method_registers.clip_max();
|
|
|
|
//NOTE: The scale_offset matrix already has the viewport matrix factored in
|
|
m_viewport.x = 0;
|
|
m_viewport.y = 0;
|
|
m_viewport.width = clip_width;
|
|
m_viewport.height = clip_height;
|
|
|
|
if (m_device->get_unrestricted_depth_range_support())
|
|
{
|
|
m_viewport.minDepth = zclip_near;
|
|
m_viewport.maxDepth = zclip_far;
|
|
}
|
|
else
|
|
{
|
|
m_viewport.minDepth = 0.f;
|
|
m_viewport.maxDepth = 1.f;
|
|
}
|
|
|
|
m_graphics_state &= ~(rsx::pipeline_state::zclip_config_state_dirty);
|
|
}
|
|
|
|
void VKGSRender::set_scissor(bool clip_viewport)
|
|
{
|
|
areau scissor;
|
|
if (get_scissor(scissor, clip_viewport))
|
|
{
|
|
m_scissor.extent.height = scissor.height();
|
|
m_scissor.extent.width = scissor.width();
|
|
m_scissor.offset.x = scissor.x1;
|
|
m_scissor.offset.y = scissor.y1;
|
|
}
|
|
}
|
|
|
|
void VKGSRender::bind_viewport()
|
|
{
|
|
if (m_graphics_state & rsx::pipeline_state::zclip_config_state_dirty)
|
|
{
|
|
if (m_device->get_unrestricted_depth_range_support())
|
|
{
|
|
m_viewport.minDepth = rsx::method_registers.clip_min();
|
|
m_viewport.maxDepth = rsx::method_registers.clip_max();
|
|
}
|
|
|
|
m_graphics_state &= ~(rsx::pipeline_state::zclip_config_state_dirty);
|
|
}
|
|
|
|
vkCmdSetViewport(*m_current_command_buffer, 0, 1, &m_viewport);
|
|
vkCmdSetScissor(*m_current_command_buffer, 0, 1, &m_scissor);
|
|
}
|
|
|
|
void VKGSRender::on_init_thread()
|
|
{
|
|
if (m_device == VK_NULL_HANDLE)
|
|
{
|
|
fmt::throw_exception("No vulkan device was created");
|
|
}
|
|
|
|
GSRender::on_init_thread();
|
|
zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
|
|
|
|
if (!m_overlay_manager)
|
|
{
|
|
m_frame->hide();
|
|
m_shaders_cache->load(nullptr, pipeline_layout);
|
|
m_frame->show();
|
|
}
|
|
else
|
|
{
|
|
rsx::shader_loading_dialog_native dlg(this);
|
|
|
|
// TODO: Handle window resize messages during loading on GPUs without OUT_OF_DATE_KHR support
|
|
m_shaders_cache->load(&dlg, pipeline_layout);
|
|
}
|
|
}
|
|
|
|
void VKGSRender::on_exit()
|
|
{
|
|
GSRender::on_exit();
|
|
zcull_ctrl.release();
|
|
}
|
|
|
|
void VKGSRender::clear_surface(u32 mask)
|
|
{
|
|
if (skip_current_frame || swapchain_unavailable) return;
|
|
|
|
// If stencil write mask is disabled, remove clear_stencil bit
|
|
if (!rsx::method_registers.stencil_mask()) mask &= ~RSX_GCM_CLEAR_STENCIL_BIT;
|
|
|
|
// Ignore invalid clear flags
|
|
if (!(mask & RSX_GCM_CLEAR_ANY_MASK)) return;
|
|
|
|
u8 ctx = rsx::framebuffer_creation_context::context_draw;
|
|
if (mask & RSX_GCM_CLEAR_COLOR_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_color;
|
|
if (mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
|
|
init_buffers(rsx::framebuffer_creation_context{ctx});
|
|
|
|
if (!framebuffer_status_valid) return;
|
|
|
|
//float depth_clear = 1.f;
|
|
u32 stencil_clear = 0;
|
|
u32 depth_stencil_mask = 0;
|
|
|
|
std::vector<VkClearAttachment> clear_descriptors;
|
|
VkClearValue depth_stencil_clear_values = {}, color_clear_values = {};
|
|
|
|
u16 scissor_x = static_cast<u16>(m_scissor.offset.x);
|
|
u16 scissor_w = static_cast<u16>(m_scissor.extent.width);
|
|
u16 scissor_y = static_cast<u16>(m_scissor.offset.y);
|
|
u16 scissor_h = static_cast<u16>(m_scissor.extent.height);
|
|
|
|
const u16 fb_width = m_draw_fbo->width();
|
|
const u16 fb_height = m_draw_fbo->height();
|
|
|
|
//clip region
|
|
std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true);
|
|
VkClearRect region = { { { scissor_x, scissor_y }, { scissor_w, scissor_h } }, 0, 1 };
|
|
|
|
const bool full_frame = (scissor_w == fb_width && scissor_h == fb_height);
|
|
bool update_color = false, update_z = false;
|
|
auto surface_depth_format = rsx::method_registers.surface_depth_fmt();
|
|
|
|
if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK)
|
|
{
|
|
if (mask & RSX_GCM_CLEAR_DEPTH_BIT)
|
|
{
|
|
u32 max_depth_value = get_max_depth_value(surface_depth_format);
|
|
|
|
u32 clear_depth = rsx::method_registers.z_clear_value(is_depth_stencil_format(surface_depth_format));
|
|
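// Normalize the raw integer clear value against the format's maximum to the [0, 1] float range Vulkan expects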
float depth_clear = static_cast<float>(clear_depth) / max_depth_value;
|
|
|
|
depth_stencil_clear_values.depthStencil.depth = depth_clear;
|
|
depth_stencil_clear_values.depthStencil.stencil = stencil_clear;
|
|
|
|
depth_stencil_mask |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
|
}
|
|
|
|
if (is_depth_stencil_format(surface_depth_format))
|
|
{
|
|
if (mask & RSX_GCM_CLEAR_STENCIL_BIT)
|
|
{
|
|
u8 clear_stencil = rsx::method_registers.stencil_clear_value();
|
|
depth_stencil_clear_values.depthStencil.stencil = clear_stencil;
|
|
|
|
depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
|
|
if (ds->samples() > 1)
|
|
{
|
|
if (full_frame) ds->stencil_init_flags &= 0xFF;
|
|
ds->stencil_init_flags |= clear_stencil;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((depth_stencil_mask && depth_stencil_mask != ds->aspect()) || !full_frame)
|
|
{
|
|
// At least one aspect is not being cleared or the clear does not cover the full frame
|
|
// Steps to initialize memory are required
|
|
|
|
if (ds->state_flags & rsx::surface_state_flags::erase_bkgnd && // Needs initialization
|
|
ds->old_contents.empty() && !g_cfg.video.read_depth_buffer) // No way to load data from memory, so no initialization given
|
|
{
|
|
// Only one aspect was cleared. Make sure to memory initialize the other before removing dirty flag
|
|
const auto ds_mask = (mask & RSX_GCM_CLEAR_DEPTH_STENCIL_MASK);
|
|
if (ds_mask == RSX_GCM_CLEAR_DEPTH_BIT && (ds->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT))
|
|
{
|
|
// Depth was cleared, initialize stencil
|
|
depth_stencil_clear_values.depthStencil.stencil = 0xFF;
|
|
depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
}
|
|
else if (ds_mask == RSX_GCM_CLEAR_STENCIL_BIT)
|
|
{
|
|
// Stencil was cleared, initialize depth
|
|
depth_stencil_clear_values.depthStencil.depth = 1.f;
|
|
depth_stencil_mask |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Barrier required before any writes
|
|
ds->write_barrier(*m_current_command_buffer);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (auto colormask = (mask & RSX_GCM_CLEAR_COLOR_MASK))
|
|
{
|
|
if (!m_draw_buffers.empty())
|
|
{
|
|
bool use_fast_clear = false;
|
|
u8 clear_a = rsx::method_registers.clear_color_a();
|
|
u8 clear_r = rsx::method_registers.clear_color_r();
|
|
u8 clear_g = rsx::method_registers.clear_color_g();
|
|
u8 clear_b = rsx::method_registers.clear_color_b();
|
|
|
|
switch (rsx::method_registers.surface_color())
|
|
{
|
|
case rsx::surface_color_format::x32:
|
|
case rsx::surface_color_format::w16z16y16x16:
|
|
case rsx::surface_color_format::w32z32y32x32:
|
|
{
|
|
//NOP
|
|
colormask = 0;
|
|
break;
|
|
}
|
|
case rsx::surface_color_format::b8:
|
|
{
|
|
rsx::get_b8_clear_color(clear_r, clear_g, clear_b, clear_a);
|
|
colormask = rsx::get_b8_clearmask(colormask);
|
|
use_fast_clear = (colormask == RSX_GCM_CLEAR_RED_BIT);
|
|
break;
|
|
}
|
|
case rsx::surface_color_format::g8b8:
|
|
{
|
|
rsx::get_g8b8_clear_color(clear_r, clear_g, clear_b, clear_a);
|
|
colormask = rsx::get_g8b8_r8g8_clearmask(colormask);
|
|
use_fast_clear = (colormask == (RSX_GCM_CLEAR_RED_BIT | RSX_GCM_CLEAR_GREEN_BIT));
|
|
break;
|
|
}
|
|
case rsx::surface_color_format::a8b8g8r8:
|
|
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
|
|
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
|
|
{
|
|
rsx::get_abgr8_clear_color(clear_r, clear_g, clear_b, clear_a);
|
|
colormask = rsx::get_abgr8_clearmask(colormask);
|
|
[[fallthrough]];
|
|
}
|
|
default:
|
|
{
|
|
use_fast_clear = (colormask == RSX_GCM_CLEAR_COLOR_MASK);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (colormask)
|
|
{
|
|
if (!use_fast_clear || !full_frame)
|
|
{
|
|
// If we are not clobbering all of the memory, a barrier is required
|
|
for (const auto& index : m_rtts.m_bound_render_target_ids)
|
|
{
|
|
m_rtts.m_bound_render_targets[index].second->write_barrier(*m_current_command_buffer);
|
|
}
|
|
}
|
|
|
|
color_clear_values.color.float32[0] = static_cast<float>(clear_r) / 255;
|
|
color_clear_values.color.float32[1] = static_cast<float>(clear_g) / 255;
|
|
color_clear_values.color.float32[2] = static_cast<float>(clear_b) / 255;
|
|
color_clear_values.color.float32[3] = static_cast<float>(clear_a) / 255;
|
|
|
|
if (use_fast_clear)
|
|
{
|
|
for (u32 index = 0; index < m_draw_buffers.size(); ++index)
|
|
{
|
|
clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, index, color_clear_values });
|
|
}
|
|
}
|
|
else
|
|
{
|
|
color4f clear_color =
|
|
{
|
|
color_clear_values.color.float32[0],
|
|
color_clear_values.color.float32[1],
|
|
color_clear_values.color.float32[2],
|
|
color_clear_values.color.float32[3]
|
|
};
|
|
|
|
auto attachment_clear_pass = vk::get_overlay_pass<vk::attachment_clear_pass>();
|
|
attachment_clear_pass->run(*m_current_command_buffer, m_draw_fbo, region.rect, colormask, clear_color, get_render_pass());
|
|
}
|
|
|
|
update_color = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (depth_stencil_mask)
|
|
{
|
|
if ((depth_stencil_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
|
|
rsx::method_registers.stencil_mask() != 0xff)
|
|
{
|
|
// Partial stencil clear. Disables fast stencil clear
|
|
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
|
auto key = vk::get_renderpass_key({ ds });
|
|
auto renderpass = vk::get_renderpass(*m_device, key);
|
|
|
|
vk::get_overlay_pass<vk::stencil_clear_pass>()->run(
|
|
*m_current_command_buffer, ds, region.rect,
|
|
depth_stencil_clear_values.depthStencil.stencil,
|
|
rsx::method_registers.stencil_mask(), renderpass);
|
|
|
|
depth_stencil_mask &= ~VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
}
|
|
|
|
if (depth_stencil_mask)
|
|
{
|
|
clear_descriptors.push_back({ static_cast<VkImageAspectFlags>(depth_stencil_mask), 0, depth_stencil_clear_values });
|
|
}
|
|
|
|
update_z = true;
|
|
}
|
|
|
|
if (update_color || update_z)
|
|
{
|
|
m_rtts.on_write({ update_color, update_color, update_color, update_color }, update_z);
|
|
}
|
|
|
|
if (!clear_descriptors.empty())
|
|
{
|
|
begin_render_pass();
|
|
vkCmdClearAttachments(*m_current_command_buffer, ::size32(clear_descriptors), clear_descriptors.data(), 1, &region);
|
|
}
|
|
}
|
|
|
|
void VKGSRender::flush_command_queue(bool hard_sync, bool do_not_switch)
|
|
{
|
|
close_and_submit_command_buffer();
|
|
|
|
if (hard_sync)
|
|
{
|
|
// wait for the latest instruction to execute
|
|
m_current_command_buffer->reset();
|
|
|
|
// Clear all command buffer statuses
|
|
m_primary_cb_list.poke_all();
|
|
|
|
// Drain present queue
|
|
while (!m_queued_frames.empty())
|
|
{
|
|
check_present_status();
|
|
}
|
|
|
|
m_flush_requests.clear_pending_flag();
|
|
}
|
|
|
|
if (!do_not_switch)
|
|
{
|
|
// Grab next cb in line and make it usable
|
|
// NOTE: Even in the case of a hard sync, this is required to free any waiters on the CB (ZCULL)
|
|
m_current_command_buffer = m_primary_cb_list.next();
|
|
m_current_command_buffer->reset();
|
|
}
|
|
else
|
|
{
|
|
// Special hard-sync where we must preserve the CB. This can happen when an emergency event handler is invoked and needs to flush to hw.
|
|
ensure(hard_sync);
|
|
}
|
|
|
|
// Just in case a queued frame holds a ref to this cb, drain the present queue
|
|
check_present_status();
|
|
|
|
if (m_occlusion_query_active)
|
|
{
|
|
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
|
|
}
|
|
|
|
m_current_command_buffer->begin();
|
|
}
|
|
|
|
bool VKGSRender::release_GCM_label(u32 address, u32 args)
|
|
{
|
|
if (!backend_config.supports_host_gpu_labels)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
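// Busy-wait until the GPU-visible commands_complete_event catches up with the last label release we recorded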
auto drain_label_queue = [this]()
|
|
{
|
|
while (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
|
|
{
|
|
utils::pause();
|
|
|
|
if (thread_ctrl::state() == thread_state::aborting)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
|
|
ensure(m_host_data_ptr);
|
|
if (m_host_data_ptr->texture_load_complete_event == m_host_data_ptr->texture_load_request_event)
|
|
{
|
|
// All texture loads already seen by the host GPU
|
|
// Wait for all previously submitted labels to be flushed
|
|
drain_label_queue();
|
|
return false;
|
|
}
|
|
|
|
const auto mapping = vk::map_dma(address, 4);
|
|
const auto write_data = std::bit_cast<u32, be_t<u32>>(args);
|
|
|
|
if (!dynamic_cast<vk::memory_block_host*>(mapping.second->memory.get()))
|
|
{
|
|
// NVIDIA GPUs can disappoint when DMA blocks straddle VirtualAlloc boundaries.
|
|
// Take the L and try the fallback.
|
|
rsx_log.warning("Host label update at 0x%x was not possible.", address);
|
|
drain_label_queue();
|
|
return false;
|
|
}
|
|
|
|
m_host_data_ptr->last_label_release_event = m_host_data_ptr->inc_counter();
|
|
|
|
if (m_host_data_ptr->texture_load_request_event > m_host_data_ptr->last_label_submit_event)
|
|
{
|
|
if (vk::is_renderpass_open(*m_current_command_buffer))
|
|
{
|
|
vk::end_renderpass(*m_current_command_buffer);
|
|
}
|
|
|
|
vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data);
|
|
flush_command_queue();
|
|
}
|
|
else
|
|
{
|
|
auto cmd = m_secondary_cb_list.next();
|
|
cmd->begin();
|
|
vkCmdUpdateBuffer(*cmd, mapping.second->value, mapping.first, 4, &write_data);
|
|
vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
|
|
cmd->end();
|
|
|
|
vk::queue_submit_t submit_info = { m_device->get_graphics_queue(), nullptr };
|
|
cmd->submit(submit_info);
|
|
|
|
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_t payload)
|
|
{
|
|
rsx::thread::sync_hint(hint, payload);
|
|
|
|
if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task))
|
|
{
|
|
// Occlusion queries not enabled, do nothing
|
|
return;
|
|
}
|
|
|
|
// Occlusion test result evaluation is coming up, avoid a hard sync
|
|
switch (hint)
|
|
{
|
|
case rsx::FIFO_hint::hint_conditional_render_eval:
|
|
{
|
|
// If a flush request is already enqueued, do nothing
|
|
if (m_flush_requests.pending())
|
|
{
|
|
return;
|
|
}
|
|
|
|
// If the result is not going to be read by CELL, do nothing
|
|
const auto ref_addr = static_cast<u32>(payload.address);
|
|
if (!zcull_ctrl->is_query_result_urgent(ref_addr))
|
|
{
|
|
// No effect on CELL behaviour, it will be faster to handle this in RSX code
|
|
return;
|
|
}
|
|
|
|
// OK, CELL will probably be accessing the results.
// Try to avoid flush spam; it is more costly to flush the CB than it is to just upload the vertex data.
// This is supposed to be an optimization after all.
|
|
const auto now = rsx::uclock();
|
|
if ((now - m_last_cond_render_eval_hint) > 50)
|
|
{
|
|
// Schedule a sync on the next loop iteration
|
|
m_flush_requests.post(false);
|
|
m_flush_requests.remove_one();
|
|
}
|
|
|
|
m_last_cond_render_eval_hint = now;
|
|
break;
|
|
}
|
|
case rsx::FIFO_hint::hint_zcull_sync:
|
|
{
|
|
// Check if the required report is synced to this CB
|
|
auto& data = m_occlusion_map[payload.query->driver_handle];
|
|
|
|
// NOTE: Currently, a special condition exists where the indices can be empty even with an active draw count.
// This is caused by the async compiler and should be removed once ubershaders are added in.
|
|
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Unavoidable hard sync coming up, flush immediately
|
|
// This heavyweight hint should be used with caution
|
|
std::lock_guard lock(m_flush_queue_mutex);
|
|
flush_command_queue();
|
|
|
|
if (m_flush_requests.pending())
|
|
{
|
|
// Clear without wait
|
|
m_flush_requests.clear_pending_flag();
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void VKGSRender::do_local_task(rsx::FIFO_state state)
|
|
{
|
|
if (m_queue_status & flush_queue_state::deadlock)
|
|
{
|
|
// Clear offloader deadlock
|
|
// NOTE: It is not possible to handle regular flush requests before this is cleared
|
|
// NOTE: This may cause graphics corruption due to unsynchronized modification
|
|
on_invalidate_memory_range(m_offloader_fault_range, m_offloader_fault_cause);
|
|
m_queue_status.clear(flush_queue_state::deadlock);
|
|
}
|
|
|
|
if (m_queue_status & flush_queue_state::flushing)
|
|
{
|
|
// Abort recursive CB submit requests.
|
|
// When flushing flag is already set, only deadlock events may be processed.
|
|
return;
|
|
}
|
|
else if (m_flush_requests.pending())
|
|
{
|
|
if (m_flush_queue_mutex.try_lock())
|
|
{
|
|
// TODO: Determine if a hard sync is necessary
|
|
// Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
|
|
flush_command_queue();
|
|
|
|
m_flush_requests.clear_pending_flag();
|
|
m_flush_requests.consumer_wait();
|
|
m_flush_queue_mutex.unlock();
|
|
}
|
|
}
|
|
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
|
|
{
|
|
if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
|
|
{
|
|
// This will re-engage locks and break the texture cache if another thread is waiting in the access violation handler!
// Only call when there are no waiters.
|
|
m_texture_cache.do_update();
|
|
m_graphics_state &= ~rsx::pipeline_state::framebuffer_reads_dirty;
|
|
}
|
|
}
|
|
|
|
rsx::thread::do_local_task(state);
|
|
|
|
switch (state)
|
|
{
|
|
case rsx::FIFO_state::lock_wait:
|
|
// Critical check finished
|
|
return;
|
|
//case rsx::FIFO_state::spinning:
|
|
//case rsx::FIFO_state::empty:
|
|
// We have some time, check the present queue
|
|
//check_present_status();
|
|
//break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (m_overlay_manager)
|
|
{
|
|
if (!in_begin_end && async_flip_requested & flip_request::native_ui)
|
|
{
|
|
flush_command_queue(true);
|
|
rsx::display_flip_info_t info{};
|
|
info.buffer = current_display_buffer;
|
|
flip(info);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool VKGSRender::load_program()
|
|
{
|
|
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
|
|
{
|
|
get_current_fragment_program(fs_sampler_state);
|
|
ensure(current_fragment_program.valid);
|
|
|
|
get_current_vertex_program(vs_sampler_state);
|
|
|
|
m_graphics_state &= ~rsx::pipeline_state::invalidate_pipeline_bits;
|
|
}
|
|
|
|
auto &vertex_program = current_vertex_program;
|
|
auto &fragment_program = current_fragment_program;
|
|
auto old_program = m_program;
|
|
|
|
vk::pipeline_props properties{};
|
|
|
|
// Input assembly
|
|
bool emulated_primitive_type;
|
|
properties.state.set_primitive_type(vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, emulated_primitive_type));
|
|
|
|
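// Hardware primitive restart is only usable for indexed draws with a native (non-emulated) topology and a contiguous (non-disjoint) range.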
const bool restarts_valid = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed && !emulated_primitive_type && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
|
if (rsx::method_registers.restart_index_enabled() && !vk::emulate_primitive_restart(rsx::method_registers.current_draw_clause.primitive) && restarts_valid)
|
|
properties.state.enable_primitive_restart();
|
|
|
|
// Rasterizer state
|
|
properties.state.set_attachment_count(::size32(m_draw_buffers));
|
|
properties.state.set_front_face(vk::get_front_face(rsx::method_registers.front_face_mode()));
|
|
properties.state.enable_depth_clamp(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled());
|
|
properties.state.enable_depth_bias(true);
|
|
properties.state.enable_depth_bounds_test(m_device->get_depth_bounds_support());
|
|
|
|
if (rsx::method_registers.depth_test_enabled())
|
|
{
|
|
//NOTE: Like stencil, depth write is meaningless without depth test
|
|
properties.state.set_depth_mask(rsx::method_registers.depth_write_enabled());
|
|
properties.state.enable_depth_test(vk::get_compare_func(rsx::method_registers.depth_func()));
|
|
}
|
|
|
|
if (rsx::method_registers.logic_op_enabled())
|
|
properties.state.enable_logic_op(vk::get_logic_op(rsx::method_registers.logic_operation()));
|
|
|
|
if (rsx::method_registers.cull_face_enabled())
|
|
properties.state.enable_cull_face(vk::get_cull_face(rsx::method_registers.cull_face_mode()));
|
|
|
|
for (uint index = 0; index < m_draw_buffers.size(); ++index)
|
|
{
|
|
bool color_mask_b = rsx::method_registers.color_mask_b(index);
|
|
bool color_mask_g = rsx::method_registers.color_mask_g(index);
|
|
bool color_mask_r = rsx::method_registers.color_mask_r(index);
|
|
bool color_mask_a = rsx::method_registers.color_mask_a(index);
|
|
|
|
switch (rsx::method_registers.surface_color())
|
|
{
|
|
case rsx::surface_color_format::b8:
|
|
rsx::get_b8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
|
|
break;
|
|
case rsx::surface_color_format::g8b8:
|
|
rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
properties.state.set_color_mask(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
|
|
}
|
|
|
|
bool mrt_blend_enabled[] =
|
|
{
|
|
rsx::method_registers.blend_enabled(),
|
|
rsx::method_registers.blend_enabled_surface_1(),
|
|
rsx::method_registers.blend_enabled_surface_2(),
|
|
rsx::method_registers.blend_enabled_surface_3()
|
|
};
|
|
|
|
VkBlendFactor sfactor_rgb, sfactor_a, dfactor_rgb, dfactor_a;
|
|
VkBlendOp equation_rgb, equation_a;
|
|
|
|
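// A single set of blend factors and equations is shared by all render targets; only the per-MRT enable bits differ.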
if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
|
|
{
|
|
sfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_rgb());
|
|
sfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_a());
|
|
dfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_rgb());
|
|
dfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_a());
|
|
equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb());
|
|
equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a());
|
|
|
|
for (u8 idx = 0; idx < m_draw_buffers.size(); ++idx)
|
|
{
|
|
if (mrt_blend_enabled[idx])
|
|
{
|
|
properties.state.enable_blend(idx, sfactor_rgb, sfactor_a, dfactor_rgb, dfactor_a, equation_rgb, equation_a);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (rsx::method_registers.stencil_test_enabled())
|
|
{
|
|
if (!rsx::method_registers.two_sided_stencil_test_enabled())
|
|
{
|
|
properties.state.enable_stencil_test(
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_fail()),
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()),
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()),
|
|
vk::get_compare_func(rsx::method_registers.stencil_func()),
|
|
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
|
|
}
|
|
else
|
|
{
|
|
properties.state.enable_stencil_test_separate(0,
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_fail()),
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()),
|
|
vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()),
|
|
vk::get_compare_func(rsx::method_registers.stencil_func()),
|
|
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
|
|
|
|
properties.state.enable_stencil_test_separate(1,
|
|
vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()),
|
|
vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()),
|
|
vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()),
|
|
vk::get_compare_func(rsx::method_registers.back_stencil_func()),
|
|
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
|
|
}
|
|
|
|
if (auto ds = m_rtts.m_bound_depth_stencil.second;
|
|
ds && ds->samples() > 1 && !(ds->stencil_init_flags & 0xFF00))
|
|
{
|
|
if (properties.state.ds.front.failOp != VK_STENCIL_OP_KEEP ||
|
|
properties.state.ds.front.depthFailOp != VK_STENCIL_OP_KEEP ||
|
|
properties.state.ds.front.passOp != VK_STENCIL_OP_KEEP ||
|
|
properties.state.ds.back.failOp != VK_STENCIL_OP_KEEP ||
|
|
properties.state.ds.back.depthFailOp != VK_STENCIL_OP_KEEP ||
|
|
properties.state.ds.back.passOp != VK_STENCIL_OP_KEEP)
|
|
{
|
|
// Toggle bit 9 (1 << 8) to signal that a full bit-wise transfer is required
|
|
ds->stencil_init_flags |= (1 << 8);
|
|
}
|
|
}
|
|
}
|
|
|
|
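// The renderpass key packs the rasterization sample count into bits [16..19].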
const auto rasterization_samples = u8((m_current_renderpass_key >> 16) & 0xF);
|
|
if (backend_config.supports_hw_a2c || rasterization_samples > 1)
|
|
{
|
|
const bool alpha_to_one_enable = rsx::method_registers.msaa_alpha_to_one_enabled() && backend_config.supports_hw_a2one;
|
|
|
|
properties.state.set_multisample_state(
|
|
rasterization_samples,
|
|
rsx::method_registers.msaa_sample_mask(),
|
|
rsx::method_registers.msaa_enabled(),
|
|
rsx::method_registers.msaa_alpha_to_coverage_enabled(),
|
|
alpha_to_one_enable);
|
|
|
|
// A problem observed on multiple GPUs is that interior geometry edges can resolve 0 samples unless we force shading rate of 1.
|
|
// For whatever reason, the way MSAA images are 'resolved' on PS3 bypasses this issue.
|
|
// NOTE: We do not do image resolve at all, the output is merely 'exploded' and the guest application is responsible for doing the resolve in software as it is on real hardware.
|
|
properties.state.set_multisample_shading_rate(1.f);
|
|
}
|
|
|
|
properties.renderpass_key = m_current_renderpass_key;
|
|
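// Fast path: if no interpreter switch is pending and both the bound program and the pipeline properties are unchanged, keep the current pipeline.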
if (!m_interpreter_state && m_program) [[likely]]
|
|
{
|
|
if (!m_shader_interpreter.is_interpreter(m_program) &&
|
|
m_pipeline_properties == properties)
|
|
{
|
|
// Nothing changed
|
|
return true;
|
|
}
|
|
}
|
|
|
|
const auto shadermode = g_cfg.video.shadermode.get();
|
|
m_vertex_prog = nullptr;
|
|
m_fragment_prog = nullptr;
|
|
|
|
if (shadermode != shader_mode::interpreter_only) [[likely]]
|
|
{
|
|
vk::enter_uninterruptible();
|
|
|
|
// Load current program from cache
|
|
std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
|
|
shadermode != shader_mode::recompiler, true, pipeline_layout);
|
|
|
|
vk::leave_uninterruptible();
|
|
|
|
if (m_prog_buffer->check_cache_missed())
|
|
{
|
|
// Notify the user with HUD notification
|
|
if (g_cfg.misc.show_shader_compilation_hint)
|
|
{
|
|
if (m_overlay_manager)
|
|
{
|
|
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
|
|
{
|
|
// Extend duration
|
|
dlg->touch();
|
|
}
|
|
else
|
|
{
|
|
// Create dialog but do not show immediately
|
|
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_program = nullptr;
|
|
}
|
|
|
|
if (!m_program && (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only))
|
|
{
|
|
if (!m_shader_interpreter.is_interpreter(old_program))
|
|
{
|
|
m_interpreter_state = rsx::invalidate_pipeline_bits;
|
|
}
|
|
|
|
m_program = m_shader_interpreter.get(properties, current_fp_metadata);
|
|
}
|
|
|
|
m_pipeline_properties = properties;
|
|
return m_program != nullptr;
|
|
}
|
|
|
|
void VKGSRender::load_program_env()
|
|
{
|
|
if (!m_program)
|
|
{
|
|
fmt::throw_exception("Unreachable right now");
|
|
}
|
|
|
|
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
|
|
|
|
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
|
|
const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty);
|
|
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
|
|
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
|
|
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
|
|
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
|
|
const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty));
|
|
|
|
if (update_vertex_env)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_VERTEX_ENV_STORAGE);
|
|
|
|
// Vertex state
|
|
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
|
|
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
|
|
|
|
fill_scale_offset_data(buf, false);
|
|
fill_user_clip_data(buf + 64);
|
|
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
|
|
*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size() * rsx::get_resolution_scale();
|
|
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
|
|
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_max();
|
|
|
|
m_vertex_env_ring_info.unmap();
|
|
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 };
|
|
}
|
|
|
|
if (update_transform_constants)
|
|
{
|
|
// Transform constants
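// If the program indexes constants dynamically (or no vertex program is bound yet), upload the full 8 KiB block; otherwise upload only the referenced constants (16 bytes each).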
|
|
const usz transform_constants_size = (!m_vertex_prog || m_vertex_prog->has_indexed_constants) ? 8192 : m_vertex_prog->constant_ids.size() * 16;
|
|
if (transform_constants_size)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE);
|
|
|
|
const auto alignment = m_device->gpu().get_limits().minUniformBufferOffsetAlignment;
|
|
auto mem = m_transform_constants_ring_info.alloc<1>(utils::align(transform_constants_size, alignment));
|
|
auto buf = m_transform_constants_ring_info.map(mem, transform_constants_size);
|
|
|
|
const std::vector<u16>& constant_ids = (transform_constants_size == 8192) ? std::vector<u16>{} : m_vertex_prog->constant_ids;
|
|
fill_vertex_program_constants_data(buf, constant_ids);
|
|
m_transform_constants_ring_info.unmap();
|
|
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, transform_constants_size };
|
|
}
|
|
}
|
|
|
|
if (update_fragment_constants && !update_instruction_buffers)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE);
|
|
|
|
// Fragment constants
|
|
if (fragment_constants_size)
|
|
{
|
|
auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size);
|
|
auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size);
|
|
|
|
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), fragment_constants_size },
|
|
*ensure(m_fragment_prog), current_fragment_program, true);
|
|
|
|
m_fragment_constants_ring_info.unmap();
|
|
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size };
|
|
}
|
|
else
|
|
{
|
|
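// Nothing to upload; keep the descriptor pointing at a small valid range for the binding further below.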
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, 32 };
|
|
}
|
|
}
|
|
|
|
if (update_fragment_env)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
|
|
|
auto mem = m_fragment_env_ring_info.alloc<256>(256);
|
|
auto buf = m_fragment_env_ring_info.map(mem, 32);
|
|
|
|
fill_fragment_state_buffer(buf, current_fragment_program);
|
|
m_fragment_env_ring_info.unmap();
|
|
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 };
|
|
}
|
|
|
|
if (update_fragment_texture_env)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE);
|
|
|
|
auto mem = m_fragment_texture_params_ring_info.alloc<256>(512);
|
|
auto buf = m_fragment_texture_params_ring_info.map(mem, 512);
|
|
|
|
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
|
|
m_fragment_texture_params_ring_info.unmap();
|
|
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 512 };
|
|
}
|
|
|
|
if (update_raster_env)
|
|
{
|
|
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
|
|
|
auto mem = m_raster_env_ring_info.alloc<256>(256);
|
|
auto buf = m_raster_env_ring_info.map(mem, 128);
|
|
|
|
std::memcpy(buf, rsx::method_registers.polygon_stipple_pattern(), 128);
|
|
m_raster_env_ring_info.unmap();
|
|
m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, mem, 128 };
|
|
|
|
m_graphics_state &= ~(rsx::pipeline_state::polygon_stipple_pattern_dirty);
|
|
}
|
|
|
|
if (update_instruction_buffers)
|
|
{
|
|
if (m_interpreter_state & rsx::vertex_program_dirty)
|
|
{
|
|
// Attach vertex buffer data
|
|
const auto vp_block_length = current_vp_metadata.ucode_length + 16;
|
|
auto vp_mapping = m_vertex_instructions_buffer.alloc<256>(vp_block_length);
|
|
auto vp_buf = static_cast<u8*>(m_vertex_instructions_buffer.map(vp_mapping, vp_block_length));
|
|
|
|
auto vp_config = reinterpret_cast<u32*>(vp_buf);
|
|
vp_config[0] = current_vertex_program.base_address;
|
|
vp_config[1] = current_vertex_program.entry;
|
|
vp_config[2] = current_vertex_program.output_mask;
|
|
vp_config[3] = rsx::method_registers.two_side_light_en() ? 1u : 0u;
|
|
|
|
std::memcpy(vp_buf + 16, current_vertex_program.data.data(), current_vp_metadata.ucode_length);
|
|
m_vertex_instructions_buffer.unmap();
|
|
|
|
m_vertex_instructions_buffer_info = { m_vertex_instructions_buffer.heap->value, vp_mapping, vp_block_length };
|
|
}
|
|
|
|
if (m_interpreter_state & rsx::fragment_program_dirty)
|
|
{
|
|
// Attach fragment buffer data
|
|
const auto fp_block_length = current_fp_metadata.program_ucode_length + 16;
|
|
auto fp_mapping = m_fragment_instructions_buffer.alloc<256>(fp_block_length);
|
|
auto fp_buf = static_cast<u8*>(m_fragment_instructions_buffer.map(fp_mapping, fp_block_length));
|
|
|
|
// Control mask
|
|
const auto control_masks = reinterpret_cast<u32*>(fp_buf);
|
|
control_masks[0] = rsx::method_registers.shader_control();
|
|
control_masks[1] = current_fragment_program.texture_state.texture_dimensions;
|
|
|
|
std::memcpy(fp_buf + 16, current_fragment_program.get_data(), current_fragment_program.ucode_length);
|
|
m_fragment_instructions_buffer.unmap();
|
|
|
|
m_fragment_instructions_buffer_info = { m_fragment_instructions_buffer.heap->value, fp_mapping, fp_block_length };
|
|
}
|
|
}
|
|
|
|
const auto& binding_table = m_device->get_pipeline_binding_table();
|
|
|
|
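// Bind the per-draw uniform blocks filled in above to the current frame's descriptor set.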
m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set);
|
|
m_program->bind_uniform(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, m_current_frame->descriptor_set);
|
|
m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set);
|
|
m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set);
|
|
m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot, m_current_frame->descriptor_set);
|
|
|
|
if (!m_shader_interpreter.is_interpreter(m_program))
|
|
{
|
|
m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set);
|
|
}
|
|
else
|
|
{
|
|
m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
|
m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
|
}
|
|
|
|
if (vk::emulate_conditional_rendering())
|
|
{
|
|
auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value;
|
|
m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
|
|
}
|
|
|
|
//Clear flags
|
|
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
|
|
m_graphics_state &= ~handled_flags;
|
|
}
|
|
|
|
void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info)
|
|
{
|
|
// Actual allocation must have been done previously
|
|
u32 base_offset;
|
|
const u32 offset32 = static_cast<u32>(m_vertex_layout_stream_info.offset);
|
|
const u32 range32 = static_cast<u32>(m_vertex_layout_stream_info.range);
|
|
|
|
if (!m_vertex_layout_storage || !m_vertex_layout_storage->in_range(offset32, range32, base_offset))
|
|
{
|
|
ensure(m_texbuffer_view_size >= m_vertex_layout_stream_info.range);
|
|
|
|
if (m_vertex_layout_storage)
|
|
m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
|
|
|
|
const usz alloc_addr = m_vertex_layout_stream_info.offset;
|
|
const usz view_size = (alloc_addr + m_texbuffer_view_size) > m_vertex_layout_ring_info.size() ? m_vertex_layout_ring_info.size() - alloc_addr : m_texbuffer_view_size;
|
|
m_vertex_layout_storage = std::make_unique<vk::buffer_view>(*m_device, m_vertex_layout_ring_info.heap->value, VK_FORMAT_R32G32_UINT, alloc_addr, view_size);
|
|
base_offset = 0;
|
|
}
|
|
|
|
u8 data_size = 16;
|
|
u32 draw_info[5];
|
|
|
|
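// Push-constant block consumed by the vertex shader: base index, index offset, draw id and the offset of this
// draw's layout block within the R32G32 texel buffer (id * 16 texels plus the view base offset in 8-byte texels).
// A fifth word carrying the conditional-render predicate state is appended below when emulation is active.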
draw_info[0] = vertex_info.vertex_index_base;
|
|
draw_info[1] = vertex_info.vertex_index_offset;
|
|
draw_info[2] = id;
|
|
draw_info[3] = (id * 16) + (base_offset / 8);
|
|
|
|
if (vk::emulate_conditional_rendering())
|
|
{
|
|
draw_info[4] = cond_render_ctrl.hw_cond_active ? 1 : 0;
|
|
data_size = 20;
|
|
}
|
|
|
|
vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info);
|
|
|
|
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
|
|
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
|
|
|
|
fill_vertex_layout_state(m_vertex_layout, vertex_info.first_vertex, vertex_info.allocated_vertex_count, static_cast<s32*>(dst),
|
|
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
|
|
|
|
m_vertex_layout_ring_info.unmap();
|
|
}
|
|
|
|
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool)
|
|
{
|
|
prepare_rtts(context);
|
|
}
|
|
|
|
void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags)
|
|
{
|
|
ensure(!m_queue_status.test_and_set(flush_queue_state::flushing));
|
|
|
|
// Workaround for a deadlock occurring during an RSX offloader fault
|
|
// TODO: Restructure command submission infrastructure to avoid this condition
|
|
const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
|
|
const VkBool32 force_flush = !sync_success;
|
|
|
|
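// If any of the ring buffer heaps were marked dirty, synchronize them from a secondary command buffer before building this submission.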
if (vk::test_status_interrupt(vk::heap_dirty))
|
|
{
|
|
if (m_attrib_ring_info.is_dirty() ||
|
|
m_fragment_env_ring_info.is_dirty() ||
|
|
m_vertex_env_ring_info.is_dirty() ||
|
|
m_fragment_texture_params_ring_info.is_dirty() ||
|
|
m_vertex_layout_ring_info.is_dirty() ||
|
|
m_fragment_constants_ring_info.is_dirty() ||
|
|
m_index_buffer_ring_info.is_dirty() ||
|
|
m_transform_constants_ring_info.is_dirty() ||
|
|
m_texture_upload_buffer_ring_info.is_dirty() ||
|
|
m_raster_env_ring_info.is_dirty())
|
|
{
|
|
auto secondary_command_buffer = m_secondary_cb_list.next();
|
|
secondary_command_buffer->begin();
|
|
|
|
m_attrib_ring_info.sync(*secondary_command_buffer);
|
|
m_fragment_env_ring_info.sync(*secondary_command_buffer);
|
|
m_vertex_env_ring_info.sync(*secondary_command_buffer);
|
|
m_fragment_texture_params_ring_info.sync(*secondary_command_buffer);
|
|
m_vertex_layout_ring_info.sync(*secondary_command_buffer);
|
|
m_fragment_constants_ring_info.sync(*secondary_command_buffer);
|
|
m_index_buffer_ring_info.sync(*secondary_command_buffer);
|
|
m_transform_constants_ring_info.sync(*secondary_command_buffer);
|
|
m_texture_upload_buffer_ring_info.sync(*secondary_command_buffer);
|
|
m_raster_env_ring_info.sync(*secondary_command_buffer);
|
|
|
|
secondary_command_buffer->end();
|
|
|
|
vk::queue_submit_t submit_info{ m_device->get_graphics_queue(), nullptr };
|
|
secondary_command_buffer->submit(submit_info, force_flush);
|
|
}
|
|
|
|
vk::clear_status_interrupt(vk::heap_dirty);
|
|
}
|
|
|
|
#if 0 // Currently unreachable
|
|
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
|
|
{
|
|
ensure(m_render_pass_open);
|
|
m_device->_vkCmdEndConditionalRenderingEXT(*m_current_command_buffer);
|
|
}
|
|
#endif
|
|
|
|
// End any active renderpasses; the caller should handle reopening
|
|
if (vk::is_renderpass_open(*m_current_command_buffer))
|
|
{
|
|
close_render_pass();
|
|
}
|
|
|
|
// End open queries. Flags will be automatically reset by the submit routine
|
|
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
|
|
{
|
|
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
|
|
m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
|
|
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
|
|
}
|
|
|
|
if (m_host_data_ptr && m_host_data_ptr->last_label_release_event > m_host_data_ptr->last_label_submit_event)
|
|
{
|
|
vkCmdUpdateBuffer(*m_current_command_buffer,
|
|
m_host_object_data->value,
|
|
::offset32(&vk::host_data_t::commands_complete_event),
|
|
sizeof(u64),
|
|
const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
|
|
|
|
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
|
|
}
|
|
|
|
m_current_command_buffer->end();
|
|
m_current_command_buffer->tag();
|
|
|
|
// Supporting concurrent access vastly simplifies this logic.
|
|
// Instead of doing CB slice injection, we can just chain these together logically with the async stream going first
|
|
vk::queue_submit_t primary_submit_info{ m_device->get_graphics_queue(), pFence };
|
|
vk::queue_submit_t secondary_submit_info{};
|
|
|
|
if (wait_semaphore)
|
|
{
|
|
primary_submit_info.wait_on(wait_semaphore, pipeline_stage_flags);
|
|
}
|
|
|
|
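// If the async scheduler has work recorded, flush it ahead of the primary submission.
// In host mode, ordering against the primary CB is enforced with a semaphore.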
auto& async_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
|
|
if (async_scheduler.is_recording())
|
|
{
|
|
if (async_scheduler.is_host_mode())
|
|
{
|
|
const VkSemaphore async_sema = *async_scheduler.get_sema();
|
|
secondary_submit_info.queue_signal(async_sema);
|
|
primary_submit_info.wait_on(async_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
|
|
|
|
// Delay object destruction by one cycle
|
|
vk::get_resource_manager()->push_down_current_scope();
|
|
}
|
|
|
|
async_scheduler.flush(secondary_submit_info, force_flush);
|
|
}
|
|
|
|
if (signal_semaphore)
|
|
{
|
|
primary_submit_info.queue_signal(signal_semaphore);
|
|
}
|
|
|
|
m_current_command_buffer->submit(primary_submit_info, force_flush);
|
|
|
|
m_queue_status.clear(flush_queue_state::flushing);
|
|
}
|
|
|
|
void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|
{
|
|
const bool clipped_scissor = (context == rsx::framebuffer_creation_context::context_draw);
|
|
if (m_current_framebuffer_context == context && !m_rtts_dirty && m_draw_fbo)
|
|
{
|
|
// Fast path
|
|
// Framebuffer usage has not changed, framebuffer exists and config regs have not changed
|
|
set_scissor(clipped_scissor);
|
|
return;
|
|
}
|
|
|
|
m_rtts_dirty = false;
|
|
framebuffer_status_valid = false;
|
|
m_framebuffer_state_contested = false;
|
|
|
|
get_framebuffer_layout(context, m_framebuffer_layout);
|
|
if (!framebuffer_status_valid)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (m_draw_fbo && m_framebuffer_layout.ignore_change)
|
|
{
|
|
// Nothing has changed, we're still using the same framebuffer
|
|
// Update flags to match current
|
|
set_scissor(clipped_scissor);
|
|
return;
|
|
}
|
|
|
|
m_rtts.prepare_render_target(*m_current_command_buffer,
|
|
m_framebuffer_layout.color_format, m_framebuffer_layout.depth_format,
|
|
m_framebuffer_layout.width, m_framebuffer_layout.height,
|
|
m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, m_framebuffer_layout.raster_type,
|
|
m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address,
|
|
m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch,
|
|
(*m_device), *m_current_command_buffer);
|
|
|
|
// Reset framebuffer information
|
|
const auto color_bpp = get_format_block_size_in_bytes(m_framebuffer_layout.color_format);
|
|
const auto samples = get_format_sample_count(m_framebuffer_layout.aa_mode);
|
|
|
|
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
|
|
{
|
|
// Flush old address if we keep missing it
|
|
if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers)
|
|
{
|
|
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
|
|
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
|
|
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range);
|
|
}
|
|
|
|
m_surface_info[i].address = m_surface_info[i].pitch = 0;
|
|
m_surface_info[i].width = m_framebuffer_layout.width;
|
|
m_surface_info[i].height = m_framebuffer_layout.height;
|
|
m_surface_info[i].color_format = m_framebuffer_layout.color_format;
|
|
m_surface_info[i].bpp = color_bpp;
|
|
m_surface_info[i].samples = samples;
|
|
}
|
|
|
|
//Process depth surface as well
|
|
{
|
|
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
|
|
{
|
|
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
|
|
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
|
|
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range);
|
|
}
|
|
|
|
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
|
|
m_depth_surface_info.width = m_framebuffer_layout.width;
|
|
m_depth_surface_info.height = m_framebuffer_layout.height;
|
|
m_depth_surface_info.depth_format = m_framebuffer_layout.depth_format;
|
|
m_depth_surface_info.bpp = get_format_block_size_in_bytes(m_framebuffer_layout.depth_format);
|
|
m_depth_surface_info.samples = samples;
|
|
}
|
|
|
|
//Bind created rtts as current fbo...
|
|
const auto draw_buffers = rsx::utility::get_rtt_indexes(m_framebuffer_layout.target);
|
|
m_draw_buffers.clear();
|
|
m_fbo_images.clear();
|
|
|
|
for (u8 index : draw_buffers)
|
|
{
|
|
if (auto surface = std::get<1>(m_rtts.m_bound_render_targets[index]))
|
|
{
|
|
m_fbo_images.push_back(surface);
|
|
|
|
m_surface_info[index].address = m_framebuffer_layout.color_addresses[index];
|
|
m_surface_info[index].pitch = m_framebuffer_layout.actual_color_pitch[index];
|
|
ensure(surface->rsx_pitch == m_framebuffer_layout.actual_color_pitch[index]);
|
|
|
|
m_texture_cache.notify_surface_changed(m_surface_info[index].get_memory_range(m_framebuffer_layout.aa_factors));
|
|
m_draw_buffers.push_back(index);
|
|
}
|
|
}
|
|
|
|
if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
|
|
{
|
|
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
|
m_fbo_images.push_back(ds);
|
|
|
|
m_depth_surface_info.address = m_framebuffer_layout.zeta_address;
|
|
m_depth_surface_info.pitch = m_framebuffer_layout.actual_zeta_pitch;
|
|
ensure(ds->rsx_pitch == m_framebuffer_layout.actual_zeta_pitch);
|
|
|
|
m_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(m_framebuffer_layout.aa_factors));
|
|
}
|
|
|
|
// Before messing with memory properties, flush command queue if there are dma transfers queued up
|
|
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
|
|
{
|
|
flush_command_queue();
|
|
}
|
|
|
|
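// Surfaces superseded by this framebuffer change no longer own their memory; discard their backing regions from the texture cache.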
if (!m_rtts.superseded_surfaces.empty())
|
|
{
|
|
for (auto& surface : m_rtts.superseded_surfaces)
|
|
{
|
|
m_texture_cache.discard_framebuffer_memory_region(*m_current_command_buffer, surface->get_memory_range());
|
|
}
|
|
|
|
m_rtts.superseded_surfaces.clear();
|
|
}
|
|
|
|
const auto color_fmt_info = get_compatible_gcm_format(m_framebuffer_layout.color_format);
|
|
for (u8 index : m_draw_buffers)
|
|
{
|
|
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
|
|
|
const utils::address_range surface_range = m_surface_info[index].get_memory_range();
|
|
if (g_cfg.video.write_color_buffers)
|
|
{
|
|
m_texture_cache.lock_memory_region(
|
|
*m_current_command_buffer, m_rtts.m_bound_render_targets[index].second, surface_range, true,
|
|
m_surface_info[index].width, m_surface_info[index].height, m_framebuffer_layout.actual_color_pitch[index],
|
|
color_fmt_info.first, color_fmt_info.second);
|
|
}
|
|
else
|
|
{
|
|
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
|
|
}
|
|
}
|
|
|
|
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
|
{
|
|
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
|
|
if (g_cfg.video.write_depth_buffer)
|
|
{
|
|
const u32 gcm_format = (m_depth_surface_info.depth_format == rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
|
m_texture_cache.lock_memory_region(
|
|
*m_current_command_buffer, m_rtts.m_bound_depth_stencil.second, surface_range, true,
|
|
m_depth_surface_info.width, m_depth_surface_info.height, m_framebuffer_layout.actual_zeta_pitch, gcm_format, true);
|
|
}
|
|
else
|
|
{
|
|
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
|
|
}
|
|
}
|
|
|
|
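// Orphaned surfaces are no longer bound as render targets; either lock their ranges for write-back (when WCB/WDB is enabled) or simply commit them.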
if (!m_rtts.orphaned_surfaces.empty())
|
|
{
|
|
u32 gcm_format;
|
|
bool swap_bytes;
|
|
|
|
for (auto& surface : m_rtts.orphaned_surfaces)
|
|
{
|
|
const bool lock = surface->is_depth_surface() ? !!g_cfg.video.write_depth_buffer :
|
|
!!g_cfg.video.write_color_buffers;
|
|
|
|
if (!lock) [[likely]]
|
|
{
|
|
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface->get_memory_range());
|
|
continue;
|
|
}
|
|
|
|
if (surface->is_depth_surface())
|
|
{
|
|
gcm_format = (surface->get_surface_depth_format() == rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
|
swap_bytes = true;
|
|
}
|
|
else
|
|
{
|
|
auto info = get_compatible_gcm_format(surface->get_surface_color_format());
|
|
gcm_format = info.first;
|
|
swap_bytes = info.second;
|
|
}
|
|
|
|
m_texture_cache.lock_memory_region(
|
|
*m_current_command_buffer, surface, surface->get_memory_range(), false,
|
|
surface->get_surface_width<rsx::surface_metrics::pixels>(), surface->get_surface_height<rsx::surface_metrics::pixels>(), surface->get_rsx_pitch(),
|
|
gcm_format, swap_bytes);
|
|
}
|
|
|
|
m_rtts.orphaned_surfaces.clear();
|
|
}
|
|
|
|
m_current_renderpass_key = vk::get_renderpass_key(m_fbo_images);
|
|
m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key);
|
|
|
|
// Search old framebuffers for this same configuration
|
|
const auto [fbo_width, fbo_height] = rsx::apply_resolution_scale<true>(m_framebuffer_layout.width, m_framebuffer_layout.height);
|
|
|
|
if (m_draw_fbo)
|
|
{
|
|
// Release old ref
|
|
m_draw_fbo->release();
|
|
}
|
|
|
|
m_draw_fbo = vk::get_framebuffer(*m_device, fbo_width, fbo_height, VK_FALSE, m_cached_renderpass, m_fbo_images);
|
|
m_draw_fbo->add_ref();
|
|
|
|
set_viewport();
|
|
set_scissor(clipped_scissor);
|
|
|
|
check_zcull_status(true);
|
|
}
|
|
|
|
void VKGSRender::renderctl(u32 request_code, void* args)
{
	switch (request_code)
	{
	case vk::rctrl_queue_submit:
	{
		const auto packet = reinterpret_cast<vk::queue_submit_t*>(args);
		vk::queue_submit(packet);
		free(packet);
		break;
	}
	case vk::rctrl_run_gc:
	{
		auto eid = reinterpret_cast<u64>(args);
		vk::on_event_completed(eid, true);
		break;
	}
	default:
		fmt::throw_exception("Unhandled request code 0x%x", request_code);
	}
}

bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
|
|
{
|
|
if (swapchain_unavailable)
|
|
return false;
|
|
|
|
// Verify enough memory exists before attempting to handle data transfer
|
|
check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
|
|
|
|
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
|
|
{
|
|
m_samplers_dirty.store(true);
|
|
m_current_command_buffer->set_flag(vk::command_buffer::cb_has_blit_transfer);
|
|
|
|
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
|
|
{
|
|
// A dma transfer has been queued onto this cb
|
|
// This likely means that we're done with the transfers to the target (writes_likely_completed=1)
|
|
flush_command_queue();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	ensure(!m_occlusion_query_active);

	query->result = 0;
	//query->sync_timestamp = get_system_time();
	m_active_query_info = query;
	m_occlusion_query_active = true;
	m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
}

void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	ensure(query == m_active_query_info);

	// NOTE: flushing the queue is very expensive, do not flush just because query stopped
	if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
	{
		// End query
		auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
		m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
		m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
	}

	// Clear occlusion load flag
	m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;

	m_occlusion_query_active = false;
	m_active_query_info = nullptr;
}

bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
|
|
{
|
|
if (!query->num_draws)
|
|
return true;
|
|
|
|
auto &data = m_occlusion_map[query->driver_handle];
|
|
if (data.indices.empty())
|
|
return true;
|
|
|
|
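// Queries recorded in the still-open command buffer cannot have produced results yet.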
if (data.is_current(m_current_command_buffer))
|
|
return false;
|
|
|
|
const u32 oldest = data.indices.front();
|
|
return m_occlusion_query_manager->check_query_status(oldest);
|
|
}
|
|
|
|
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
|
|
{
|
|
auto &data = m_occlusion_map[query->driver_handle];
|
|
if (data.indices.empty())
|
|
return;
|
|
|
|
if (query->num_draws)
|
|
{
|
|
if (data.is_current(m_current_command_buffer))
|
|
{
|
|
std::lock_guard lock(m_flush_queue_mutex);
|
|
flush_command_queue();
|
|
|
|
if (m_flush_requests.pending())
|
|
{
|
|
m_flush_requests.clear_pending_flag();
|
|
}
|
|
|
|
rsx_log.warning("[Performance warning] Unexpected ZCULL read caused a hard sync");
|
|
busy_wait();
|
|
}
|
|
|
|
data.sync();
|
|
|
|
// Gather data
|
|
for (const auto occlusion_id : data.indices)
|
|
{
|
|
query->result += m_occlusion_query_manager->get_query_result(occlusion_id);
|
|
if (query->result && !g_cfg.video.precise_zpass_count)
|
|
{
|
|
// We only need one hit unless precise zcull is requested
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices);
|
|
data.indices.clear();
|
|
}
|
|
|
|
void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	if (m_active_query_info == query)
	{
		end_occlusion_query(query);
	}

	auto &data = m_occlusion_map[query->driver_handle];
	if (data.indices.empty())
		return;

	m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices);
	data.indices.clear();
}

void VKGSRender::emergency_query_cleanup(vk::command_buffer* commands)
{
	ensure(commands == static_cast<vk::command_buffer*>(m_current_command_buffer));

	if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
	{
		auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
		m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
		m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
	}
}

void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occlusion_query_info*>& sources)
|
|
{
|
|
ensure(!sources.empty());
|
|
|
|
// Determines whether all entries must be evaluated or only the newest one
|
|
bool partial_eval;
|
|
|
|
// Try to avoid regenerating the data if it's just a repeat/spam
|
|
// NOTE: The incoming list is reversed with the first entry being the newest
|
|
if (m_cond_render_sync_tag == sources.front()->sync_tag)
|
|
{
|
|
// Already synced. A partial (sub-draw) update is only possible if the last sync happened while the query was still active
|
|
if (!m_active_query_info || m_active_query_info != sources.front())
|
|
{
|
|
rsx::thread::begin_conditional_rendering(sources);
|
|
return;
|
|
}
|
|
|
|
// Partial evaluation only
|
|
partial_eval = true;
|
|
}
|
|
else
|
|
{
|
|
m_cond_render_sync_tag = sources.front()->sync_tag;
|
|
partial_eval = false;
|
|
}
|
|
|
|
// Time to aggregate
|
|
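// Lazily create the 4-byte predicate buffer; the conditional-rendering usage flag is only added when the device extension is available.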
if (!m_cond_render_buffer)
|
|
{
|
|
auto& memory_props = m_device->get_memory_mapping();
|
|
auto usage_flags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
|
|
|
if (m_device->get_conditional_render_support())
|
|
{
|
|
usage_flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
|
|
}
|
|
|
|
m_cond_render_buffer = std::make_unique<vk::buffer>(
|
|
*m_device, 4,
|
|
memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
|
usage_flags, 0, VMM_ALLOCATION_POOL_UNDEFINED);
|
|
}
|
|
|
|
VkPipelineStageFlags dst_stage;
|
|
VkAccessFlags dst_access;
|
|
u32 dst_offset = 0;
|
|
u32 num_hw_queries = 0;
|
|
usz first = 0;
|
|
usz last = (!partial_eval) ? sources.size() : 1;
|
|
|
|
// Count number of queries available. This is an "opening" evaluation, if there is only one source, read it as-is.
|
|
// The idea is to avoid scheduling a compute task unless we have to.
|
|
for (usz i = first; i < last; ++i)
|
|
{
|
|
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
|
|
num_hw_queries += ::size32(query_info.indices);
|
|
}
|
|
|
|
if (m_device->get_conditional_render_support())
|
|
{
|
|
dst_stage = VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT;
|
|
dst_access = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT;
|
|
}
|
|
else
|
|
{
|
|
dst_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
|
|
dst_access = VK_ACCESS_SHADER_READ_BIT;
|
|
}
|
|
|
|
if (num_hw_queries == 1 && !partial_eval) [[likely]]
|
|
{
|
|
// Accept the first available query handle as the source of truth. No aggregation is required.
|
|
for (usz i = first; i < last; ++i)
|
|
{
|
|
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
|
|
if (!query_info.indices.empty())
|
|
{
|
|
const auto& index = query_info.indices.front();
|
|
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
|
|
|
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
|
|
VK_ACCESS_TRANSFER_WRITE_BIT, dst_access);
|
|
|
|
rsx::thread::begin_conditional_rendering(sources);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// This is unreachable unless something went horribly wrong
|
|
fmt::throw_exception("Unreachable");
|
|
}
|
|
else if (num_hw_queries > 0)
|
|
{
|
|
// We'll need to do some result aggregation using a compute shader.
|
|
auto scratch = vk::get_scratch_buffer(*m_current_command_buffer, num_hw_queries * 4);
|
|
for (usz i = first; i < last; ++i)
|
|
{
|
|
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
|
|
for (const auto& index : query_info.indices)
|
|
{
|
|
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset);
|
|
dst_offset += 4;
|
|
}
|
|
}
|
|
|
|
// Sanity check
|
|
ensure(dst_offset <= scratch->size());
|
|
|
|
if (!partial_eval)
|
|
{
|
|
// Fast path should have been caught above
|
|
ensure(dst_offset > 4);
|
|
|
|
// Clear result to zero
|
|
vkCmdFillBuffer(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, 0);
|
|
|
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT);
|
|
}
|
|
|
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, scratch->value, 0, dst_offset,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
|
|
|
vk::get_compute_task<vk::cs_aggregator>()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4);
|
|
|
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dst_stage,
|
|
VK_ACCESS_SHADER_WRITE_BIT, dst_access);
|
|
}
|
|
else if (m_program)
|
|
{
|
|
// This can sometimes happen when shaders are compiling, only log if there is a program hit
|
|
rsx_log.warning("Dubious query data pushed to cond render!, Please report to developers(q.pending=%d)", sources.front()->pending);
|
|
}
|
|
|
|
rsx::thread::begin_conditional_rendering(sources);
|
|
}
|
|
|
|
void VKGSRender::end_conditional_rendering()
{
	thread::end_conditional_rendering();
}