// rpcsx/rpcs3/Emu/RSX/VK/vkutils/device.cpp
#include "device.h"
#include "instance.hpp"
#include "util/logs.hpp"
#include "Emu/system_config.h"
#ifdef __APPLE__
#include <MoltenVK/mvk_config.h>
#endif
namespace vk
{
// Global shared render device
const render_device* g_render_device = nullptr;
void physical_device::get_physical_device_features(bool allow_extensions)
{
if (!allow_extensions)
{
vkGetPhysicalDeviceFeatures(dev, &features);
return;
}
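// Enumerate the instance- and device-level extension lists once up front;
// is_supported() below is then just a lookup against the cached list.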
supported_extensions instance_extensions(supported_extensions::instance);
supported_extensions device_extensions(supported_extensions::device, nullptr, dev);
if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
{
vkGetPhysicalDeviceFeatures(dev, &features);
}
else
{
VkPhysicalDeviceFeatures2KHR features2;
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = nullptr;
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_info{};
VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loops_info{};
VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{};
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_info{};
if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))
{
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
features2.pNext = &shader_support_info;
}
if (device_extensions.is_supported(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME))
{
driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
driver_properties.pNext = features2.pNext;
features2.pNext = &driver_properties;
}
if (device_extensions.is_supported(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME))
{
descriptor_indexing_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
descriptor_indexing_info.pNext = features2.pNext;
features2.pNext = &descriptor_indexing_info;
descriptor_indexing_support = true;
}
if (device_extensions.is_supported(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME))
{
fbo_loops_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT;
fbo_loops_info.pNext = features2.pNext;
features2.pNext = &fbo_loops_info;
}
if (device_extensions.is_supported(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME))
{
shader_barycentric_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR;
shader_barycentric_info.pNext = features2.pNext;
features2.pNext = &shader_barycentric_info;
}
if (device_extensions.is_supported(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME))
{
custom_border_color_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
custom_border_color_info.pNext = features2.pNext;
features2.pNext = &custom_border_color_info;
}
auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceFeatures2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR"));
ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddr failed to find entry point!"
_vkGetPhysicalDeviceFeatures2KHR(dev, &features2);
shader_types_support.allow_float64 = !!features2.features.shaderFloat64;
shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16;
shader_types_support.allow_int8 = !!shader_support_info.shaderInt8;
optional_features_support.custom_border_color = !!custom_border_color_info.customBorderColors && !!custom_border_color_info.customBorderColorWithoutFormat;
optional_features_support.barycentric_coords = !!shader_barycentric_info.fragmentShaderBarycentric;
optional_features_support.framebuffer_loops = !!fbo_loops_info.attachmentFeedbackLoopLayout;
features = features2.features;
if (descriptor_indexing_support)
{
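// Build a bitmask of descriptor types that support update-after-bind.
// Each bit position corresponds to a VkDescriptorType enum value.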
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (descriptor_indexing_info.field) descriptor_indexing_support.update_after_bind_mask |= (1ull << bit)
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
#undef SET_DESCRIPTOR_BITFLAG
}
}
optional_features_support.shader_stencil_export = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
optional_features_support.conditional_rendering = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
optional_features_support.external_memory_host = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
optional_features_support.surface_capabilities_2 = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
}
void physical_device::get_physical_device_properties(bool allow_extensions)
{
vkGetPhysicalDeviceMemoryProperties(dev, &memory_properties);
if (!allow_extensions)
{
vkGetPhysicalDeviceProperties(dev, &props);
return;
}
supported_extensions instance_extensions(supported_extensions::instance);
if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
{
vkGetPhysicalDeviceProperties(dev, &props);
}
else
{
VkPhysicalDeviceProperties2KHR properties2;
properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
properties2.pNext = nullptr;
VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{};
if (descriptor_indexing_support)
{
descriptor_indexing_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT;
descriptor_indexing_props.pNext = properties2.pNext;
properties2.pNext = &descriptor_indexing_props;
}
auto _vkGetPhysicalDeviceProperties2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceProperties2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceProperties2KHR"));
ensure(_vkGetPhysicalDeviceProperties2KHR);
_vkGetPhysicalDeviceProperties2KHR(dev, &properties2);
props = properties2.properties;
#ifdef __APPLE__
if (instance_extensions.is_supported(VK_MVK_MOLTENVK_EXTENSION_NAME))
{
MVKConfiguration mvk_config = {};
size_t mvk_config_size = sizeof(MVKConfiguration);
PFN_vkGetMoltenVKConfigurationMVK _vkGetMoltenVKConfigurationMVK = nullptr;
_vkGetMoltenVKConfigurationMVK = reinterpret_cast<PFN_vkGetMoltenVKConfigurationMVK>(vkGetInstanceProcAddr(parent, "vkGetMoltenVKConfigurationMVK"));
ensure(_vkGetMoltenVKConfigurationMVK);
PFN_vkSetMoltenVKConfigurationMVK _vkSetMoltenVKConfigurationMVK = nullptr;
_vkSetMoltenVKConfigurationMVK = reinterpret_cast<PFN_vkSetMoltenVKConfigurationMVK>(vkGetInstanceProcAddr(parent, "vkSetMoltenVKConfigurationMVK"));
ensure(_vkSetMoltenVKConfigurationMVK);
CHECK_RESULT_EX(_vkGetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size), std::string("Could not get MoltenVK configuration."));
mvk_config.resumeLostDevice = true;
mvk_config.fastMathEnabled = g_cfg.video.disable_msl_fast_math.get() ? MVK_CONFIG_FAST_MATH_NEVER : MVK_CONFIG_FAST_MATH_ON_DEMAND;
CHECK_RESULT_EX(_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size), std::string("Could not set MoltenVK configuration."));
}
else
{
rsx_log.error("Cannot set the MoltenVK configuration because VK_MVK_moltenvk is not supported.\nIf you're using MoltenVK through libvulkan, please manually set the appropriate environment variables instead.");
}
#endif
if (descriptor_indexing_support)
{
if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 800'000)
{
rsx_log.error("Physical device does not support enough descriptors for deferred updates to work effectively. Deferred updates are disabled.");
descriptor_indexing_support.update_after_bind_mask = 0;
}
else if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 2'000'000)
{
rsx_log.warning("Physical device reports a low amount of allowed deferred descriptor updates. Draw call threshold will be lowered accordingly.");
descriptor_max_draw_calls = 8192;
}
}
}
}
void physical_device::create(VkInstance context, VkPhysicalDevice pdev, bool allow_extensions)
{
dev = pdev;
parent = context;
get_physical_device_features(allow_extensions);
get_physical_device_properties(allow_extensions);
rsx_log.always()("Found Vulkan-compatible GPU: '%s' running on driver %s", get_name(), get_driver_version());
if (get_driver_vendor() == driver_vendor::RADV && get_name().find("LLVM 8.0.0") != umax)
{
// Serious driver bug causing black screens
// See https://bugs.freedesktop.org/show_bug.cgi?id=110970
rsx_log.fatal("RADV drivers have a major driver bug with LLVM 8.0.0 resulting in no visual output. Upgrade to LLVM version 8.0.1 or greater to avoid this issue.");
}
else if (get_driver_vendor() == driver_vendor::NVIDIA)
{
#ifdef _WIN32
// SPIR-V bugs were fixed in 452.28 for Windows
const u32 threshold_version = (452u << 22) | (28 << 14);
#else
// SPIR-V bugs were fixed in 450.56 for Linux/BSD
const u32 threshold_version = (450u << 22) | (56 << 14);
#endif
const auto current_version = props.driverVersion & ~0x3fffu; // Clear patch and revision fields
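// For example, 452.28.1.3 and 452.28.0.0 both mask down to (452u << 22) | (28u << 14),
// so only the major.minor part takes part in the comparison below.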
if (current_version < threshold_version)
{
rsx_log.error("Your current NVIDIA graphics driver version %s has known issues and is unsupported. Update to the latest NVIDIA driver.", get_driver_version());
}
}
if (get_chip_class() == chip_class::AMD_vega)
{
// Disable fp16 if driver uses LLVM emitter. It does fine with AMD proprietary drivers though.
shader_types_support.allow_float16 = (driver_properties.driverID == VK_DRIVER_ID_AMD_PROPRIETARY_KHR);
}
}
std::string physical_device::get_name() const
{
return props.deviceName;
}
driver_vendor physical_device::get_driver_vendor() const
{
#ifdef __APPLE__
// MoltenVK currently returns DRIVER_ID_MOLTENVK (0).
// For now, assume the vendor is MoltenVK on Apple devices.
return driver_vendor::MVK;
#endif
if (!driver_properties.driverID)
{
const auto gpu_name = get_name();
if (gpu_name.find("Radeon") != umax)
{
return driver_vendor::AMD;
}
if (gpu_name.find("NVIDIA") != umax || gpu_name.find("GeForce") != umax || gpu_name.find("Quadro") != umax)
{
return driver_vendor::NVIDIA;
}
if (gpu_name.find("RADV") != umax)
{
return driver_vendor::RADV;
}
if (gpu_name.find("Intel") != umax)
{
#ifdef _WIN32
return driver_vendor::INTEL;
#else
return driver_vendor::ANV;
#endif
}
return driver_vendor::unknown;
}
else
{
switch (driver_properties.driverID)
{
case VK_DRIVER_ID_AMD_PROPRIETARY_KHR:
case VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR:
return driver_vendor::AMD;
case VK_DRIVER_ID_MESA_RADV_KHR:
return driver_vendor::RADV;
case VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR:
return driver_vendor::NVIDIA;
case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR:
return driver_vendor::INTEL;
case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR:
return driver_vendor::ANV;
default:
// Mobile?
return driver_vendor::unknown;
}
}
}
std::string physical_device::get_driver_version() const
{
switch (get_driver_vendor())
{
case driver_vendor::NVIDIA:
{
// NVIDIA packs the driver version as 10 (major) + 8 (minor) + 8 (patch) + 6 (revision) bits
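// Example: the raw value 0x71070000 decodes to 452.28.0.0
// (452 == 0x71070000 >> 22, 28 == (0x71070000 >> 14) & 0xff).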
const auto major_version = props.driverVersion >> 22;
const auto minor_version = (props.driverVersion >> 14) & 0xff;
const auto patch = (props.driverVersion >> 6) & 0xff;
const auto revision = (props.driverVersion & 0x3f);
return fmt::format("%u.%u.%u.%u", major_version, minor_version, patch, revision);
}
default:
{
// 10 + 10 + 12 (standard Vulkan encoding created with VK_MAKE_VERSION)
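// e.g. VK_MAKE_VERSION(2, 0, 1) == (2u << 22) | (0u << 12) | 1u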
return fmt::format("%u.%u.%u", (props.driverVersion >> 22), (props.driverVersion >> 12) & 0x3ff, (props.driverVersion) & 0x3ff);
}
}
}
chip_class physical_device::get_chip_class() const
{
return get_chip_family(props.vendorID, props.deviceID);
}
u32 physical_device::get_queue_count() const
{
if (!queue_props.empty())
return ::size32(queue_props);
u32 count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr);
return count;
}
const VkQueueFamilyProperties& physical_device::get_queue_properties(u32 queue)
{
if (queue_props.empty())
{
u32 count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, nullptr);
queue_props.resize(count);
vkGetPhysicalDeviceQueueFamilyProperties(dev, &count, queue_props.data());
}
if (queue >= queue_props.size())
fmt::throw_exception("Bad queue index passed to get_queue_properties (%u)", queue);
return queue_props[queue];
}
const VkPhysicalDeviceMemoryProperties& physical_device::get_memory_properties() const
{
return memory_properties;
}
const VkPhysicalDeviceLimits& physical_device::get_limits() const
{
return props.limits;
}
physical_device::operator VkPhysicalDevice() const
{
return dev;
}
physical_device::operator VkInstance() const
{
return parent;
}
// Render Device - The actual usable device
void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx, u32 present_queue_idx, u32 transfer_queue_idx)
{
std::string message_on_error;
float queue_priorities[1] = { 0.f };
pgpu = &pdev;
ensure(graphics_queue_idx == present_queue_idx || present_queue_idx == umax); // TODO
std::vector<VkDeviceQueueCreateInfo> device_queues;
auto& graphics_queue = device_queues.emplace_back();
graphics_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
graphics_queue.pNext = nullptr;
graphics_queue.flags = 0;
graphics_queue.queueFamilyIndex = graphics_queue_idx;
graphics_queue.queueCount = 1;
graphics_queue.pQueuePriorities = queue_priorities;
u32 transfer_queue_sub_index = 0;
if (transfer_queue_idx == umax)
{
// Transfer queue must be a valid device queue
rsx_log.warning("Dedicated transfer+compute queue was not found on this GPU. Will use graphics queue instead.");
transfer_queue_idx = graphics_queue_idx;
// Check if we can at least get a second graphics queue
if (pdev.get_queue_properties(graphics_queue_idx).queueCount > 1)
{
rsx_log.notice("Will use a spare graphics queue to push transfer operations.");
graphics_queue.queueCount++;
transfer_queue_sub_index = 1;
}
}
m_graphics_queue_family = graphics_queue_idx;
m_present_queue_family = present_queue_idx;
m_transfer_queue_family = transfer_queue_idx;
if (graphics_queue_idx != transfer_queue_idx)
{
auto& transfer_queue = device_queues.emplace_back();
transfer_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
transfer_queue.pNext = nullptr;
transfer_queue.flags = 0;
transfer_queue.queueFamilyIndex = transfer_queue_idx;
transfer_queue.queueCount = 1;
transfer_queue.pQueuePriorities = queue_priorities;
}
// Set up device extensions
std::vector<const char*> requested_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME };
// Enable hardware features manually
// Currently we require:
// 1. Anisotropic sampling
// 2. DXT support
// 3. Indexable storage buffers
VkPhysicalDeviceFeatures enabled_features{};
if (pgpu->shader_types_support.allow_float16)
{
requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (pgpu->optional_features_support.conditional_rendering)
{
requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
}
if (pgpu->optional_features_support.unrestricted_depth_range)
{
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
if (pgpu->optional_features_support.external_memory_host)
{
requested_extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
requested_extensions.push_back(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
}
if (pgpu->optional_features_support.shader_stencil_export)
{
requested_extensions.push_back(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
}
if (pgpu->optional_features_support.sampler_mirror_clamped)
{
requested_extensions.push_back(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
}
if (pgpu->descriptor_indexing_support)
{
requested_extensions.push_back(VK_KHR_MAINTENANCE3_EXTENSION_NAME);
requested_extensions.push_back(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME);
}
if (pgpu->optional_features_support.framebuffer_loops)
{
requested_extensions.push_back(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME);
}
if (pgpu->optional_features_support.barycentric_coords)
{
requested_extensions.push_back(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
}
if (pgpu->optional_features_support.custom_border_color)
{
requested_extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
}
if (pgpu->optional_features_support.synchronization_2)
{
requested_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
enabled_features.logicOp = VK_TRUE;
enabled_features.depthClamp = VK_TRUE;
enabled_features.depthBounds = VK_TRUE;
enabled_features.wideLines = VK_TRUE;
enabled_features.largePoints = VK_TRUE;
enabled_features.shaderFloat64 = VK_TRUE;
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
// MSAA features
enabled_features.sampleRateShading = VK_TRUE;
enabled_features.alphaToOne = VK_TRUE;
enabled_features.shaderStorageImageMultisample = VK_TRUE;
// enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE; // Unused currently, may be needed soon
enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE;
}
if (g_cfg.video.precise_zpass_count)
{
enabled_features.occlusionQueryPrecise = VK_TRUE;
}
// enabled_features.shaderSampledImageArrayDynamicIndexing = VK_TRUE; // Unused currently but will be needed soon
enabled_features.shaderClipDistance = VK_TRUE;
// enabled_features.shaderCullDistance = VK_TRUE; // Alt notation of clip distance
enabled_features.samplerAnisotropy = VK_TRUE;
enabled_features.textureCompressionBC = VK_TRUE;
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;
// Optionally disable unsupported stuff
if (!pgpu->features.shaderStorageImageMultisample || !pgpu->features.shaderStorageImageWriteWithoutFormat)
{
// Disable MSAA if either of these two features is unsupported
if (g_cfg.video.antialiasing_level != msaa_level::none)
{
rsx_log.error("Your GPU driver does not support some required MSAA features. MSAA will be disabled.");
g_cfg.video.antialiasing_level.set(msaa_level::none);
}
enabled_features.sampleRateShading = VK_FALSE;
enabled_features.alphaToOne = VK_FALSE;
enabled_features.shaderStorageImageMultisample = VK_FALSE;
enabled_features.shaderStorageImageWriteWithoutFormat = VK_FALSE;
}
if (!pgpu->features.shaderClipDistance)
{
rsx_log.error("Your GPU does not support shader clip distance. Graphics will not render correctly.");
enabled_features.shaderClipDistance = VK_FALSE;
}
if (!pgpu->features.shaderStorageBufferArrayDynamicIndexing)
{
rsx_log.error("Your GPU does not support shader storage buffer array dynamic indexing. Graphics will not render correctly.");
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_FALSE;
}
if (!pgpu->features.samplerAnisotropy)
{
rsx_log.error("Your GPU does not support anisotropic filtering. Graphics may not render correctly.");
enabled_features.samplerAnisotropy = VK_FALSE;
}
if (!pgpu->features.shaderFloat64)
{
rsx_log.error("Your GPU does not support double precision floats in shaders. Graphics may not render correctly.");
enabled_features.shaderFloat64 = VK_FALSE;
}
if (!pgpu->features.depthBounds)
{
rsx_log.error("Your GPU does not support depth bounds testing. Graphics may not render correctly.");
enabled_features.depthBounds = VK_FALSE;
}
if (!pgpu->features.wideLines)
{
rsx_log.error("Your GPU does not support wide lines. Graphics may not render correctly.");
enabled_features.wideLines = VK_FALSE;
}
if (!pgpu->features.sampleRateShading && enabled_features.sampleRateShading)
{
rsx_log.error("Your GPU does not support sample rate shading for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.sampleRateShading = VK_FALSE;
}
if (!pgpu->features.alphaToOne && enabled_features.alphaToOne)
{
// AMD proprietary drivers do not expose alphaToOne support
rsx_log.error("Your GPU does not support alpha-to-one for multisampling. Graphics may be inaccurate when MSAA is enabled.");
enabled_features.alphaToOne = VK_FALSE;
}
if (!pgpu->features.occlusionQueryPrecise && enabled_features.occlusionQueryPrecise)
{
rsx_log.error("Your GPU does not support precise occlusion queries. Graphics may not render correctly.");
enabled_features.occlusionQueryPrecise = VK_FALSE;
}
#ifdef __APPLE__
if (!pgpu->features.logicOp)
{
rsx_log.error("Your GPU does not support framebuffer logical operations. Graphics may not render correctly.");
enabled_features.logicOp = VK_FALSE;
}
#endif
if (pgpu->get_driver_vendor() == driver_vendor::ANV &&
pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER))
{
// Just disable robust access for now. I'll revisit after ARC launches.
rsx_log.error("Robust buffer access is broken when enabled with EXT_descriptor_indexing on ANV");
enabled_features.robustBufferAccess = VK_FALSE;
}
VkDeviceCreateInfo device = {};
device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device.pNext = nullptr;
device.queueCreateInfoCount = ::size32(device_queues);
device.pQueueCreateInfos = device_queues.data();
device.enabledLayerCount = 0;
device.ppEnabledLayerNames = nullptr; // Deprecated
device.enabledExtensionCount = ::size32(requested_extensions);
device.ppEnabledExtensionNames = requested_extensions.data();
device.pEnabledFeatures = &enabled_features;
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
if (pgpu->shader_types_support.allow_float16)
{
// Allow use of f16 type in shaders if possible
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
shader_support_info.shaderFloat16 = VK_TRUE;
shader_support_info.pNext = const_cast<void*>(device.pNext);
device.pNext = &shader_support_info;
rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
}
else
{
rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
}
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features{};
if (pgpu->descriptor_indexing_support)
{
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_indexing_support.update_after_bind_mask & (1ull << bit)) indexing_features.field = VK_TRUE
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
#undef SET_DESCRIPTOR_BITFLAG
indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
indexing_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &indexing_features;
}
VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loop_features{};
if (pgpu->optional_features_support.framebuffer_loops)
{
fbo_loop_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT;
fbo_loop_features.attachmentFeedbackLoopLayout = VK_TRUE;
fbo_loop_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &fbo_loop_features;
}
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color_features{};
if (pgpu->optional_features_support.custom_border_color)
{
custom_border_color_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
custom_border_color_features.customBorderColors = VK_TRUE;
custom_border_color_features.customBorderColorWithoutFormat = VK_TRUE;
custom_border_color_features.pNext = const_cast<void*>(device.pNext);
device.pNext = &custom_border_color_features;
}
VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2_info{};
if (pgpu->optional_features_support.synchronization_2)
{
synchronization2_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES;
synchronization2_info.pNext = const_cast<void*>(device.pNext);
synchronization2_info.synchronization2 = VK_TRUE;
device.pNext = &synchronization2_info;
}
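// Each optional feature struct above prepends itself to device.pNext, so the
// structs form a singly linked chain that vkCreateDevice walks to enable them.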
CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);
// Dump some diagnostics to the log
rsx_log.notice("%u extensions loaded:", ::size32(requested_extensions));
for (const auto& ext : requested_extensions)
{
rsx_log.notice("** Using %s", ext);
}
// Initialize queues
vkGetDeviceQueue(dev, graphics_queue_idx, 0, &m_graphics_queue);
vkGetDeviceQueue(dev, transfer_queue_idx, transfer_queue_sub_index, &m_transfer_queue);
if (present_queue_idx != umax)
{
vkGetDeviceQueue(dev, present_queue_idx, 0, &m_present_queue);
}
// Import optional function entry points
if (pgpu->optional_features_support.conditional_rendering)
{
_vkCmdBeginConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdBeginConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"));
_vkCmdEndConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdEndConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"));
}
if (pgpu->optional_features_support.debug_utils)
{
_vkSetDebugUtilsObjectNameEXT = reinterpret_cast<PFN_vkSetDebugUtilsObjectNameEXT>(vkGetDeviceProcAddr(dev, "vkSetDebugUtilsObjectNameEXT"));
_vkQueueInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkQueueInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkQueueInsertDebugUtilsLabelEXT"));
_vkCmdInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkCmdInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkCmdInsertDebugUtilsLabelEXT"));
}
if (pgpu->optional_features_support.synchronization_2)
{
_vkCmdSetEvent2KHR = reinterpret_cast<PFN_vkCmdSetEvent2KHR>(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR"));
_vkCmdWaitEvents2KHR = reinterpret_cast<PFN_vkCmdWaitEvents2KHR>(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR"));
_vkCmdPipelineBarrier2KHR = reinterpret_cast<PFN_vkCmdPipelineBarrier2KHR>(vkGetDeviceProcAddr(dev, "vkCmdPipelineBarrier2KHR"));
}
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
if (pgpu->optional_features_support.external_memory_host)
{
memory_map._vkGetMemoryHostPointerPropertiesEXT = reinterpret_cast<PFN_vkGetMemoryHostPointerPropertiesEXT>(vkGetDeviceProcAddr(dev, "vkGetMemoryHostPointerPropertiesEXT"));
}
if (g_cfg.video.disable_vulkan_mem_allocator)
{
m_allocator = std::make_unique<vk::mem_allocator_vk>(*this, pdev);
}
else
{
m_allocator = std::make_unique<vk::mem_allocator_vma>(*this, pdev);
}
// Useful for debugging different VRAM configurations
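// vram_allocation_limit is expressed in MiB (0x100000 bytes), hence the scaling below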
const u64 vram_allocation_limit = g_cfg.video.vk.vram_allocation_limit * 0x100000ull;
memory_map.device_local_total_bytes = std::min(memory_map.device_local_total_bytes, vram_allocation_limit);
}
void render_device::destroy()
{
if (g_render_device == this)
{
g_render_device = nullptr;
}
if (dev && pgpu)
{
if (m_allocator)
{
m_allocator->destroy();
m_allocator.reset();
}
vkDestroyDevice(dev, nullptr);
dev = nullptr;
memory_map = {};
m_formats_support = {};
}
}
const VkFormatProperties render_device::get_format_properties(VkFormat format) const
{
auto found = pgpu->format_properties.find(format);
if (found != pgpu->format_properties.end())
{
return found->second;
}
auto& props = pgpu->format_properties[format];
vkGetPhysicalDeviceFormatProperties(*pgpu, format, &props);
return props;
}
bool render_device::get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const
{
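// typeBits is the VkMemoryRequirements::memoryTypeBits mask: bit i set means
// memory type index i is legal for the resource being allocated.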
VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties();
for (u32 i = 0; i < 32; i++)
{
if ((typeBits & 1) == 1)
{
if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask)
{
if (type_index)
{
*type_index = i;
}
return true;
}
}
typeBits >>= 1;
}
return false;
}
void render_device::rebalance_memory_type_usage()
{
// Rebalance device local memory types
memory_map.device_local.rebalance();
}
// Shared Util
memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
{
VkPhysicalDevice pdev = dev;
VkPhysicalDeviceMemoryProperties memory_properties;
vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
memory_type_mapping result;
result.device_local_total_bytes = 0;
result.host_visible_total_bytes = 0;
result.device_bar_total_bytes = 0;
// Sort the confusingly laid out heap-type map into something easier to scan.
// Not performance-critical; this method is called once at initialization.
struct memory_type
{
u32 type_index;
VkFlags flags;
VkDeviceSize size;
};
struct heap_type_map_entry
{
VkMemoryHeap heap;
std::vector<memory_type> types;
};
std::vector<heap_type_map_entry> memory_heap_map;
for (u32 i = 0; i < memory_properties.memoryHeapCount; ++i)
{
memory_heap_map.push_back(
{
.heap = memory_properties.memoryHeaps[i],
.types = {}
});
}
for (u32 i = 0; i < memory_properties.memoryTypeCount; i++)
{
auto& type_info = memory_properties.memoryTypes[i];
memory_heap_map[type_info.heapIndex].types.push_back({ i, type_info.propertyFlags, 0 });
}
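// Collect, for each heap, the first memory type matching the desired flags
// (and none of the excluded ones); at most one match is taken per heap.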
auto find_memory_type_with_property = [&memory_heap_map](VkFlags desired_flags, VkFlags excluded_flags)
{
std::vector<memory_type> results;
for (auto& heap : memory_heap_map)
{
for (auto &type : heap.types)
{
if (((type.flags & desired_flags) == desired_flags) && !(type.flags & excluded_flags))
{
// Match, only once allowed per heap!
results.push_back({ type.type_index, type.flags, heap.heap.size });
break;
}
}
}
return results;
};
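// Note: AMD's DEVICE_COHERENT/UNCACHED types are excluded from the device-local
// pool below; they are intended for debugging scenarios and are typically much slower.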
auto device_local_types = find_memory_type_with_property(
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
(VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD));
auto host_coherent_types = find_memory_type_with_property(
(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT),
0);
auto bar_memory_types = find_memory_type_with_property(
(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
0);
if (host_coherent_types.empty())
{
rsx_log.warning("[Performance Warning] Could not identify a cached upload heap. Will fall back to uncached transport.");
host_coherent_types = find_memory_type_with_property(
(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
0);
}
ensure(!device_local_types.empty());
ensure(!host_coherent_types.empty());
// BAR heap, currently parked for future use, I have some plans for it (kd-11)
for (auto& type : bar_memory_types)
{
result.device_bar.push(type.type_index, type.size);
result.device_bar_total_bytes += type.size;
}
// Generic VRAM access, requires some minor prioritization based on flags
// Most devices have a 'PURE' device local type, pin that as the first priority
// Internally, there will be some reshuffling based on memory load later, but this is rare
if (device_local_types.size() > 1)
{
std::sort(device_local_types.begin(), device_local_types.end(), [](const auto& a, const auto& b)
{
if (a.flags == b.flags)
{
return a.size > b.size;
}
return (a.flags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (b.flags != VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT && a.size > b.size);
});
}
for (auto& type : device_local_types)
{
result.device_local.push(type.type_index, type.size);
result.device_local_total_bytes += type.size;
}
// Sort upload heap entries based on size.
if (host_coherent_types.size() > 1)
{
std::sort(host_coherent_types.begin(), host_coherent_types.end(), FN(x.size > y.size));
}
for (auto& type : host_coherent_types)
{
result.host_visible_coherent.push(type.type_index, type.size);
result.host_visible_total_bytes += type.size;
}
rsx_log.notice("Detected %llu MB of device local memory", result.device_local_total_bytes / (0x100000));
rsx_log.notice("Detected %llu MB of host coherent memory", result.host_visible_total_bytes / (0x100000));
rsx_log.notice("Detected %llu MB of BAR memory", result.device_bar_total_bytes / (0x100000));
return result;
}
gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev)
{
const auto test_format_features = [&dev](VkFormat format, VkFlags required_features, VkBool32 linear_features) -> bool
{
VkFormatProperties props;
vkGetPhysicalDeviceFormatProperties(dev, format, &props);
const auto supported_features_mask = (linear_features) ? props.linearTilingFeatures : props.optimalTilingFeatures;
return (supported_features_mask & required_features) == required_features;
};
gpu_formats_support result = {};
const VkFlags required_zbuffer_features = (VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
const VkFlags required_colorbuffer_features = (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT);
// Check supported depth formats
result.d24_unorm_s8 = test_format_features(VK_FORMAT_D24_UNORM_S8_UINT, required_zbuffer_features, VK_FALSE);
result.d32_sfloat_s8 = test_format_features(VK_FORMAT_D32_SFLOAT_S8_UINT, required_zbuffer_features, VK_FALSE);
// Hide d24_s8 if force high precision z buffer is enabled
if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8)
{
result.d24_unorm_s8 = false;
}
// Check if linear BGRA8 images can be used for present
result.bgra8_linear = test_format_features(VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_FEATURE_BLIT_SRC_BIT, VK_TRUE);
// Check if device supports RGBA8 format for rendering
if (!test_format_features(VK_FORMAT_R8G8B8A8_UNORM, required_colorbuffer_features, VK_FALSE))
{
// Non-fatal. Most games use the BGRA layout for legacy reasons: old GPUs typically supported BGRA natively while RGBA was emulated.
rsx_log.error("Your GPU and/or driver does not support RGBA8 format. This can cause problems in some rare games that use this memory layout.");
}
// Check if linear RGBA8 images can be used for present
result.argb8_linear = test_format_features(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_FEATURE_BLIT_SRC_BIT, VK_TRUE);
return result;
}
pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev)
{
pipeline_binding_table result{};
// Need to check how many samplers are supported by the driver
const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u);
result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers;
result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4;
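// Resulting layout: common bindings, then fragment textures (usable_samplers slots),
// then the 4 bindings after vertex_textures_first_bind_slot for the RSX vertex textures.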
return result;
}
}