2023-06-24 14:59:27 +02:00
|
|
|
#include "device.hpp"
|
2023-07-24 05:03:55 +02:00
|
|
|
#include "amdgpu/shader/AccessOp.hpp"
|
|
|
|
|
#include "amdgpu/shader/Converter.hpp"
|
|
|
|
|
#include "scheduler.hpp"
|
2023-06-24 14:59:27 +02:00
|
|
|
#include "tiler.hpp"
|
|
|
|
|
|
|
|
|
|
#include "spirv-tools/optimizer.hpp"
|
2023-07-24 05:03:55 +02:00
|
|
|
#include "util/area.hpp"
|
2023-06-24 14:59:27 +02:00
|
|
|
#include "util/unreachable.hpp"
|
2023-07-17 14:35:53 +02:00
|
|
|
#include "vk.hpp"
|
2023-07-24 05:03:55 +02:00
|
|
|
#include <amdgpu/shader/UniformBindings.hpp>
|
2023-06-24 14:59:27 +02:00
|
|
|
#include <bit>
|
2023-07-24 05:03:55 +02:00
|
|
|
#include <compare>
|
2023-06-24 14:59:27 +02:00
|
|
|
#include <cstddef>
|
|
|
|
|
#include <cstdint>
|
|
|
|
|
#include <cstdio>
|
|
|
|
|
#include <cstring>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <forward_list>
|
|
|
|
|
#include <map>
|
2023-07-24 05:03:55 +02:00
|
|
|
#include <mutex>
|
2023-06-24 14:59:27 +02:00
|
|
|
#include <optional>
|
|
|
|
|
#include <set>
|
|
|
|
|
#include <shaders/rect_list.geom.h>
|
|
|
|
|
#include <span>
|
2023-07-14 03:33:45 +02:00
|
|
|
#include <spirv_cross/spirv_glsl.hpp>
|
2023-06-24 14:59:27 +02:00
|
|
|
#include <sys/mman.h>
|
|
|
|
|
#include <unistd.h>
|
2023-07-14 03:33:45 +02:00
|
|
|
#include <util/VerifyVulkan.hpp>
|
2023-07-17 14:35:53 +02:00
|
|
|
#include <utility>
|
2023-06-24 14:59:27 +02:00
|
|
|
#include <vulkan/vulkan_core.h>
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
using namespace amdgpu;
|
|
|
|
|
using namespace amdgpu::device;
|
|
|
|
|
|
2023-06-24 14:59:27 +02:00
|
|
|
void *g_rwMemory;
|
|
|
|
|
std::size_t g_memorySize;
|
|
|
|
|
std::uint64_t g_memoryBase;
|
2023-07-17 14:35:53 +02:00
|
|
|
RemoteMemory g_hostMemory;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
namespace amdgpu::device::vk {
|
|
|
|
|
VkDevice g_vkDevice = VK_NULL_HANDLE;
|
|
|
|
|
VkAllocationCallbacks *g_vkAllocator = nullptr;
|
2023-06-25 21:58:15 +02:00
|
|
|
static VkPhysicalDeviceMemoryProperties g_physicalMemoryProperties;
|
|
|
|
|
static VkPhysicalDeviceProperties g_physicalDeviceProperties;
|
2023-07-17 14:35:53 +02:00
|
|
|
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
|
|
|
|
|
VkMemoryPropertyFlags properties) {
|
|
|
|
|
typeBits &= (1 << g_physicalMemoryProperties.memoryTypeCount) - 1;
|
|
|
|
|
|
|
|
|
|
while (typeBits != 0) {
|
|
|
|
|
auto typeIndex = std::countr_zero(typeBits);
|
|
|
|
|
|
|
|
|
|
if ((g_physicalMemoryProperties.memoryTypes[typeIndex].propertyFlags &
|
|
|
|
|
properties) == properties) {
|
|
|
|
|
return typeIndex;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
typeBits &= ~(1 << typeIndex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
util::unreachable("Failed to find memory type with properties %x",
|
|
|
|
|
properties);
|
|
|
|
|
}
|
|
|
|
|
} // namespace amdgpu::device::vk
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// File-local scheduler instance, constructed with the argument 4.
// NOTE(review): presumably a worker/thread count — confirm in scheduler.hpp.
static Scheduler g_scheduler{4};
|
|
|
|
|
|
|
|
|
|
// Forwards to the vkCreateShadersEXT entry point. The function pointer is
// obtained through vkGetDeviceProcAddr on vk::g_vkDevice and cached in a
// function-local static; the lookup is repeated on each call for as long as
// the cached pointer is still null.
static VkResult _vkCreateShadersEXT(VkDevice device, uint32_t createInfoCount,
                                    const VkShaderCreateInfoEXT *pCreateInfos,
                                    const VkAllocationCallbacks *pAllocator,
                                    VkShaderEXT *pShaders) {
  static PFN_vkCreateShadersEXT cachedEntry = nullptr;

  if (!cachedEntry) {
    auto rawEntry = vkGetDeviceProcAddr(vk::g_vkDevice, "vkCreateShadersEXT");
    cachedEntry = reinterpret_cast<PFN_vkCreateShadersEXT>(rawEntry);
  }

  return cachedEntry(device, createInfoCount, pCreateInfos, pAllocator,
                     pShaders);
}
|
|
|
|
|
|
|
|
|
|
// Forwards to the vkCmdBindShadersEXT entry point. The function pointer is
// obtained through vkGetDeviceProcAddr on vk::g_vkDevice and cached in a
// function-local static; the lookup is repeated on each call for as long as
// the cached pointer is still null.
static void _vkCmdBindShadersEXT(VkCommandBuffer commandBuffer,
                                 uint32_t stageCount,
                                 const VkShaderStageFlagBits *pStages,
                                 const VkShaderEXT *pShaders) {
  static PFN_vkCmdBindShadersEXT cachedEntry = nullptr;

  if (!cachedEntry) {
    auto rawEntry = vkGetDeviceProcAddr(vk::g_vkDevice, "vkCmdBindShadersEXT");
    cachedEntry = reinterpret_cast<PFN_vkCmdBindShadersEXT>(rawEntry);
  }

  cachedEntry(commandBuffer, stageCount, pStages, pShaders);
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
// File-local feature toggle, currently disabled.
// NOTE(review): presumably selects direct use of mapped memory instead of
// copies — confirm at its use sites.
static const bool kUseDirectMemory = false;
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// File-local memory area table instantiated with
// util::StdSetInvalidationHandle.
// NOTE(review): presumably tracks regions that need cache invalidation —
// confirm against util/area.hpp.
static util::MemoryAreaTable<util::StdSetInvalidationHandle> memoryAreaTable;
|
2023-06-25 21:58:15 +02:00
|
|
|
|
|
|
|
|
// Stores the logical device handle together with copies of the physical
// device's memory and general properties in the vk:: globals of this file.
void device::setVkDevice(VkDevice device,
                         VkPhysicalDeviceMemoryProperties memProperties,
                         VkPhysicalDeviceProperties devProperties) {
  vk::g_physicalDeviceProperties = devProperties;
  vk::g_physicalMemoryProperties = memProperties;
  vk::g_vkDevice = device;
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
// Translates a BlendMultiplier enumerator into the matching VkBlendFactor.
// Values not covered by the switch end up in util::unreachable().
static VkBlendFactor blendMultiplierToVkBlendFactor(BlendMultiplier mul) {
  // clang-format off
  switch (mul) {
  // Constants.
  case kBlendMultiplierZero:                  return VK_BLEND_FACTOR_ZERO;
  case kBlendMultiplierOne:                   return VK_BLEND_FACTOR_ONE;

  // Source colour / alpha.
  case kBlendMultiplierSrcColor:              return VK_BLEND_FACTOR_SRC_COLOR;
  case kBlendMultiplierOneMinusSrcColor:      return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
  case kBlendMultiplierSrcAlpha:              return VK_BLEND_FACTOR_SRC_ALPHA;
  case kBlendMultiplierOneMinusSrcAlpha:      return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;

  // Destination colour / alpha.
  case kBlendMultiplierDestAlpha:             return VK_BLEND_FACTOR_DST_ALPHA;
  case kBlendMultiplierOneMinusDestAlpha:     return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
  case kBlendMultiplierDestColor:             return VK_BLEND_FACTOR_DST_COLOR;
  case kBlendMultiplierOneMinusDestColor:     return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;

  case kBlendMultiplierSrcAlphaSaturate:      return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE;

  // Constant colour.
  case kBlendMultiplierConstantColor:         return VK_BLEND_FACTOR_CONSTANT_COLOR;
  case kBlendMultiplierOneMinusConstantColor: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;

  // Second source (dual-source blending).
  case kBlendMultiplierSrc1Color:             return VK_BLEND_FACTOR_SRC1_COLOR;
  case kBlendMultiplierInverseSrc1Color:      return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR;
  case kBlendMultiplierSrc1Alpha:             return VK_BLEND_FACTOR_SRC1_ALPHA;
  case kBlendMultiplierInverseSrc1Alpha:      return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;

  // Constant alpha.
  case kBlendMultiplierConstantAlpha:         return VK_BLEND_FACTOR_CONSTANT_ALPHA;
  case kBlendMultiplierOneMinusConstantAlpha: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
  }
  // clang-format on

  util::unreachable();
}
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
// Translates a BlendFunc enumerator into the matching VkBlendOp.
// Values not covered by the switch end up in util::unreachable().
static VkBlendOp blendFuncToVkBlendOp(BlendFunc func) {
  // clang-format off
  switch (func) {
  case kBlendFuncAdd:             return VK_BLEND_OP_ADD;
  case kBlendFuncSubtract:        return VK_BLEND_OP_SUBTRACT;
  case kBlendFuncMin:             return VK_BLEND_OP_MIN;
  case kBlendFuncMax:             return VK_BLEND_OP_MAX;
  case kBlendFuncReverseSubtract: return VK_BLEND_OP_REVERSE_SUBTRACT;
  }
  // clang-format on

  util::unreachable();
}
|
|
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
// Extracts one bit field from a register value: `src` is ANDed with
// GNM_<registername>__<field>__MASK and the result is shifted right by
// GNM_<registername>__<field>__SHIFT.
#define GNM_GET_FIELD(src, registername, field)                                \
  (((src) & (GNM_##registername##__##field##__MASK)) >>                        \
   (GNM_##registername##__##field##__SHIFT))
|
|
|
|
|
|
|
|
|
|
// ---- SQ_BUF_RSRC_WORD0..3 ---------------------------------------------------
// Each field has a <FIELD>__MASK / <FIELD>__SHIFT pair consumed by
// GNM_GET_FIELD; the "size" remarks give the field width in bits.

// WORD0
#define mmSQ_BUF_RSRC_WORD0 0x23C0
#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__SHIFT 0
#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__MASK 0xffffffffL // size:32

// WORD1
#define mmSQ_BUF_RSRC_WORD1 0x23C1
#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__SHIFT 0
#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__MASK 0x00000fffL // size:12
#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__SHIFT 16
#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__MASK 0x3fff0000L // size:14
#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__SHIFT 31
#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__MASK 0x80000000L // size: 1

// WORD2
#define mmSQ_BUF_RSRC_WORD2 0x23C2
#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__SHIFT 0
#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__MASK 0xffffffffL // size:32

// WORD3
#define mmSQ_BUF_RSRC_WORD3 0x23C3
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__SHIFT 0
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__MASK 0x00000007L // size: 3
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__SHIFT 3
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__MASK 0x00000038L // size: 3
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__SHIFT 6
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__MASK 0x000001c0L // size: 3
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__SHIFT 9
#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__MASK 0x00000e00L // size: 3
#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__SHIFT 19
#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__MASK 0x00180000L // size: 2
#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__SHIFT 21
#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__MASK 0x00600000L // size: 2
#define GNM_SQ_BUF_RSRC_WORD3__TYPE__SHIFT 30
#define GNM_SQ_BUF_RSRC_WORD3__TYPE__MASK 0xc0000000L // size: 2
|
|
|
|
|
|
|
|
|
|
// ---- CB_COLOR0_PITCH ---------------------------------------------------------
#define mmCB_COLOR0_PITCH 0xA319
#define GNM_CB_COLOR0_PITCH__TILE_MAX__SHIFT 0
#define GNM_CB_COLOR0_PITCH__TILE_MAX__MASK 0x000007ffL // size:11
#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__SHIFT 20
#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__MASK 0x7ff00000L // size:11

// ---- CB_COLOR0_SLICE ---------------------------------------------------------
#define mmCB_COLOR0_SLICE 0xA31A
#define GNM_CB_COLOR0_SLICE__TILE_MAX__SHIFT 0
#define GNM_CB_COLOR0_SLICE__TILE_MAX__MASK 0x003fffffL // size:22

// ---- CB_COLOR0_VIEW ----------------------------------------------------------
#define mmCB_COLOR0_VIEW 0xA31B
#define GNM_CB_COLOR0_VIEW__SLICE_START__SHIFT 0
#define GNM_CB_COLOR0_VIEW__SLICE_START__MASK 0x000007ffL // size:11
#define GNM_CB_COLOR0_VIEW__SLICE_MAX__SHIFT 13
#define GNM_CB_COLOR0_VIEW__SLICE_MAX__MASK 0x00ffe000L // size:11
|
|
|
|
|
|
|
|
|
|
// ---- CB_COLOR0_INFO ----------------------------------------------------------
// Field masks/shifts for GNM_GET_FIELD. Masks that are written as a shifted
// value are parenthesised so they expand safely inside any expression (for
// example next to `~`, `*` or `==`).
#define mmCB_COLOR0_INFO 0xA31C
#define GNM_CB_COLOR0_INFO__FAST_CLEAR__MASK 0x00002000L             // size: 1
#define GNM_CB_COLOR0_INFO__COMPRESSION__MASK 0x00004000L            // size: 1
#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__MASK 0x00080000L        // size: 1
#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__MASK 0x0C000000L // size: 2
#define GNM_CB_COLOR0_INFO__DCC_ENABLE__MASK 0x10000000L             // size: 1
#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__MASK 0x60000000L        // size: 2
#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__MASK 0x80000000L          // size: 1
#define GNM_CB_COLOR0_INFO__FAST_CLEAR__SHIFT 13
#define GNM_CB_COLOR0_INFO__COMPRESSION__SHIFT 14
#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__SHIFT 19
#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__SHIFT 26
#define GNM_CB_COLOR0_INFO__DCC_ENABLE__SHIFT 28
#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__SHIFT 29
#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__SHIFT 31

#define GNM_CB_COLOR0_INFO__FORMAT__MASK (0x3f << 2)
#define GNM_CB_COLOR0_INFO__FORMAT__SHIFT 2

#define GNM_CB_COLOR0_INFO__ARRAY_MODE__MASK (0x0f << 8)
#define GNM_CB_COLOR0_INFO__ARRAY_MODE__SHIFT 8

enum {
  ARRAY_LINEAR_GENERAL = 0x00, // Unaligned linear array
  ARRAY_LINEAR_ALIGNED = 0x01, // Aligned linear array
};

#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__MASK (0x07 << 12)
#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__SHIFT 12

enum {
  // Unsigned repeating fraction (urf): range [0..1], scale factor (2^n)-1.
  NUMBER_UNORM = 0x00,
  // Microsoft-style signed rf: range [-1..1], scale factor (2^(n-1))-1.
  NUMBER_SNORM = 0x01,
  // Unsigned integer, converted to float in shader: range [0..(2^n)-1].
  NUMBER_USCALED = 0x02,
  // Signed integer, converted to float in shader:
  // range [-2^(n-1)..2^(n-1)-1].
  NUMBER_SSCALED = 0x03,
  // Zero-extended bit field, int in shader: not blendable or filterable.
  NUMBER_UINT = 0x04,
  // Sign-extended bit field, int in shader: not blendable or filterable.
  NUMBER_SINT = 0x05,
  // Gamma corrected, range [0..1] (only supported for 8-bit components;
  // always rounds color channels).
  NUMBER_SRGB = 0x06,
  // Floating point, depends on component size:
  //   32-bit: IEEE float, SE8M23, bias 127, range (-2^129..2^129)
  //   24-bit: Depth float, E4M20, bias 15, range [0..1]
  //   16-bit: Short float, SE5M10, bias 15, range (-2^17..2^17)
  //   11-bit: Packed float, E5M6, bias 15, range [0..2^17)
  //   10-bit: Packed float, E5M5, bias 15, range [0..2^17)
  // All other component sizes are treated as UINT.
  NUMBER_FLOAT = 0x07,
};

#define GNM_CB_COLOR0_INFO__READ_SIZE__MASK (1 << 15)
#define GNM_CB_COLOR0_INFO__READ_SIZE__SHIFT 15

// Specifies how to map the red, green, blue, and alpha components from the
// shader to the components in the frame buffer pixel format. There are four
// choices for each number of components. With one component, the four modes
// select any one component. With 2-4 components, SWAP_STD selects the low order
// shader components in little-endian order; SWAP_ALT selects an alternate order
// (for 4 components) or inclusion of alpha (for 2 or 3 components); and the
// other two reverse the component orders for use on big-endian machines. The
// following table specifies the exact component mappings:
//
// 1 comp       std     alt     std_rev alt_rev
// -----------  ------- ------- ------- -------
// comp 0:      red     green   blue    alpha
//
// 2 comps      std     alt     std_rev alt_rev
// -----------  ------- ------- ------- -------
// comp 0:      red     red     green   alpha
// comp 1:      green   alpha   red     red
//
// 3 comps      std     alt     std_rev alt_rev
// -----------  ------- ------- ------- -------
// comp 0:      red     red     blue    alpha
// comp 1:      green   green   green   green
// comp 2:      blue    alpha   red     red
//
// 4 comps      std     alt     std_rev alt_rev
// -----------  ------- ------- ------- -------
// comp 0:      red     blue    alpha   alpha
// comp 1:      green   green   blue    red
// comp 2:      blue    red     green   green
// comp 3:      alpha   alpha   red     blue
//
#define GNM_CB_COLOR0_INFO__COMP_SWAP__MASK (0x03 << 16)
#define GNM_CB_COLOR0_INFO__COMP_SWAP__SHIFT 16
enum {
  SWAP_STD = 0x00,     // standard little-endian comp order
  SWAP_ALT = 0x01,     // alternate components or order
  SWAP_STD_REV = 0x02, // reverses SWAP_STD order
  SWAP_ALT_REV = 0x03, // reverses SWAP_ALT order
};

// Specifies whether to clamp source data to the render target range prior to
// blending, in addition to the post-blend clamp. This bit must be zero for
// uscaled, sscaled and float number types and when blend_bypass is set.
#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__MASK (1 << 20)
#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__SHIFT 20

// If false, use RGB=0.0 and A=1.0 (0x3f800000) to expand fast-cleared tiles. If
// true, use the CB_CLEAR register values to expand fast-cleared tiles.
#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__MASK (1 << 21)
#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__SHIFT 21

// If false, blending occurs normally as specified in CB_BLEND#_CONTROL. If
// true, blending (but not fog) is disabled. This must be set for the 24_8 and
// 8_24 formats and when the number type is uint or sint. It should also be set
// for number types that are required to ignore the blend state in a specific
// application interface.
#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__MASK (1 << 22)
#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__SHIFT 22

// If true, use 32-bit float precision for source colors, else truncate to
// 12-bit mantissa precision. This applies even if blending is disabled so that
// a null blend and blend disable produce the same result. This field is ignored
// for NUMBER_UINT and NUMBER_SINT. It must be one for floating point components
// larger than 16-bits or non-floating components larger than 12-bits,
// otherwise it must be 0.
#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__MASK (1 << 23)
#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__SHIFT 23

// If false, floating point processing follows full IEEE rules for INF, NaN, and
// -0. If true, 0*anything produces 0 and no operation produces -0.
#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__MASK (1 << 24)
#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__SHIFT 24

// This field selects between truncating (standard for floats) and rounding
// (standard for most other cases) to convert blender results to frame buffer
// components. The ROUND_BY_HALF setting can be overridden by the DITHER_ENABLE
// field in CB_COLOR_CONTROL.
#define GNM_CB_COLOR0_INFO__ROUND_MODE__MASK (1 << 25)
#define GNM_CB_COLOR0_INFO__ROUND_MODE__SHIFT 25

// This field indicates the allowed format for color data being exported from
// the pixel shader into the output merge block. This field may only be set to
// EXPORT_NORM if BLEND_CLAMP is enabled, BLEND_FLOAT32 is disabled, and the
// render target has only 11-bit or smaller UNORM or SNORM components. Selecting
// EXPORT_NORM flushes to zero values with exponent less than 0x70 (values less
// than 2^-15).
#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__MASK (1 << 27)
#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__SHIFT 27
|
|
|
|
|
|
|
|
|
|
// ---- CB_COLOR0_ATTRIB --------------------------------------------------------
#define mmCB_COLOR0_ATTRIB 0xA31D
#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__SHIFT 0
#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__MASK 0x0000001fL // size: 5
#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 5
#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__MASK 0x000003e0L // size: 5
#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__SHIFT 12
#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__MASK 0x00007000L // size: 3
#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__SHIFT 15
#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__MASK 0x00018000L // size: 2
#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 17
#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__MASK 0x00020000L // size: 1

// ---- CB_COLOR0_DCC_CONTROL ---------------------------------------------------
#define mmCB_COLOR0_DCC_CONTROL 0xA31E
#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__SHIFT 0
#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__MASK            \
  0x00000001L // size: 1
#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__SHIFT 2
#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__MASK           \
  0x0000000cL // size: 2
#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__SHIFT 4
#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__MASK             \
  0x00000010L // size: 1
#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__SHIFT 5
#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__MASK             \
  0x00000060L // size: 2
#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__SHIFT 7
#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__MASK 0x00000180L // size: 2
#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__SHIFT 9
#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__MASK                \
  0x00000200L // size: 1

// ---- CB_COLOR0_CMASK / CMASK_SLICE -------------------------------------------
#define mmCB_COLOR0_CMASK 0xA31F
#define GNM_CB_COLOR0_CMASK__BASE_256B__SHIFT 0
#define GNM_CB_COLOR0_CMASK__BASE_256B__MASK 0xffffffffL // size:32

#define mmCB_COLOR0_CMASK_SLICE 0xA320
#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__SHIFT 0
#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__MASK 0x00003fffL // size:14

// ---- CB_COLOR0_FMASK / FMASK_SLICE -------------------------------------------
#define mmCB_COLOR0_FMASK 0xA321
#define GNM_CB_COLOR0_FMASK__BASE_256B__SHIFT 0
#define GNM_CB_COLOR0_FMASK__BASE_256B__MASK 0xffffffffL // size:32

#define mmCB_COLOR0_FMASK_SLICE 0xA322
#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__SHIFT 0
#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__MASK 0x003fffffL // size:22

// ---- CB_COLOR0_CLEAR_WORD0 / CLEAR_WORD1 -------------------------------------
#define mmCB_COLOR0_CLEAR_WORD0 0xA323
#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0
#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__MASK 0xffffffffL // size:32

#define mmCB_COLOR0_CLEAR_WORD1 0xA324
#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0
#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__MASK 0xffffffffL // size:32

// ---- CB_COLOR0_DCC_BASE ------------------------------------------------------
#define mmCB_COLOR0_DCC_BASE 0xA325
#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__SHIFT 0
#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__MASK 0xffffffffL // size:32
|
|
|
|
|
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK = genMask(0, 5);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK), 3);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK), 5);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK), 1);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK) + 2, 5);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK), 3);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK), 5);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK), 1);
|
|
|
|
|
static constexpr auto CB_BLEND0_CONTROL_BLEND_ENABLE_MASK =
|
|
|
|
|
genMask(getMaskEnd(CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK), 1);
|
|
|
|
|
|
|
|
|
|
struct ColorBuffer {
|
|
|
|
|
std::uint64_t base;
|
|
|
|
|
std::uint8_t format;
|
|
|
|
|
std::uint8_t tileModeIndex;
|
|
|
|
|
|
|
|
|
|
void setRegister(unsigned index, std::uint32_t value) {
|
|
|
|
|
switch (index) {
|
|
|
|
|
case CB_COLOR0_BASE - CB_COLOR0_BASE:
|
|
|
|
|
base = static_cast<std::uint64_t>(value) << 8;
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * base = %lx\n", base);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CB_COLOR0_PITCH - CB_COLOR0_BASE: {
|
|
|
|
|
auto pitchTileMax = GNM_GET_FIELD(value, CB_COLOR0_PITCH, TILE_MAX);
|
|
|
|
|
auto pitchFmaskTileMax =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_PITCH, FMASK_TILE_MAX);
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * TILE_MAX = %lx\n", pitchTileMax);
|
|
|
|
|
// std::printf(" * FMASK_TILE_MAX = %lx\n", pitchFmaskTileMax);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_SLICE - CB_COLOR0_BASE: { // SLICE
|
|
|
|
|
auto sliceTileMax = GNM_GET_FIELD(value, CB_COLOR0_SLICE, TILE_MAX);
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * TILE_MAX = %lx\n", sliceTileMax);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_VIEW - CB_COLOR0_BASE: { // VIEW
|
2023-06-25 15:54:49 +02:00
|
|
|
auto viewSliceStart = GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_START);
|
2023-06-24 14:59:27 +02:00
|
|
|
auto viewSliceMax = GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_MAX);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * SLICE_START = %lx\n", viewSliceStart);
|
|
|
|
|
// std::printf(" * SLICE_MAX = %lx\n", viewSliceMax);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_INFO - CB_COLOR0_BASE: { // INFO
|
|
|
|
|
auto fastClear = GNM_GET_FIELD(value, CB_COLOR0_INFO, FAST_CLEAR);
|
|
|
|
|
auto compression = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMPRESSION);
|
|
|
|
|
auto cmaskIsLinear =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_IS_LINEAR);
|
|
|
|
|
auto fmaskCompressionMode =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_INFO, FMASK_COMPRESSION_MODE);
|
|
|
|
|
auto dccEnable = GNM_GET_FIELD(value, CB_COLOR0_INFO, DCC_ENABLE);
|
|
|
|
|
auto cmaskAddrType =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_ADDR_TYPE);
|
2023-06-25 15:54:49 +02:00
|
|
|
auto altTileMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ALT_TILE_MODE);
|
2023-06-24 14:59:27 +02:00
|
|
|
format = GNM_GET_FIELD(value, CB_COLOR0_INFO, FORMAT);
|
|
|
|
|
auto arrayMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ARRAY_MODE);
|
|
|
|
|
auto numberType = GNM_GET_FIELD(value, CB_COLOR0_INFO, NUMBER_TYPE);
|
|
|
|
|
auto readSize = GNM_GET_FIELD(value, CB_COLOR0_INFO, READ_SIZE);
|
|
|
|
|
auto compSwap = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMP_SWAP);
|
|
|
|
|
auto blendClamp = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_CLAMP);
|
|
|
|
|
auto clearColor = GNM_GET_FIELD(value, CB_COLOR0_INFO, CLEAR_COLOR);
|
|
|
|
|
auto blendBypass = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_BYPASS);
|
2023-06-25 15:54:49 +02:00
|
|
|
auto blendFloat32 = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_FLOAT32);
|
2023-06-24 14:59:27 +02:00
|
|
|
auto simpleFloat = GNM_GET_FIELD(value, CB_COLOR0_INFO, SIMPLE_FLOAT);
|
|
|
|
|
auto roundMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ROUND_MODE);
|
2023-06-25 15:54:49 +02:00
|
|
|
auto sourceFormat = GNM_GET_FIELD(value, CB_COLOR0_INFO, SOURCE_FORMAT);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * FAST_CLEAR = %lu\n", fastClear);
|
|
|
|
|
// std::printf(" * COMPRESSION = %lu\n", compression);
|
|
|
|
|
// std::printf(" * CMASK_IS_LINEAR = %lu\n", cmaskIsLinear);
|
|
|
|
|
// std::printf(" * FMASK_COMPRESSION_MODE = %lu\n",
|
|
|
|
|
// fmaskCompressionMode); std::printf(" * DCC_ENABLE = %lu\n",
|
|
|
|
|
// dccEnable); std::printf(" * CMASK_ADDR_TYPE = %lu\n", cmaskAddrType);
|
|
|
|
|
// std::printf(" * ALT_TILE_MODE = %lu\n", altTileMode);
|
|
|
|
|
// std::printf(" * FORMAT = %x\n", format);
|
|
|
|
|
// std::printf(" * ARRAY_MODE = %u\n", arrayMode);
|
|
|
|
|
// std::printf(" * NUMBER_TYPE = %u\n", numberType);
|
|
|
|
|
// std::printf(" * READ_SIZE = %u\n", readSize);
|
|
|
|
|
// std::printf(" * COMP_SWAP = %u\n", compSwap);
|
|
|
|
|
// std::printf(" * BLEND_CLAMP = %u\n", blendClamp);
|
|
|
|
|
// std::printf(" * CLEAR_COLOR = %u\n", clearColor);
|
|
|
|
|
// std::printf(" * BLEND_BYPASS = %u\n", blendBypass);
|
|
|
|
|
// std::printf(" * BLEND_FLOAT32 = %u\n", blendFloat32);
|
|
|
|
|
// std::printf(" * SIMPLE_FLOAT = %u\n", simpleFloat);
|
|
|
|
|
// std::printf(" * ROUND_MODE = %u\n", roundMode);
|
|
|
|
|
// std::printf(" * SOURCE_FORMAT = %u\n", sourceFormat);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case CB_COLOR0_ATTRIB - CB_COLOR0_BASE: { // ATTRIB
|
2023-06-25 15:54:49 +02:00
|
|
|
tileModeIndex = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, TILE_MODE_INDEX);
|
2023-07-17 14:35:53 +02:00
|
|
|
auto fmaskTileModeIndex =
|
2023-06-24 14:59:27 +02:00
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FMASK_TILE_MODE_INDEX);
|
2023-07-17 14:35:53 +02:00
|
|
|
auto numSamples = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_SAMPLES);
|
|
|
|
|
auto numFragments = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_FRAGMENTS);
|
|
|
|
|
auto forceDstAlpha1 =
|
2023-06-24 14:59:27 +02:00
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FORCE_DST_ALPHA_1);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * TILE_MODE_INDEX = %u\n", tileModeIndex);
|
|
|
|
|
// std::printf(" * FMASK_TILE_MODE_INDEX = %lu\n", fmaskTileModeIndex);
|
|
|
|
|
// std::printf(" * NUM_SAMPLES = %lu\n", numSamples);
|
|
|
|
|
// std::printf(" * NUM_FRAGMENTS = %lu\n", numFragments);
|
|
|
|
|
// std::printf(" * FORCE_DST_ALPHA_1 = %lu\n", forceDstAlpha1);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_CMASK - CB_COLOR0_BASE: { // CMASK
|
2023-06-25 15:54:49 +02:00
|
|
|
auto cmaskBase = GNM_GET_FIELD(value, CB_COLOR0_CMASK, BASE_256B) << 8;
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * cmaskBase = %lx\n", cmaskBase);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_CMASK_SLICE - CB_COLOR0_BASE: { // CMASK_SLICE
|
|
|
|
|
auto cmaskSliceTileMax =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_CMASK_SLICE, TILE_MAX);
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * cmaskSliceTileMax = %lx\n", cmaskSliceTileMax);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_FMASK - CB_COLOR0_BASE: { // FMASK
|
2023-06-25 15:54:49 +02:00
|
|
|
auto fmaskBase = GNM_GET_FIELD(value, CB_COLOR0_FMASK, BASE_256B) << 8;
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * fmaskBase = %lx\n", fmaskBase);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_FMASK_SLICE - CB_COLOR0_BASE: { // FMASK_SLICE
|
|
|
|
|
auto fmaskSliceTileMax =
|
|
|
|
|
GNM_GET_FIELD(value, CB_COLOR0_FMASK_SLICE, TILE_MAX);
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * fmaskSliceTileMax = %lx\n", fmaskSliceTileMax);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case CB_COLOR0_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD0
|
|
|
|
|
break;
|
|
|
|
|
case CB_COLOR1_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD1
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Number of ColorBuffer entries held in QueueRegisters::colorBuffers.
static constexpr std::size_t colorBuffersCount = 6;
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
// Raster operation codes. The trailing remark on each enumerator gives the
// logical expression over the source (src) and destination (dst) values.
enum class CbRasterOp {
  Blackness = 0x00,
  Nor = 0x05,          // ~(src | dst)
  AndInverted = 0x0a,  // ~src & dst
  CopyInverted = 0x0f, // ~src
  NotSrcErase = 0x11,  // ~src & ~dst
  SrcErase = 0x44,     // src & ~dst
  DstInvert = 0x55,    // ~dst
  Xor = 0x5a,          // src ^ dst
  Nand = 0x5f,         // ~(src & dst)
  And = 0x88,          // src & dst
  Equiv = 0x99,        // ~(src ^ dst)
  Noop = 0xaa,         // dst
  OrInverted = 0xaf,   // ~src | dst
  Copy = 0xcc,         // src
  OrReverse = 0xdd,    // src | ~dst
  Or = 0xee,           // src | dst
  Whiteness = 0xff,
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
// Color block operating modes. The register notes this enum was written from
// also list "04 - CB_DECOMPRESS: Decompress MRT0 to a" (the description is cut
// off there); no enumerator is defined for that value.
enum class CbColorFormat {
  // 00 - CB_DISABLE: Disables drawing to color buffer. Causes DB to not send
  // tiles/quads to CB. CB itself ignores this field.
  Disable = 0,

  // 01 - CB_NORMAL: Normal rendering mode. DB should send tiles and quads for
  // pixel exports or just quads for compute exports.
  Normal = 1,

  // 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast cleared color surface locations
  // with clear color. DB should send only tiles.
  EliminateFastClear = 2,

  // 03 - CB_RESOLVE: Read from MRT0, average all samples, and write to MRT1,
  // which is one-sample. DB should send only tiles.
  Resolve = 3,
};
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
struct QueueRegisters {
|
|
|
|
|
std::uint64_t pgmPsAddress = 0;
|
|
|
|
|
std::uint64_t pgmVsAddress = 0;
|
|
|
|
|
std::uint64_t pgmComputeAddress = 0;
|
|
|
|
|
std::uint32_t userVsData[16];
|
|
|
|
|
std::uint32_t userPsData[16];
|
|
|
|
|
std::uint32_t userComputeData[16];
|
|
|
|
|
std::uint32_t computeNumThreadX = 1;
|
|
|
|
|
std::uint32_t computeNumThreadY = 1;
|
|
|
|
|
std::uint32_t computeNumThreadZ = 1;
|
|
|
|
|
std::uint8_t psUserSpgrs;
|
|
|
|
|
std::uint8_t vsUserSpgrs;
|
|
|
|
|
std::uint8_t computeUserSpgrs;
|
|
|
|
|
|
|
|
|
|
ColorBuffer colorBuffers[colorBuffersCount];
|
|
|
|
|
|
|
|
|
|
std::uint32_t indexType;
|
|
|
|
|
|
|
|
|
|
std::uint32_t screenScissorX = 0;
|
|
|
|
|
std::uint32_t screenScissorY = 0;
|
|
|
|
|
std::uint32_t screenScissorW = 0;
|
|
|
|
|
std::uint32_t screenScissorH = 0;
|
|
|
|
|
|
|
|
|
|
CbColorFormat cbColorFormat = CbColorFormat::Normal;
|
|
|
|
|
|
|
|
|
|
CbRasterOp cbRasterOp = CbRasterOp::Copy;
|
|
|
|
|
|
|
|
|
|
std::uint32_t vgtPrimitiveType = 0;
|
|
|
|
|
bool stencilEnable = false;
|
|
|
|
|
bool depthEnable = false;
|
|
|
|
|
bool depthWriteEnable = false;
|
|
|
|
|
bool depthBoundsEnable = false;
|
|
|
|
|
int zFunc = 0;
|
|
|
|
|
bool backFaceEnable = false;
|
|
|
|
|
int stencilFunc = 0;
|
|
|
|
|
int stencilFuncBackFace = 0;
|
|
|
|
|
|
|
|
|
|
float depthClear = 1.f;
|
|
|
|
|
|
|
|
|
|
bool cullFront = false;
|
|
|
|
|
bool cullBack = false;
|
|
|
|
|
int face = 0; // 0 - CCW, 1 - CW
|
|
|
|
|
bool polyMode = false;
|
|
|
|
|
int polyModeFrontPType = 0;
|
|
|
|
|
int polyModeBackPType = 0;
|
|
|
|
|
bool polyOffsetFrontEnable = false;
|
|
|
|
|
bool polyOffsetBackEnable = false;
|
|
|
|
|
bool polyOffsetParaEnable = false;
|
|
|
|
|
bool vtxWindowOffsetEnable = false;
|
|
|
|
|
bool provokingVtxLast = false;
|
|
|
|
|
bool erspCorrDis = false;
|
|
|
|
|
bool multiPrimIbEna = false;
|
|
|
|
|
|
|
|
|
|
bool depthClearEnable = false;
|
|
|
|
|
bool stencilClearEnable = false;
|
|
|
|
|
bool depthCopy = false;
|
|
|
|
|
bool stencilCopy = false;
|
|
|
|
|
bool resummarizeEnable = false;
|
|
|
|
|
bool stencilCompressDisable = false;
|
|
|
|
|
bool depthCompressDisable = false;
|
|
|
|
|
bool copyCentroid = false;
|
|
|
|
|
int copySample = 0;
|
|
|
|
|
bool zpassIncrementDisable = false;
|
|
|
|
|
|
|
|
|
|
std::uint64_t zReadBase = 0;
|
|
|
|
|
std::uint64_t zWriteBase = 0;
|
|
|
|
|
|
|
|
|
|
BlendMultiplier blendColorSrc = {};
|
|
|
|
|
BlendFunc blendColorFn = {};
|
|
|
|
|
BlendMultiplier blendColorDst = {};
|
|
|
|
|
BlendMultiplier blendAlphaSrc = {};
|
|
|
|
|
BlendFunc blendAlphaFn = {};
|
|
|
|
|
BlendMultiplier blendAlphaDst = {};
|
|
|
|
|
bool blendSeparateAlpha = false;
|
|
|
|
|
bool blendEnable = false;
|
|
|
|
|
std::uint32_t cbRenderTargetMask = 0;
|
|
|
|
|
|
|
|
|
|
void setRegister(std::uint32_t regId, std::uint32_t value) {
|
|
|
|
|
switch (regId) {
|
|
|
|
|
case SPI_SHADER_PGM_LO_PS:
|
|
|
|
|
pgmPsAddress &= ~((1ull << 40) - 1);
|
|
|
|
|
pgmPsAddress |= static_cast<std::uint64_t>(value) << 8;
|
|
|
|
|
break;
|
|
|
|
|
case SPI_SHADER_PGM_HI_PS:
|
|
|
|
|
pgmPsAddress &= (1ull << 40) - 1;
|
|
|
|
|
pgmPsAddress |= static_cast<std::uint64_t>(value) << 40;
|
|
|
|
|
break;
|
|
|
|
|
case SPI_SHADER_PGM_LO_VS:
|
|
|
|
|
pgmVsAddress &= ~((1ull << 40) - 1);
|
|
|
|
|
pgmVsAddress |= static_cast<std::uint64_t>(value) << 8;
|
|
|
|
|
break;
|
|
|
|
|
case SPI_SHADER_PGM_HI_VS:
|
|
|
|
|
pgmVsAddress &= (1ull << 40) - 1;
|
|
|
|
|
pgmVsAddress |= static_cast<std::uint64_t>(value) << 40;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case SPI_SHADER_USER_DATA_VS_0:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_1:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_2:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_3:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_4:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_5:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_6:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_7:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_8:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_9:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_10:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_11:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_12:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_13:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_14:
|
|
|
|
|
case SPI_SHADER_USER_DATA_VS_15:
|
|
|
|
|
userVsData[regId - SPI_SHADER_USER_DATA_VS_0] = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case SPI_SHADER_USER_DATA_PS_0:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_1:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_2:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_3:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_4:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_5:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_6:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_7:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_8:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_9:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_10:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_11:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_12:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_13:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_14:
|
|
|
|
|
case SPI_SHADER_USER_DATA_PS_15:
|
|
|
|
|
userPsData[regId - SPI_SHADER_USER_DATA_PS_0] = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case SPI_SHADER_PGM_RSRC2_PS:
|
|
|
|
|
psUserSpgrs = (value >> 1) & 0x1f;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case SPI_SHADER_PGM_RSRC2_VS:
|
|
|
|
|
vsUserSpgrs = (value >> 1) & 0x1f;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case CB_COLOR0_BASE ... CB_COLOR6_DCC_BASE: {
|
|
|
|
|
auto buffer =
|
|
|
|
|
(regId - CB_COLOR0_BASE) / (CB_COLOR1_BASE - CB_COLOR0_BASE);
|
|
|
|
|
auto index = (regId - CB_COLOR0_BASE) % (CB_COLOR1_BASE - CB_COLOR0_BASE);
|
|
|
|
|
colorBuffers[buffer].setRegister(index, value);
|
|
|
|
|
break;
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case DB_RENDER_CONTROL:
|
|
|
|
|
depthClearEnable = getBit(value, 0);
|
|
|
|
|
stencilClearEnable = getBit(value, 1);
|
|
|
|
|
depthCopy = getBit(value, 2);
|
|
|
|
|
stencilCopy = getBit(value, 3);
|
|
|
|
|
resummarizeEnable = getBit(value, 4);
|
|
|
|
|
stencilCompressDisable = getBit(value, 5);
|
|
|
|
|
depthCompressDisable = getBit(value, 6);
|
|
|
|
|
copyCentroid = getBit(value, 7);
|
|
|
|
|
copySample = getBits(value, 10, 8);
|
|
|
|
|
zpassIncrementDisable = getBit(value, 11);
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case DB_Z_READ_BASE:
|
|
|
|
|
zReadBase = static_cast<std::uint64_t>(value) << 8;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case DB_Z_WRITE_BASE:
|
|
|
|
|
zWriteBase = static_cast<std::uint64_t>(value) << 8;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case DB_DEPTH_CLEAR:
|
|
|
|
|
depthClear = std::bit_cast<float>(value);
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case DB_DEPTH_CONTROL:
|
|
|
|
|
stencilEnable = getBit(value, 0) != 0;
|
|
|
|
|
depthEnable = getBit(value, 1) != 0;
|
|
|
|
|
depthWriteEnable = getBit(value, 2) != 0;
|
|
|
|
|
depthBoundsEnable = getBit(value, 3) != 0;
|
|
|
|
|
zFunc = getBits(value, 6, 4);
|
|
|
|
|
backFaceEnable = getBit(value, 7);
|
|
|
|
|
stencilFunc = getBits(value, 11, 8);
|
|
|
|
|
stencilFuncBackFace = getBits(value, 23, 20);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf("stencilEnable=%u, depthEnable=%u, depthWriteEnable=%u, "
|
|
|
|
|
// "depthBoundsEnable=%u, zFunc=%u, backFaceEnable=%u, "
|
|
|
|
|
// "stencilFunc=%u, stencilFuncBackFace=%u\n",
|
|
|
|
|
// stencilEnable, depthEnable, depthWriteEnable,
|
|
|
|
|
// depthBoundsEnable, zFunc, backFaceEnable, stencilFunc,
|
|
|
|
|
// stencilFuncBackFace);
|
2023-07-19 14:16:38 +02:00
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case CB_TARGET_MASK: {
|
|
|
|
|
cbRenderTargetMask = value;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case CB_COLOR_CONTROL: {
|
|
|
|
|
/*
|
|
|
|
|
If true, then each UNORM format COLOR_8_8_8_8
|
|
|
|
|
MRT is treated as an SRGB format instead. This affects
|
|
|
|
|
both normal draw and resolve. This bit exists for
|
|
|
|
|
compatibility with older architectures that did not have
|
|
|
|
|
an SRGB number type.
|
|
|
|
|
*/
|
|
|
|
|
auto degammaEnable = getBits(value, 3, 0);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
/*
|
|
|
|
|
This field selects standard color processing or one of
|
|
|
|
|
several major operation modes.
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
POSSIBLE VALUES:
|
|
|
|
|
00 - CB_DISABLE: Disables drawing to color
|
|
|
|
|
buffer. Causes DB to not send tiles/quads to CB. CB
|
|
|
|
|
itself ignores this field.
|
|
|
|
|
01 - CB_NORMAL: Normal rendering mode. DB
|
|
|
|
|
should send tiles and quads for pixel exports or just
|
|
|
|
|
quads for compute exports.
|
|
|
|
|
02 - CB_ELIMINATE_FAST_CLEAR: Fill fast
|
|
|
|
|
cleared color surface locations with clear color. DB
|
|
|
|
|
should send only tiles.
|
|
|
|
|
03 - CB_RESOLVE: Read from MRT0, average all
|
|
|
|
|
samples, and write to MRT1, which is one-sample. DB
|
|
|
|
|
should send only tiles.
|
|
|
|
|
04 - CB_DECOMPRESS: Decompress MRT0 to a
|
|
|
|
|
uncompressed color format. This is required before a
|
|
|
|
|
multisampled surface is accessed by the CPU, or used as
|
|
|
|
|
a texture. This also decompresses the FMASK buffer. A
|
|
|
|
|
CB_ELIMINATE_FAST_CLEAR pass before this is
|
|
|
|
|
unnecessary. DB should send tiles and quads.
|
|
|
|
|
05 - CB_FMASK_DECOMPRESS: Decompress the
|
|
|
|
|
FMASK buffer into a texture readable format. A
|
|
|
|
|
CB_ELIMINATE_FAST_CLEAR pass before this is
|
|
|
|
|
unnecessary. DB should send only tiles.
|
|
|
|
|
*/
|
|
|
|
|
auto mode = getBits(value, 6, 4);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
This field supports the 28 boolean ops that combine
|
|
|
|
|
either source and dest or brush and dest, with brush
|
|
|
|
|
provided by the shader in place of source. The code
|
|
|
|
|
0xCC (11001100) copies the source to the destination,
|
|
|
|
|
which disables the ROP function. ROP must be disabled
|
|
|
|
|
if any MRT enables blending.
|
|
|
|
|
|
|
|
|
|
POSSIBLE VALUES:
|
|
|
|
|
00 - 0x00: BLACKNESS
|
|
|
|
|
05 - 0x05
|
|
|
|
|
10 - 0x0A
|
|
|
|
|
15 - 0x0F
|
|
|
|
|
17 - 0x11: NOTSRCERASE
|
|
|
|
|
34 - 0x22
|
|
|
|
|
51 - 0x33: NOTSRCCOPY
|
|
|
|
|
68 - 0x44: SRCERASE
|
|
|
|
|
80 - 0x50
|
|
|
|
|
85 - 0x55: DSTINVERT
|
|
|
|
|
90 - 0x5A: PATINVERT
|
|
|
|
|
95 - 0x5F
|
|
|
|
|
102 - 0x66: SRCINVERT
|
|
|
|
|
119 - 0x77
|
|
|
|
|
136 - 0x88: SRCAND
|
|
|
|
|
153 - 0x99
|
|
|
|
|
160 - 0xA0
|
|
|
|
|
165 - 0xA5
|
|
|
|
|
170 - 0xAA
|
|
|
|
|
175 - 0xAF
|
|
|
|
|
187 - 0xBB: MERGEPAINT
|
|
|
|
|
204 - 0xCC: SRCCOPY
|
|
|
|
|
221 - 0xDD
|
|
|
|
|
238 - 0xEE: SRCPAINT
|
|
|
|
|
240 - 0xF0: PATCOPY
|
|
|
|
|
245 - 0xF5
|
|
|
|
|
250 - 0xFA
|
|
|
|
|
255 - 0xFF: WHITENESS
|
|
|
|
|
*/
|
|
|
|
|
auto rop3 = getBits(value, 23, 16);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * degammaEnable = %x\n", degammaEnable);
|
|
|
|
|
// std::printf(" * mode = %x\n", mode);
|
|
|
|
|
// std::printf(" * rop3 = %x\n", rop3);
|
2023-07-19 14:16:38 +02:00
|
|
|
|
|
|
|
|
cbColorFormat = static_cast<CbColorFormat>(mode);
|
|
|
|
|
cbRasterOp = static_cast<CbRasterOp>(rop3);
|
|
|
|
|
break;
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case PA_CL_CLIP_CNTL:
|
|
|
|
|
cullFront = getBit(value, 0);
|
|
|
|
|
cullBack = getBit(value, 1);
|
|
|
|
|
face = getBit(value, 2);
|
|
|
|
|
polyMode = getBits(value, 4, 3);
|
|
|
|
|
polyModeFrontPType = getBits(value, 7, 5);
|
|
|
|
|
polyModeBackPType = getBits(value, 10, 8);
|
|
|
|
|
polyOffsetFrontEnable = getBit(value, 11);
|
|
|
|
|
polyOffsetBackEnable = getBit(value, 12);
|
|
|
|
|
polyOffsetParaEnable = getBit(value, 13);
|
|
|
|
|
vtxWindowOffsetEnable = getBit(value, 16);
|
|
|
|
|
provokingVtxLast = getBit(value, 19);
|
|
|
|
|
erspCorrDis = getBit(value, 20);
|
|
|
|
|
multiPrimIbEna = getBit(value, 21);
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case PA_SC_SCREEN_SCISSOR_TL:
|
|
|
|
|
screenScissorX = static_cast<std::uint16_t>(value);
|
|
|
|
|
screenScissorY = static_cast<std::uint16_t>(value >> 16);
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case PA_SC_SCREEN_SCISSOR_BR:
|
|
|
|
|
screenScissorW = static_cast<std::uint16_t>(value) - screenScissorX;
|
|
|
|
|
screenScissorH = static_cast<std::uint16_t>(value >> 16) - screenScissorY;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case VGT_PRIMITIVE_TYPE:
|
|
|
|
|
vgtPrimitiveType = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_NUM_THREAD_X:
|
|
|
|
|
computeNumThreadX = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_NUM_THREAD_Y:
|
|
|
|
|
computeNumThreadY = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_NUM_THREAD_Z:
|
|
|
|
|
computeNumThreadZ = value;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_PGM_LO:
|
|
|
|
|
pgmComputeAddress &= ~((1ull << 40) - 1);
|
|
|
|
|
pgmComputeAddress |= static_cast<std::uint64_t>(value) << 8;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_PGM_HI:
|
|
|
|
|
pgmComputeAddress &= (1ull << 40) - 1;
|
|
|
|
|
pgmComputeAddress |= static_cast<std::uint64_t>(value) << 40;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_PGM_RSRC1:
|
|
|
|
|
break;
|
|
|
|
|
case COMPUTE_PGM_RSRC2:
|
|
|
|
|
computeUserSpgrs = (value >> 1) & 0x1f;
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
case COMPUTE_USER_DATA_0:
|
|
|
|
|
case COMPUTE_USER_DATA_1:
|
|
|
|
|
case COMPUTE_USER_DATA_2:
|
|
|
|
|
case COMPUTE_USER_DATA_3:
|
|
|
|
|
case COMPUTE_USER_DATA_4:
|
|
|
|
|
case COMPUTE_USER_DATA_5:
|
|
|
|
|
case COMPUTE_USER_DATA_6:
|
|
|
|
|
case COMPUTE_USER_DATA_7:
|
|
|
|
|
case COMPUTE_USER_DATA_8:
|
|
|
|
|
case COMPUTE_USER_DATA_9:
|
|
|
|
|
case COMPUTE_USER_DATA_10:
|
|
|
|
|
case COMPUTE_USER_DATA_11:
|
|
|
|
|
case COMPUTE_USER_DATA_12:
|
|
|
|
|
case COMPUTE_USER_DATA_13:
|
|
|
|
|
case COMPUTE_USER_DATA_14:
|
|
|
|
|
case COMPUTE_USER_DATA_15:
|
|
|
|
|
userComputeData[regId - COMPUTE_USER_DATA_0] = value;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CB_BLEND0_CONTROL: {
|
|
|
|
|
blendColorSrc = (BlendMultiplier)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK);
|
|
|
|
|
blendColorFn = (BlendFunc)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK);
|
|
|
|
|
blendColorDst = (BlendMultiplier)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK);
|
|
|
|
|
auto opacity_weight =
|
|
|
|
|
fetchMaskedValue(value, CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK);
|
|
|
|
|
blendAlphaSrc = (BlendMultiplier)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK);
|
|
|
|
|
blendAlphaFn = (BlendFunc)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK);
|
|
|
|
|
blendAlphaDst = (BlendMultiplier)fetchMaskedValue(
|
|
|
|
|
value, CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK);
|
|
|
|
|
blendSeparateAlpha =
|
|
|
|
|
fetchMaskedValue(value,
|
|
|
|
|
CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK) != 0;
|
|
|
|
|
blendEnable =
|
|
|
|
|
fetchMaskedValue(value, CB_BLEND0_CONTROL_BLEND_ENABLE_MASK) != 0;
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf(" * COLOR_SRCBLEND = %x\n", blendColorSrc);
|
|
|
|
|
// std::printf(" * COLOR_COMB_FCN = %x\n", blendColorFn);
|
|
|
|
|
// std::printf(" * COLOR_DESTBLEND = %x\n", blendColorDst);
|
|
|
|
|
// std::printf(" * OPACITY_WEIGHT = %x\n", opacity_weight);
|
|
|
|
|
// std::printf(" * ALPHA_SRCBLEND = %x\n", blendAlphaSrc);
|
|
|
|
|
// std::printf(" * ALPHA_COMB_FCN = %x\n", blendAlphaFn);
|
|
|
|
|
// std::printf(" * ALPHA_DESTBLEND = %x\n", blendAlphaDst);
|
|
|
|
|
// std::printf(" * SEPARATE_ALPHA_BLEND = %x\n", blendSeparateAlpha);
|
|
|
|
|
// std::printf(" * BLEND_ENABLE = %x\n", blendEnable);
|
2023-07-19 14:16:38 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
2023-07-19 14:16:38 +02:00
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
// Records a pipeline barrier into `commandBuffer` that moves `image` from
// `oldLayout` to `newLayout`.
//
// The barrier covers mip level 0 / array layer 0 of the given aspects only and
// performs no queue family ownership transfer. Pipeline stages and access
// masks on both sides are derived from the layouts; a layout that has no entry
// in the table below ends in util::unreachable.
static void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
                                  VkImageAspectFlags aspectFlags,
                                  VkImageLayout oldLayout,
                                  VkImageLayout newLayout) {
  using StageAndAccess = std::pair<VkPipelineStageFlags, VkAccessFlags>;

  // Maps an image layout to the pipeline stage that uses it and the access
  // types that have to be made available/visible for it.
  const auto stageAndAccessFor = [](VkImageLayout layout) -> StageAndAccess {
    switch (layout) {
    case VK_IMAGE_LAYOUT_UNDEFINED:
    case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
      return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};

    case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};

    case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
      return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};

    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
      return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};

    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
      return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};

    case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
      return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};

    default:
      util::unreachable("unsupported layout transition! %d", layout);
    }
  };

  const auto [srcStage, srcAccess] = stageAndAccessFor(oldLayout);
  const auto [dstStage, dstAccess] = stageAndAccessFor(newLayout);

  // Single mip level, single array layer.
  VkImageSubresourceRange range{};
  range.aspectMask = aspectFlags;
  range.baseMipLevel = 0;
  range.levelCount = 1;
  range.baseArrayLayer = 0;
  range.layerCount = 1;

  VkImageMemoryBarrier imageBarrier{};
  imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  imageBarrier.srcAccessMask = srcAccess;
  imageBarrier.dstAccessMask = dstAccess;
  imageBarrier.oldLayout = oldLayout;
  imageBarrier.newLayout = newLayout;
  imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imageBarrier.image = image;
  imageBarrier.subresourceRange = range;

  vkCmdPipelineBarrier(commandBuffer, srcStage, dstStage, 0, 0, nullptr, 0,
                       nullptr, 1, &imageBarrier);
}
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
// Returns the bit width this code associates with one element of `format`:
// for the plain formats the sum of the component widths, 8 for every
// block-compressed (Bc*) format and 0 for the invalid format, all FMASK
// formats and the other formats without a width assigned here.
static int getBitWidthOfSurfaceFormat(SurfaceFormat format) {
  switch (format) {
  case kSurfaceFormat1:
    return 1;

  // NOTE(review): GB_GR reports 2 + 2 bits while its sibling BG_RG (below)
  // reports 0 - confirm the intended widths of these two packed formats.
  case kSurfaceFormatGB_GR:
    return 4;

  case kSurfaceFormat8:
  case kSurfaceFormat4_4:
  case kSurfaceFormatBc1:
  case kSurfaceFormatBc2:
  case kSurfaceFormatBc3:
  case kSurfaceFormatBc4:
  case kSurfaceFormatBc5:
  case kSurfaceFormatBc6:
  case kSurfaceFormatBc7:
    return 8;

  case kSurfaceFormat16:
  case kSurfaceFormat8_8:
  case kSurfaceFormat5_6_5:
  case kSurfaceFormat1_5_5_5:
  case kSurfaceFormat5_5_5_1:
  case kSurfaceFormat4_4_4_4:
  case kSurfaceFormat6_5_5:
    return 16;

  case kSurfaceFormat32:
  case kSurfaceFormat16_16:
  case kSurfaceFormat10_11_11:
  case kSurfaceFormat11_11_10:
  case kSurfaceFormat10_10_10_2:
  case kSurfaceFormat2_10_10_10:
  case kSurfaceFormat8_8_8_8:
  case kSurfaceFormat8_24:
  case kSurfaceFormat24_8:
  case kSurfaceFormat5_9_9_9:
    return 32;

  case kSurfaceFormat32_32:
  case kSurfaceFormat16_16_16_16:
  case kSurfaceFormatX24_8_32:
    return 64;

  case kSurfaceFormat32_32_32:
    return 96;

  case kSurfaceFormat32_32_32_32:
    return 128;

  // Formats that have no width assigned.
  case kSurfaceFormatInvalid:
  case kSurfaceFormatBG_RG:
  case kSurfaceFormatFmask8_S2_F1:
  case kSurfaceFormatFmask8_S4_F1:
  case kSurfaceFormatFmask8_S8_F1:
  case kSurfaceFormatFmask8_S2_F2:
  case kSurfaceFormatFmask8_S4_F2:
  case kSurfaceFormatFmask8_S4_F4:
  case kSurfaceFormatFmask16_S16_F1:
  case kSurfaceFormatFmask16_S8_F2:
  case kSurfaceFormatFmask32_S16_F2:
  case kSurfaceFormatFmask32_S8_F4:
  case kSurfaceFormatFmask32_S8_F8:
  case kSurfaceFormatFmask64_S16_F4:
  case kSurfaceFormatFmask64_S16_F8:
  case kSurfaceFormat1Reversed:
    return 0;
  }

  // Values outside the enumerators handled above.
  return 0;
}
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
// Translates a (surface format, channel type) pair into the matching VkFormat.
//
// Every supported surface format has its own inner switch over the channel
// type. A pair without a mapping leaves both switches and ends in
// util::unreachable, which reports the two raw values.
//
// Each outer case ends with an explicit `break`: without it an unmatched
// channel type would fall into the switch of the *next* surface format and
// could return a format that belongs to a different surface layout.
static VkFormat surfaceFormatToVkFormat(SurfaceFormat surface,
                                        TextureChannelType channel) {
  switch (surface) {
  case kSurfaceFormat8: {
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_R8_UNORM;
    case kTextureChannelTypeSNorm:
      return VK_FORMAT_R8_SNORM;
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R8_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R8_SINT;
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_R8_SRGB;
    default:
      break;
    }

    break;
  }

  case kSurfaceFormat32:
    switch (channel) {
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R32_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R32_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R32_SFLOAT;
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_R32_UINT; // FIXME
    default:
      break;
    }
    break;

  case kSurfaceFormat8_8:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_R8G8_UNORM;
    case kTextureChannelTypeSNorm:
      return VK_FORMAT_R8G8_SNORM;
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R8G8_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R8G8_SINT;
    default:
      break;
    }
    break;

  case kSurfaceFormat16_16:
    switch (channel) {
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R16G16_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R16G16_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R16G16_SFLOAT;
    default:
      break;
    }
    break;

  case kSurfaceFormat32_32:
    switch (channel) {
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R32G32_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R32G32_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R32G32_SFLOAT;
    default:
      break;
    }
    break;

  case kSurfaceFormat16_16_16_16:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_R16G16B16A16_UNORM;
    case kTextureChannelTypeSNorm:
      return VK_FORMAT_R16G16B16A16_SNORM;
    case kTextureChannelTypeUScaled:
      return VK_FORMAT_R16G16B16A16_USCALED;
    case kTextureChannelTypeSScaled:
      return VK_FORMAT_R16G16B16A16_SSCALED;
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R16G16B16A16_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R16G16B16A16_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R16G16B16A16_SFLOAT;
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_R16G16B16A16_UNORM; // FIXME: wrong

    default:
      break;
    }
    break;

  case kSurfaceFormat32_32_32:
    switch (channel) {
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R32G32B32_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R32G32B32_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R32G32B32_SFLOAT;
    default:
      break;
    }
    break;

  case kSurfaceFormat32_32_32_32:
    switch (channel) {
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R32G32B32A32_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R32G32B32A32_SINT;
    case kTextureChannelTypeFloat:
      return VK_FORMAT_R32G32B32A32_SFLOAT;
    default:
      break;
    }
    break;

  case kSurfaceFormat24_8:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_D24_UNORM_S8_UINT;

    default:
      break;
    }

    break;

  case kSurfaceFormat8_8_8_8:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_R8G8B8A8_UNORM;
    case kTextureChannelTypeSNorm:
      return VK_FORMAT_R8G8B8A8_SNORM;
    case kTextureChannelTypeUScaled:
      return VK_FORMAT_R8G8B8A8_USCALED;
    case kTextureChannelTypeSScaled:
      return VK_FORMAT_R8G8B8A8_SSCALED;
    case kTextureChannelTypeUInt:
      return VK_FORMAT_R8G8B8A8_UINT;
    case kTextureChannelTypeSInt:
      return VK_FORMAT_R8G8B8A8_SINT;
    // case kTextureChannelTypeSNormNoZero:
    //   return VK_FORMAT_R8G8B8A8_SNORM;
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_R8G8B8A8_SRGB;
      // case kTextureChannelTypeUBNorm:
      //   return VK_FORMAT_R8G8B8A8_UNORM;
      // case kTextureChannelTypeUBNormNoZero:
      //   return VK_FORMAT_R8G8B8A8_UNORM;
      // case kTextureChannelTypeUBInt:
      //   return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
      // case kTextureChannelTypeUBScaled:
      //   return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;

    default:
      break;
    }
    break;

  case kSurfaceFormatBc1:
    switch (channel) {
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_BC1_RGBA_SRGB_BLOCK;
    default:
      break;
    }
    break; // do not fall through into the Bc3 table

  case kSurfaceFormatBc3:
    switch (channel) {
    case kTextureChannelTypeSrgb:
      return VK_FORMAT_BC3_SRGB_BLOCK;
    default:
      break;
    }
    break; // do not fall through into the Bc4 table

  case kSurfaceFormatBc4:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_BC4_UNORM_BLOCK;

    case kTextureChannelTypeSNorm:
      return VK_FORMAT_BC4_SNORM_BLOCK;

    default:
      break;
    }
    break; // do not fall through into the Bc5 table

  case kSurfaceFormatBc5:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_BC5_UNORM_BLOCK;

    case kTextureChannelTypeSNorm:
      return VK_FORMAT_BC5_SNORM_BLOCK;

    default:
      break;
    }
    break; // do not fall through into the Bc7 table

  case kSurfaceFormatBc7:
    switch (channel) {
    case kTextureChannelTypeUNorm:
      return VK_FORMAT_BC7_UNORM_BLOCK;

    case kTextureChannelTypeSrgb:
      return VK_FORMAT_BC7_SRGB_BLOCK;

    default:
      break;
    }
    break;

  default:
    break;
  }

  util::unreachable("unimplemented surface format. %x.%x\n", (int)surface,
                    (int)channel);
}
|
|
|
|
|
|
|
|
|
|
// Picks the Vulkan topology used to draw the given primitive type.
//
// Types with a direct Vulkan counterpart map one-to-one. Rect lists, quad
// lists, quad strips and polygons are all drawn as triangle lists, and line
// loops are currently drawn as line strips. Any other value ends in
// util::unreachable.
static VkPrimitiveTopology getVkPrimitiveType(PrimitiveType type) {
  switch (type) {
  case kPrimitiveTypePointList:
    return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

  case kPrimitiveTypeLineList:
    return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
  case kPrimitiveTypeLineListAdjacency:
    return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;

  case kPrimitiveTypeLineStrip:
    return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
  case kPrimitiveTypeLineStripAdjacency:
    return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
  case kPrimitiveTypeLineLoop:
    return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME

  case kPrimitiveTypeTriList:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
  case kPrimitiveTypeTriListAdjacency:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;

  case kPrimitiveTypeTriFan:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;

  case kPrimitiveTypeTriStrip:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
  case kPrimitiveTypeTriStripAdjacency:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY;

  case kPrimitiveTypePatch:
    return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;

  // No Vulkan equivalent: these are submitted as plain triangle lists.
  case kPrimitiveTypeRectList:
  case kPrimitiveTypeQuadList:
  case kPrimitiveTypeQuadStrip:
  case kPrimitiveTypePolygon:
    return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;

  default:
    util::unreachable();
  }
}
|
|
|
|
|
|
|
|
|
|
/// Maps an index of the triangle list generated for a quad list onto the
/// quad-list vertex that feeds it.
///
/// Each quad owns 4 consecutive source vertices and is expanded into two
/// triangles, i.e. 6 generated indices.
///
/// @param index position in the generated (triangle list) index stream
/// @return {index, source vertex index}; the first element is the input
///         passed through unchanged
static std::pair<std::uint64_t, std::uint64_t>
quadListPrimConverter(std::uint64_t index) {
  // Corners used by the two triangles that cover one quad: (0,1,2), (2,3,0).
  static constexpr std::uint64_t kCorners[] = {0, 1, 2, 2, 3, 0};

  const std::uint64_t quad = index / 6;
  // Quads do not share vertices, so the base vertex advances by 4 per quad.
  // (The previous code advanced it by 1, which made every quad after the
  // first read vertices belonging to its predecessor.)
  return {index, quad * 4 + kCorners[index % 6]};
}
|
|
|
|
|
|
|
|
|
|
/// Maps an index of the triangle list generated for a quad strip onto the
/// source vertex that feeds it.
///
/// @param index position in the generated (triangle list) index stream
/// @return {index, source vertex index}; the first element is the input
///         passed through unchanged
///
/// NOTE(review): the base vertex advances by 4 per quad, i.e. neighbouring
/// quads do not share vertices here. A classic strip would share two vertices
/// between neighbours (stride 2) - confirm this is intended.
static std::pair<std::uint64_t, std::uint64_t>
quadStripPrimConverter(std::uint64_t index) {
  // Corners used by the two triangles of one quad: (0,1,3) and (0,3,2).
  static constexpr int kCorners[] = {0, 1, 3, 0, 3, 2};

  const std::uint64_t quad = index / 6;
  const std::uint64_t corner = index % 6;
  return {index, quad * 4 + kCorners[corner]};
}
|
|
|
|
|
|
|
|
|
|
using ConverterFn =
|
|
|
|
|
std::pair<std::uint64_t, std::uint64_t>(std::uint64_t index);
|
|
|
|
|
|
|
|
|
|
static ConverterFn *getPrimConverterFn(PrimitiveType primType,
|
|
|
|
|
std::uint32_t *count) {
|
|
|
|
|
switch (primType) {
|
|
|
|
|
case kPrimitiveTypeQuadList:
|
|
|
|
|
*count = *count / 4 * 6;
|
|
|
|
|
return quadListPrimConverter;
|
|
|
|
|
|
|
|
|
|
case kPrimitiveTypeQuadStrip:
|
|
|
|
|
*count = *count / 4 * 6;
|
|
|
|
|
return quadStripPrimConverter;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
util::unreachable();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Tells whether draws of the given primitive type need their indices
/// rewritten before they can be submitted.
///
/// Unknown primitive types end up in util::unreachable().
static bool isPrimRequiresConversion(PrimitiveType primType) {
  switch (primType) {
  // Types that need their indices rewritten.
  case kPrimitiveTypeQuadList:
  case kPrimitiveTypeQuadStrip:
  case kPrimitiveTypePolygon:
    return true;

  case kPrimitiveTypeLineLoop: // FIXME
    util::unreachable();
    return false;

  case kPrimitiveTypeRectList:
    return false; // handled by geometry shader

  // Types that are drawn as-is.
  case kPrimitiveTypePointList:
  case kPrimitiveTypeLineList:
  case kPrimitiveTypeLineStrip:
  case kPrimitiveTypeTriList:
  case kPrimitiveTypeTriFan:
  case kPrimitiveTypeTriStrip:
  case kPrimitiveTypePatch:
  case kPrimitiveTypeLineListAdjacency:
  case kPrimitiveTypeLineStripAdjacency:
  case kPrimitiveTypeTriListAdjacency:
  case kPrimitiveTypeTriStripAdjacency:
    return false;

  default:
    util::unreachable();
  }
}
|
|
|
|
|
|
|
|
|
|
static bool validateSpirv(const std::vector<uint32_t> &bin) {
|
|
|
|
|
spv_target_env target_env = SPV_ENV_VULKAN_1_3;
|
|
|
|
|
spv_context spvContext = spvContextCreate(target_env);
|
|
|
|
|
spv_diagnostic diagnostic = nullptr;
|
|
|
|
|
spv_const_binary_t binary = {bin.data(), bin.size()};
|
|
|
|
|
spv_result_t error = spvValidate(spvContext, &binary, &diagnostic);
|
|
|
|
|
if (error != 0)
|
|
|
|
|
spvDiagnosticPrint(diagnostic);
|
|
|
|
|
spvDiagnosticDestroy(diagnostic);
|
|
|
|
|
spvContextDestroy(spvContext);
|
|
|
|
|
return error == 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void printSpirv(const std::vector<uint32_t> &bin) {
|
2023-07-14 03:33:45 +02:00
|
|
|
spv_target_env target_env = SPV_ENV_VULKAN_1_3;
|
|
|
|
|
spv_context spvContext = spvContextCreate(target_env);
|
|
|
|
|
spv_diagnostic diagnostic = nullptr;
|
|
|
|
|
|
|
|
|
|
spv_result_t error = spvBinaryToText(
|
|
|
|
|
spvContext, bin.data(), bin.size(),
|
|
|
|
|
SPV_BINARY_TO_TEXT_OPTION_PRINT | // SPV_BINARY_TO_TEXT_OPTION_COLOR |
|
|
|
|
|
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES |
|
|
|
|
|
SPV_BINARY_TO_TEXT_OPTION_COMMENT | SPV_BINARY_TO_TEXT_OPTION_INDENT,
|
|
|
|
|
nullptr, &diagnostic);
|
|
|
|
|
|
|
|
|
|
if (error != 0) {
|
|
|
|
|
spvDiagnosticPrint(diagnostic);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
spvDiagnosticDestroy(diagnostic);
|
|
|
|
|
spvContextDestroy(spvContext);
|
|
|
|
|
|
|
|
|
|
if (error != 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
spirv_cross::CompilerGLSL glsl(bin);
|
|
|
|
|
spirv_cross::CompilerGLSL::Options options;
|
|
|
|
|
options.version = 460;
|
|
|
|
|
options.es = false;
|
|
|
|
|
options.vulkan_semantics = true;
|
|
|
|
|
glsl.set_common_options(options);
|
|
|
|
|
std::printf("%s\n", glsl.compile().c_str());
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static std::optional<std::vector<uint32_t>>
|
|
|
|
|
optimizeSpirv(std::span<const std::uint32_t> spirv) {
|
|
|
|
|
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3);
|
|
|
|
|
optimizer.RegisterPerformancePasses();
|
|
|
|
|
optimizer.RegisterPass(spvtools::CreateSimplificationPass());
|
|
|
|
|
|
|
|
|
|
std::vector<uint32_t> result;
|
|
|
|
|
if (optimizer.Run(spirv.data(), spirv.size(), &result)) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
util::unreachable();
|
|
|
|
|
return {};
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
/// Converts a shader stage of the converter into the matching Vulkan stage
/// bit. Stage::None has no counterpart and yields VK_SHADER_STAGE_ALL.
static VkShaderStageFlagBits shaderStageToVk(amdgpu::shader::Stage stage) {
  VkShaderStageFlagBits stageBit = VK_SHADER_STAGE_ALL;

  switch (stage) {
  case amdgpu::shader::Stage::Vertex:
    stageBit = VK_SHADER_STAGE_VERTEX_BIT;
    break;
  case amdgpu::shader::Stage::Geometry:
    stageBit = VK_SHADER_STAGE_GEOMETRY_BIT;
    break;
  case amdgpu::shader::Stage::Fragment:
    stageBit = VK_SHADER_STAGE_FRAGMENT_BIT;
    break;
  case amdgpu::shader::Stage::Compute:
    stageBit = VK_SHADER_STAGE_COMPUTE_BIT;
    break;
  case amdgpu::shader::Stage::None:
    break; // keeps the VK_SHADER_STAGE_ALL fallback
  }

  return stageBit;
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
// Backing memory resources of this file. They are initialized on first use by
// the two getters that follow; use the getters instead of these objects.
static vk::MemoryResource hostVisibleMemory;
static vk::MemoryResource deviceLocalMemory;

/// Returns the shared host-visible memory resource, initializing it with
/// 512 MiB the first time it is requested.
static vk::MemoryResource &getHostVisibleMemory() {
  constexpr int kHostVisibleBytes = 512 * 1024 * 1024;

  if (hostVisibleMemory) {
    return hostVisibleMemory;
  }

  hostVisibleMemory.initHostVisible(kHostVisibleBytes);
  return hostVisibleMemory;
}

/// Returns the shared device-local memory resource, initializing it with
/// 512 MiB the first time it is requested.
static vk::MemoryResource &getDeviceLocalMemory() {
  constexpr int kDeviceLocalBytes = 512 * 1024 * 1024;

  if (deviceLocalMemory) {
    return deviceLocalMemory;
  }

  deviceLocalMemory.initDeviceLocal(kDeviceLocalBytes);
  return deviceLocalMemory;
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
struct BufferRef {
|
|
|
|
|
VkBuffer buffer = VK_NULL_HANDLE;
|
2023-06-24 14:59:27 +02:00
|
|
|
VkDeviceSize offset = 0;
|
|
|
|
|
VkDeviceSize size = 0;
|
|
|
|
|
};
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
/// Checks whether `offset` is a multiple of `alignment` using a bit mask.
///
/// The mask trick is only meaningful for power-of-two alignments; other
/// values are not rejected.
static constexpr bool isAligned(std::uint64_t offset, std::uint64_t alignment) {
  const std::uint64_t lowBitsMask = alignment - 1;
  return (offset & lowBitsMask) == 0;
}
|
2023-07-24 05:03:55 +02:00
|
|
|
/*
|
2023-07-17 14:35:53 +02:00
|
|
|
struct AreaCache {
|
|
|
|
|
vk::MemoryResource hostMemoryResource;
|
|
|
|
|
vk::Buffer directBuffer;
|
|
|
|
|
std::uint64_t areaAddress;
|
|
|
|
|
std::uint64_t areaSize;
|
|
|
|
|
std::vector<vk::Buffer> buffers;
|
|
|
|
|
std::forward_list<vk::Image2D> images;
|
2023-07-24 05:03:55 +02:00
|
|
|
std::mutex mtx;
|
2023-07-17 14:35:53 +02:00
|
|
|
|
|
|
|
|
struct WriteBackBuffer {
|
|
|
|
|
void *bufferMemory;
|
|
|
|
|
std::uint64_t address;
|
|
|
|
|
std::uint64_t size;
|
|
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
struct WriteBackImage {
|
2023-07-19 14:02:57 +02:00
|
|
|
vk::ImageRef image;
|
2023-07-17 14:35:53 +02:00
|
|
|
std::uint64_t address;
|
|
|
|
|
std::uint32_t tileMode;
|
|
|
|
|
std::uint32_t width;
|
|
|
|
|
std::uint32_t height;
|
|
|
|
|
std::uint32_t depth;
|
|
|
|
|
std::uint32_t pitch;
|
|
|
|
|
VkImageAspectFlags aspect;
|
|
|
|
|
SurfaceFormat format;
|
|
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
std::vector<WriteBackBuffer> writeBackBuffers;
|
|
|
|
|
std::vector<WriteBackImage> writeBackImages;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
AreaCache(std::uint64_t areaAddress, std::uint64_t areaSize)
|
|
|
|
|
: areaAddress(areaAddress), areaSize(areaSize) {
|
|
|
|
|
if (kUseDirectMemory) {
|
2023-07-24 05:03:55 +02:00
|
|
|
hostMemoryResource.initFromHost(
|
2023-07-17 14:35:53 +02:00
|
|
|
(char *)g_rwMemory + areaAddress - g_memoryBase, areaSize);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
directBuffer = vk::Buffer::CreateExternal(
|
|
|
|
|
areaSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
|
|
|
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
|
|
|
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
|
|
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
|
|
|
|
directBuffer.bindMemory(hostMemoryResource.getFromOffset(0, areaSize));
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
~AreaCache() { writeBack(); }
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
BufferRef getBuffer(std::uint64_t areaOffset, std::uint64_t size,
|
|
|
|
|
VkBufferUsageFlags usage, shader::AccessOp access) {
|
|
|
|
|
if (kUseDirectMemory &&
|
|
|
|
|
isAligned(areaOffset, vk::g_physicalDeviceProperties.limits
|
|
|
|
|
.minStorageBufferOffsetAlignment)) {
|
|
|
|
|
// offset is supported natively, return direct buffer
|
|
|
|
|
return {directBuffer.getHandle(), areaOffset, size};
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
|
|
|
|
|
usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
|
|
|
|
|
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
auto tmpBuffer = vk::Buffer::Allocate(getHostVisibleMemory(), size, usage);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
|
|
|
|
|
std::memcpy(tmpBuffer.getData(),
|
|
|
|
|
g_hostMemory.getPointer(areaAddress + areaOffset), size);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
auto result =
|
|
|
|
|
BufferRef{.buffer = tmpBuffer.getHandle(), .offset = 0, .size = size};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
|
|
|
|
|
writeBackBuffers.push_back({.bufferMemory = tmpBuffer.getData(),
|
|
|
|
|
.address = areaAddress + areaOffset,
|
|
|
|
|
.size = result.size});
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
buffers.push_back(std::move(tmpBuffer));
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:02:57 +02:00
|
|
|
vk::ImageRef getImage(std::uint64_t areaOffset, VkCommandBuffer cmdBuffer,
|
2023-07-17 14:35:53 +02:00
|
|
|
TileMode tileMode, std::uint32_t width,
|
|
|
|
|
std::uint32_t height, std::uint32_t depth,
|
|
|
|
|
std::uint32_t pitch, SurfaceFormat format,
|
|
|
|
|
TextureChannelType channelType, VkImageUsageFlags usage,
|
|
|
|
|
VkImageAspectFlags aspect, shader::AccessOp access,
|
|
|
|
|
std::uint64_t writeBackAddress) {
|
|
|
|
|
if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
|
|
|
|
|
usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
2023-06-25 21:58:15 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
|
|
|
|
|
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if (depth != 1) {
|
|
|
|
|
util::unreachable();
|
2023-06-25 21:58:15 +02:00
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
auto vkFormat = surfaceFormatToVkFormat(format, channelType);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:02:57 +02:00
|
|
|
auto imageHandle = vk::Image2D::Allocate(getDeviceLocalMemory(), width,
|
|
|
|
|
height, vkFormat, usage);
|
|
|
|
|
|
2023-07-20 02:14:52 +02:00
|
|
|
auto image = vk::ImageRef(images.emplace_front(std::move(imageHandle)));
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
|
|
|
|
|
buffers.push_back(image.read(
|
|
|
|
|
cmdBuffer, getHostVisibleMemory(),
|
|
|
|
|
g_hostMemory.getPointer(areaAddress + areaOffset), tileMode, aspect,
|
|
|
|
|
getBitWidthOfSurfaceFormat(format) / 8, pitch));
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
|
|
|
|
|
writeBackImages.push_back({
|
2023-07-19 14:02:57 +02:00
|
|
|
.image = image,
|
2023-07-17 14:35:53 +02:00
|
|
|
.address = writeBackAddress,
|
|
|
|
|
.tileMode = tileMode,
|
|
|
|
|
.width = width,
|
|
|
|
|
.height = height,
|
|
|
|
|
.depth = depth,
|
|
|
|
|
.pitch = pitch,
|
|
|
|
|
.aspect = aspect,
|
|
|
|
|
.format = format,
|
|
|
|
|
});
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:02:57 +02:00
|
|
|
return image;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
void writeImageToBuffers(VkCommandBuffer cmd) {
|
|
|
|
|
for (auto wbImage : writeBackImages) {
|
2023-07-19 14:02:57 +02:00
|
|
|
auto buffer = wbImage.image.writeToBuffer(cmd, getHostVisibleMemory(),
|
|
|
|
|
wbImage.aspect);
|
2023-07-17 14:35:53 +02:00
|
|
|
writeBackBuffers.push_back(
|
|
|
|
|
{.bufferMemory = buffer.getData(),
|
|
|
|
|
.address = wbImage.address,
|
|
|
|
|
.size = wbImage.pitch * wbImage.height * wbImage.depth *
|
|
|
|
|
(getBitWidthOfSurfaceFormat(wbImage.format) / 8)});
|
|
|
|
|
buffers.push_back(std::move(buffer));
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
void writeBack() {
|
|
|
|
|
for (auto wbBuffer : writeBackBuffers) {
|
|
|
|
|
std::memcpy(g_hostMemory.getPointer(wbBuffer.address),
|
|
|
|
|
wbBuffer.bufferMemory, wbBuffer.size);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
static std::map<std::uint64_t, AreaCache, std::greater<>> areaCaches;
|
2023-07-24 05:03:55 +02:00
|
|
|
static std::mutex areaMutex;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
static AreaCache &getArea(std::uint64_t address, std::size_t size) {
|
2023-07-24 05:03:55 +02:00
|
|
|
std::lock_guard lock(areaMutex);
|
2023-07-17 14:35:53 +02:00
|
|
|
auto it = areaCaches.lower_bound(address);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if (it == areaCaches.end() ||
|
|
|
|
|
address >= it->second.areaAddress + it->second.areaSize ||
|
|
|
|
|
it->second.areaAddress >= address + size) {
|
|
|
|
|
auto area = memoryAreaTable.queryArea(address / kPageSize);
|
|
|
|
|
area.beginAddress *= kPageSize;
|
|
|
|
|
area.endAddress *= kPageSize;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
assert(address >= area.beginAddress && address + size < area.endAddress);
|
|
|
|
|
it = areaCaches.emplace_hint(
|
|
|
|
|
it, std::piecewise_construct, std::tuple{area.beginAddress},
|
|
|
|
|
std::tuple{area.beginAddress, area.endAddress});
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return it->second;
|
|
|
|
|
}
|
2023-07-17 14:35:53 +02:00
|
|
|
static void updateCacheAreasState() {
|
|
|
|
|
auto areas = std::exchange(memoryAreaTable.invalidated, {});
|
|
|
|
|
auto it = areaCaches.begin();
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if (it == areaCaches.end()) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
for (auto area : areas) {
|
|
|
|
|
while (it->first > area) {
|
|
|
|
|
if (++it == areaCaches.end()) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if (it->first == area) {
|
|
|
|
|
it = areaCaches.erase(it);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
if (it == areaCaches.end()) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-07-24 05:03:55 +02:00
|
|
|
*/
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
/// Submits one command buffer to `queue` and signals the timeline semaphore
/// `sem` with `signalValue` once it has executed. The submission does not wait
/// on anything and uses no fence.
static void submitToQueue(VkQueue queue, VkCommandBuffer cmdBuffer,
                          vk::Semaphore &sem, std::uint64_t signalValue) {
  // Timeline payload for the single signal semaphore below.
  VkTimelineSemaphoreSubmitInfo timelineInfo{};
  timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
  timelineInfo.signalSemaphoreValueCount = 1;
  timelineInfo.pSignalSemaphoreValues = &signalValue;

  VkSubmitInfo submit{};
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.pNext = &timelineInfo;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = &cmdBuffer;
  submit.signalSemaphoreCount = 1;
  submit.pSignalSemaphores = &sem.mSemaphore;

  Verify() << vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
/// Submits one command buffer to `queue` that waits until the timeline
/// semaphore `sem` reaches `waitValue` (at pipeline stage `waitDstStage`) and
/// signals the same semaphore with `signalValue` once it has executed. No
/// fence is used.
static void submitToQueue(VkQueue queue, VkCommandBuffer cmdBuffer,
                          vk::Semaphore &sem, std::uint64_t waitValue,
                          std::uint64_t signalValue,
                          VkPipelineStageFlags waitDstStage) {
  // Timeline payloads for the wait and the signal operation below.
  VkTimelineSemaphoreSubmitInfo timelineInfo{};
  timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
  timelineInfo.waitSemaphoreValueCount = 1;
  timelineInfo.pWaitSemaphoreValues = &waitValue;
  timelineInfo.signalSemaphoreValueCount = 1;
  timelineInfo.pSignalSemaphoreValues = &signalValue;

  // The same semaphore is used for waiting and for signalling.
  VkSubmitInfo submit{};
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.pNext = &timelineInfo;
  submit.waitSemaphoreCount = 1;
  submit.pWaitSemaphores = &sem.mSemaphore;
  submit.pWaitDstStageMask = &waitDstStage;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = &cmdBuffer;
  submit.signalSemaphoreCount = 1;
  submit.pSignalSemaphores = &sem.mSemaphore;

  Verify() << vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
static void
|
|
|
|
|
fillStageBindings(std::vector<VkDescriptorSetLayoutBinding> &bindings,
|
|
|
|
|
shader::Stage stage) {
|
|
|
|
|
for (std::size_t i = 0; i < shader::UniformBindings::kBufferSlots; ++i) {
|
|
|
|
|
auto binding = shader::UniformBindings::getBufferBinding(stage, i);
|
|
|
|
|
bindings[binding] = VkDescriptorSetLayoutBinding{
|
|
|
|
|
.binding = binding,
|
|
|
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
|
|
|
.descriptorCount = 1,
|
|
|
|
|
.stageFlags = shaderStageToVk(stage),
|
|
|
|
|
.pImmutableSamplers = nullptr};
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
for (std::size_t i = 0; i < shader::UniformBindings::kImageSlots; ++i) {
|
|
|
|
|
auto binding = shader::UniformBindings::getImageBinding(stage, i);
|
|
|
|
|
bindings[binding] = VkDescriptorSetLayoutBinding{
|
|
|
|
|
.binding = binding,
|
|
|
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
|
|
|
|
.descriptorCount = 1,
|
|
|
|
|
.stageFlags = shaderStageToVk(stage),
|
|
|
|
|
.pImmutableSamplers = nullptr};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < shader::UniformBindings::kSamplerSlots; ++i) {
|
|
|
|
|
auto binding = shader::UniformBindings::getSamplerBinding(stage, i);
|
|
|
|
|
bindings[binding] = VkDescriptorSetLayoutBinding{
|
|
|
|
|
.binding = binding,
|
|
|
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
|
|
|
|
|
.descriptorCount = 1,
|
|
|
|
|
.stageFlags = shaderStageToVk(stage),
|
|
|
|
|
.pImmutableSamplers = nullptr};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
static std::pair<VkDescriptorSetLayout, VkPipelineLayout> getGraphicsLayout() {
|
|
|
|
|
static std::pair<VkDescriptorSetLayout, VkPipelineLayout> result{};
|
|
|
|
|
|
|
|
|
|
if (result.first != VK_NULL_HANDLE) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<VkDescriptorSetLayoutBinding> bindings(
|
|
|
|
|
shader::UniformBindings::kStageSize * 2);
|
|
|
|
|
|
|
|
|
|
for (auto stage : {shader::Stage::Vertex, shader::Stage::Fragment}) {
|
|
|
|
|
fillStageBindings(bindings, stage);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo descLayoutInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
|
|
|
.bindingCount = static_cast<uint32_t>(bindings.size()),
|
|
|
|
|
.pBindings = bindings.data(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Verify() << vkCreateDescriptorSetLayout(vk::g_vkDevice, &descLayoutInfo,
|
|
|
|
|
vk::g_vkAllocator, &result.first);
|
|
|
|
|
|
|
|
|
|
VkPipelineLayoutCreateInfo piplineLayoutInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
|
|
|
.setLayoutCount = 1,
|
|
|
|
|
.pSetLayouts = &result.first,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Verify() << vkCreatePipelineLayout(vk::g_vkDevice, &piplineLayoutInfo,
|
|
|
|
|
vk::g_vkAllocator, &result.second);
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static std::pair<VkDescriptorSetLayout, VkPipelineLayout> getComputeLayout() {
|
|
|
|
|
static std::pair<VkDescriptorSetLayout, VkPipelineLayout> result{};
|
|
|
|
|
|
|
|
|
|
if (result.first != VK_NULL_HANDLE) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<VkDescriptorSetLayoutBinding> bindings(
|
|
|
|
|
shader::UniformBindings::kStageSize);
|
|
|
|
|
|
|
|
|
|
fillStageBindings(bindings, shader::Stage::Compute);
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo layoutInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
|
|
|
.bindingCount = static_cast<uint32_t>(bindings.size()),
|
|
|
|
|
.pBindings = bindings.data(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Verify() << vkCreateDescriptorSetLayout(vk::g_vkDevice, &layoutInfo, nullptr,
|
|
|
|
|
&result.first);
|
|
|
|
|
|
|
|
|
|
VkPipelineLayoutCreateInfo piplineLayoutInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
|
|
|
.setLayoutCount = 1,
|
|
|
|
|
.pSetLayouts = &result.first,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Verify() << vkCreatePipelineLayout(vk::g_vkDevice, &piplineLayoutInfo,
|
|
|
|
|
vk::g_vkAllocator, &result.second);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct Cache {
|
|
|
|
|
struct ShaderKey {
|
|
|
|
|
std::uint64_t address;
|
|
|
|
|
std::uint16_t dimX;
|
|
|
|
|
std::uint16_t dimY;
|
|
|
|
|
std::uint16_t dimZ;
|
|
|
|
|
shader::Stage stage;
|
|
|
|
|
std::uint8_t userSgprCount;
|
|
|
|
|
std::uint32_t userSgprs[16];
|
|
|
|
|
|
|
|
|
|
auto operator<=>(const ShaderKey &other) const {
|
|
|
|
|
auto result = address <=> other.address;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = dimX <=> other.dimX;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = dimY <=> other.dimY;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = dimZ <=> other.dimZ;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = stage <=> other.stage;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = userSgprCount <=> other.userSgprCount;
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < std::size(userSgprs); ++i) {
|
|
|
|
|
if (i >= userSgprCount) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result = userSgprs[i] <=> other.userSgprs[i];
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
2023-07-24 05:03:55 +02:00
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
struct CachedShader {
|
|
|
|
|
std::map<std::uint64_t, std::vector<char>> cachedData;
|
|
|
|
|
shader::Shader info;
|
|
|
|
|
VkShaderEXT shader;
|
|
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
struct ImageKey {
|
|
|
|
|
std::uint64_t address;
|
|
|
|
|
SurfaceFormat dataFormat;
|
|
|
|
|
TextureChannelType channelType;
|
|
|
|
|
TileMode tileMode;
|
|
|
|
|
std::uint32_t width;
|
|
|
|
|
std::uint32_t height;
|
|
|
|
|
std::uint32_t depth;
|
|
|
|
|
std::uint32_t pitch;
|
|
|
|
|
|
|
|
|
|
auto operator<=>(const ImageKey &other) const = default;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct CachedImageView {
|
|
|
|
|
VkImageView view;
|
|
|
|
|
VkFormat format;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct CachedImage {
|
|
|
|
|
vk::Image2D image;
|
|
|
|
|
std::vector<CachedImageView> views;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct BufferKey {
|
|
|
|
|
std::uint64_t address;
|
|
|
|
|
std::uint64_t size;
|
|
|
|
|
|
|
|
|
|
auto operator<=>(const BufferKey &other) const {
|
|
|
|
|
auto result = other.address <=> address;
|
|
|
|
|
|
|
|
|
|
if (result != std::strong_ordering::equal) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return size <=> other.size;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
2023-07-24 05:03:55 +02:00
|
|
|
};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
struct CachedBuffer {
|
|
|
|
|
vk::Buffer buffer;
|
|
|
|
|
BufferRef ref;
|
|
|
|
|
void *data;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::mutex mtx;
|
|
|
|
|
std::map<ShaderKey, std::forward_list<CachedShader>> shaders;
|
|
|
|
|
std::map<ImageKey, CachedImage> images;
|
|
|
|
|
std::map<BufferKey, CachedBuffer> buffers;
|
|
|
|
|
VkSampler sampler{};
|
|
|
|
|
vk::Semaphore semaphore;
|
|
|
|
|
|
|
|
|
|
struct WriteBuffer {
|
|
|
|
|
std::uint64_t size;
|
|
|
|
|
Ref<AsyncTaskCtl> taskCtl;
|
|
|
|
|
CachedBuffer *bufferPtr;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::map<std::uint64_t, WriteBuffer> writeBuffers;
|
|
|
|
|
|
|
|
|
|
// TODO: use descriptor buffer instead
|
|
|
|
|
VkDescriptorPool graphicsDescriptorPool{};
|
|
|
|
|
VkDescriptorPool computeDescriptorPool{};
|
|
|
|
|
std::vector<VkDescriptorSet> graphicsDecsriptorSets;
|
|
|
|
|
std::vector<VkDescriptorSet> computeDecsriptorSets;
|
|
|
|
|
|
|
|
|
|
/// Returns the semaphore of this cache, creating it on first use.
vk::Semaphore &getSemaphore() {
  if (semaphore.mSemaphore != VK_NULL_HANDLE) {
    return semaphore;
  }

  semaphore = vk::Semaphore::Create();
  return semaphore;
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
/// Hands out a descriptor set for a compute dispatch.
///
/// A set previously returned through releaseComputeDescriptorSet() is reused
/// when one is available; otherwise a new set with the compute layout is
/// allocated from the compute descriptor pool, which is created on first use.
///
/// @return a descriptor set compatible with getComputeLayout()
VkDescriptorSet getComputeDescriptorSet() {
  // Upper bound of sets that can be allocated from the pool.
  constexpr std::uint32_t kMaxSets = 32;

  // The lock is held for the whole call: besides the free list it also covers
  // the pool, whose use in vkAllocateDescriptorSets must be externally
  // synchronized.
  std::lock_guard lock(mtx);

  // The compute pool is created and used here. The previous code tested
  // computeDescriptorPool but wrote to (and allocated from)
  // graphicsDescriptorPool, replacing the graphics pool on every call.
  if (computeDescriptorPool == VK_NULL_HANDLE) {
    // Pool sizes are totals over all sets, so one set's worth of descriptors
    // is multiplied by the number of sets.
    VkDescriptorPoolSize poolSizes[]{
        {
            .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kImageSlots * kMaxSets),
        },
        {
            .type = VK_DESCRIPTOR_TYPE_SAMPLER,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kSamplerSlots * kMaxSets),
        },
        {
            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kBufferSlots * kMaxSets),
        },
    };

    VkDescriptorPoolCreateInfo poolInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .maxSets = kMaxSets,
        .poolSizeCount = static_cast<uint32_t>(std::size(poolSizes)),
        .pPoolSizes = poolSizes,
    };

    Verify() << vkCreateDescriptorPool(vk::g_vkDevice, &poolInfo,
                                       vk::g_vkAllocator,
                                       &computeDescriptorPool);
  }

  // Prefer a recycled set.
  if (!computeDecsriptorSets.empty()) {
    auto result = computeDecsriptorSets.back();
    computeDecsriptorSets.pop_back();
    return result;
  }

  auto layout = getComputeLayout().first;

  VkDescriptorSetAllocateInfo info{
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
      .descriptorPool = computeDescriptorPool,
      .descriptorSetCount = 1,
      .pSetLayouts = &layout,
  };

  VkDescriptorSet result = VK_NULL_HANDLE;
  Verify() << vkAllocateDescriptorSets(vk::g_vkDevice, &info, &result);
  return result;
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
/// Hands out a descriptor set for a graphics draw.
///
/// A set previously returned through releaseGraphicsDescriptorSet() is reused
/// when one is available; otherwise a new set with the graphics layout is
/// allocated from the graphics descriptor pool, which is created on first
/// use.
///
/// @return a descriptor set compatible with getGraphicsLayout()
VkDescriptorSet getGraphicsDescriptorSet() {
  // Upper bound of sets that can be allocated from the pool.
  constexpr std::uint32_t kMaxSets = 32;
  // The graphics layout holds the bindings of two stages.
  constexpr std::uint32_t kStageCount = 2;

  // The lock is held for the whole call: besides the free list it also covers
  // the pool, whose use in vkAllocateDescriptorSets must be externally
  // synchronized.
  std::lock_guard lock(mtx);

  if (graphicsDescriptorPool == VK_NULL_HANDLE) {
    // Pool sizes are totals over all sets, so one set's worth of descriptors
    // is multiplied by the number of sets.
    VkDescriptorPoolSize poolSizes[]{
        {
            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kBufferSlots * kStageCount *
                kMaxSets),
        },
        {
            .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kImageSlots * kStageCount * kMaxSets),
        },
        {
            .type = VK_DESCRIPTOR_TYPE_SAMPLER,
            .descriptorCount = static_cast<std::uint32_t>(
                shader::UniformBindings::kSamplerSlots * kStageCount *
                kMaxSets),
        },
    };

    VkDescriptorPoolCreateInfo poolInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .maxSets = kMaxSets,
        .poolSizeCount = static_cast<uint32_t>(std::size(poolSizes)),
        .pPoolSizes = poolSizes,
    };

    Verify() << vkCreateDescriptorPool(vk::g_vkDevice, &poolInfo,
                                       vk::g_vkAllocator,
                                       &graphicsDescriptorPool);
  }

  // Prefer a recycled set.
  if (!graphicsDecsriptorSets.empty()) {
    auto result = graphicsDecsriptorSets.back();
    graphicsDecsriptorSets.pop_back();
    return result;
  }

  auto layout = getGraphicsLayout().first;

  VkDescriptorSetAllocateInfo info{
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
      .descriptorPool = graphicsDescriptorPool,
      .descriptorSetCount = 1,
      .pSetLayouts = &layout,
  };

  VkDescriptorSet result = VK_NULL_HANDLE;
  Verify() << vkAllocateDescriptorSets(vk::g_vkDevice, &info, &result);
  return result;
}
|
|
|
|
|
|
|
|
|
|
/// Puts a graphics descriptor set back on the free list so that a later
/// getGraphicsDescriptorSet() call can hand it out again.
void releaseGraphicsDescriptorSet(VkDescriptorSet descSet) {
  const std::lock_guard<std::mutex> guard(mtx);
  graphicsDecsriptorSets.emplace_back(descSet);
}
|
|
|
|
|
|
|
|
|
|
/// Puts a compute descriptor set back on the free list so that a later
/// getComputeDescriptorSet() call can hand it out again.
void releaseComputeDescriptorSet(VkDescriptorSet descSet) {
  const std::lock_guard<std::mutex> guard(mtx);
  computeDecsriptorSets.emplace_back(descSet);
}
|
|
|
|
|
|
|
|
|
|
/// Looks up a shader in the cache and, on a miss, schedules its compilation.
///
/// The key consists of the shader address, the dimensions, the stage and the
/// user SGPR values. On a hit the first cached variant whose recorded byte
/// snapshots still match host memory is returned. Otherwise a new cache entry
/// is created and a task that converts the shader, validates the SPIR-V and
/// creates the Vulkan shader object is enqueued on `taskSet`.
///
/// @param taskSet             task set the compilation task is enqueued on
/// @param descriptorSetLayout set layout the shader object is created with
/// @param stage               shader stage
/// @param address             address of the shader code
/// @param userSgprs           user SGPR values; copied during the call, the
///                            pointer does not have to outlive it
/// @param userSgprsCount      number of values in `userSgprs` (at most 16)
/// @param dimX, dimY, dimZ    dimensions forwarded to the shader converter
/// @return the cache entry; for a freshly created entry `info` and `shader`
///         are filled in by the enqueued task, not before this call returns
const CachedShader &getShader(TaskSet &taskSet,
                              VkDescriptorSetLayout descriptorSetLayout,
                              shader::Stage stage, std::uint64_t address,
                              std::uint32_t *userSgprs,
                              std::uint8_t userSgprsCount,
                              std::uint16_t dimX = 1, std::uint16_t dimY = 1,
                              std::uint16_t dimZ = 1) {
  ShaderKey key{.address = address,
                .dimX = dimX,
                .dimY = dimY,
                .dimZ = dimZ,
                .stage = stage,
                .userSgprCount = userSgprsCount};

  // The key stores a fixed number of SGPRs; refuse to write past the array.
  if (userSgprsCount > std::size(key.userSgprs)) {
    util::unreachable("too many user SGPRs: %u\n",
                      static_cast<unsigned>(userSgprsCount));
  }

  std::memcpy(key.userSgprs, userSgprs,
              userSgprsCount * sizeof(std::uint32_t));

  CachedShader *entry;
  {
    std::lock_guard lock(mtx);

    auto [emplacedIt, inserted] =
        shaders.try_emplace(key, std::forward_list<CachedShader>{});

    if (!inserted) {
      for (auto &shader : emplacedIt->second) {
        // A variant is reusable only if every recorded snapshot still
        // matches the current contents of host memory.
        bool isAllSame = true;
        for (auto &[startAddress, bytes] : shader.cachedData) {
          if (std::memcmp(g_hostMemory.getPointer(startAddress), bytes.data(),
                          bytes.size()) != 0) {
            isAllSame = false;
            break;
          }
        }

        if (isAllSame) {
          return shader;
        }
      }

      std::printf("cache: found shader with different data, recompiling\n");
    }

    // forward_list nodes are never relocated, so the pointer stays usable
    // after the lock has been released.
    entry = &emplacedIt->second.emplace_front();
  }

  // `key` is captured by value: the task may run after this call has
  // returned, when the caller's `userSgprs` storage may be gone, so the SGPR
  // values are read from the copy inside the key.
  g_scheduler.enqueue(taskSet, [entry, key, stage, address, dimX, dimY, dimZ,
                                descriptorSetLayout](const AsyncTaskCtl &) {
    util::MemoryAreaTable<> dependencies;
    auto info = shader::convert(
        g_hostMemory, stage, address,
        std::span<const std::uint32_t>(key.userSgprs, key.userSgprCount), dimX,
        dimY, dimZ, dependencies);

    if (!validateSpirv(info.spirv)) {
      printSpirv(info.spirv);
      dumpShader(g_hostMemory.getPointer<std::uint32_t>(address));
      util::unreachable();
    }

    // printSpirv(info.spirv);

    // for (auto [startAddress, endAddress] : dependencies) {
    //   auto ptr = g_hostMemory.getPointer(startAddress);
    //   auto &target = entry->cachedData[startAddress];
    //   target.resize(endAddress - startAddress);

    //   std::printf("shader dependency %lx-%lx\n", startAddress, endAddress);
    //   std::memcpy(target.data(), ptr, target.size());
    // }

    VkShaderCreateInfoEXT createInfo{
        .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
        .flags = 0,
        .stage = shaderStageToVk(stage),
        .nextStage = 0,
        .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
        .codeSize = info.spirv.size() * sizeof(info.spirv[0]),
        .pCode = info.spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &descriptorSetLayout};

    VkShaderEXT shader;
    Verify() << _vkCreateShadersEXT(vk::g_vkDevice, 1, &createInfo,
                                    vk::g_vkAllocator, &shader);
    entry->info = std::move(info);
    entry->shader = shader;
    return true;
  });

  return *entry;
}
|
|
|
|
|
|
|
|
|
|
// Returns a reference to a cached Vulkan buffer covering
// [address, address + size) of guest memory.
//
// - With AccessOp::Load, a task is enqueued on `readTaskSet` that copies the
//   current host memory contents into the buffer.
// - With AccessOp::Store, a write-back task (buffer -> host memory) is
//   appended to `writeTaskSet`; it does nothing if it has been canceled.
BufferRef getBuffer(TaskSet &readTaskSet, TaskSet &writeTaskSet,
                    std::uint64_t address, std::uint64_t size,
                    shader::AccessOp access) {
  // The upload is scheduled right below for both cache hits and misses, so
  // the internal lookup is asked not to schedule one of its own; otherwise a
  // miss would copy the same range twice.
  auto [buffer, ref, data] =
      getBufferInternal(readTaskSet, address, size, shader::AccessOp::None);

  if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
    g_scheduler.enqueue(readTaskSet, [=](const AsyncTaskCtl &) {
      std::memcpy(data, g_hostMemory.getPointer(address), size);
      return true;
    });
  }

  if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
    auto writeBackTask = createTask([=](const AsyncTaskCtl &ctl) {
      if (ctl.isCanceled()) {
        return false;
      }

      std::memcpy(g_hostMemory.getPointer(address), data, size);
      return true;
    });

    writeTaskSet.append(std::move(writeBackTask));
  }

  return ref;
}
|
|
|
|
|
|
|
|
|
|
// Fetches (or creates) the cached color image described by the arguments,
// records a transition to `layout` into `readCommandBuffer` and returns the
// raw image handle. The image is always requested with depth 1 and Load
// access.
VkImage getImage(TaskSet &readTaskSet, VkCommandBuffer readCommandBuffer,
                 std::uint64_t address, SurfaceFormat dataFormat,
                 TextureChannelType channelType, TileMode tileMode,
                 std::uint32_t width, std::uint32_t height,
                 std::uint32_t pitch, VkImageLayout layout) {
  constexpr std::uint32_t kDepth = 1;
  constexpr bool kIsColor = true;

  CachedImage &cached = getImageInternal(
      readTaskSet, readCommandBuffer, address, dataFormat, channelType,
      tileMode, width, height, kDepth, pitch, shader::AccessOp::Load,
      kIsColor);

  vk::ImageRef(cached.image).transitionLayout(readCommandBuffer, layout);
  return cached.image.getHandle();
}
|
|
|
|
|
|
|
|
|
|
// Returns the single shared sampler, creating it on first use.
//
// The existence check is performed while holding `mtx`: checking before
// taking the lock (and not re-checking afterwards) would let two concurrent
// callers both create a sampler, leaking one of them, and would read
// `sampler` without synchronization.
VkSampler getSampler() {
  std::lock_guard lock(mtx);

  if (sampler != VK_NULL_HANDLE) {
    return sampler;
  }

  VkSamplerCreateInfo samplerInfo{
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .magFilter = VK_FILTER_LINEAR,
      .minFilter = VK_FILTER_LINEAR,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
      .mipLodBias = 0.0f,
      .anisotropyEnable = VK_FALSE,
      .maxAnisotropy = 1.0,
      .compareOp = VK_COMPARE_OP_NEVER,
      .minLod = 0.0f,
      .maxLod = 0.0f,
      .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
  };

  Verify() << vkCreateSampler(vk::g_vkDevice, &samplerInfo, nullptr,
                              &sampler);
  return sampler;
}
|
|
|
|
|
|
|
|
|
|
// Returns an image view for the cached image described by the arguments,
// after recording a transition of that image to `layout` into
// `readCommandBuffer`.
//
// Views are cached per image and keyed by Vulkan format: an existing view
// with the same format is reused, otherwise a new 2D view (single mip level,
// single layer) is created and remembered. `isColor` selects a color aspect
// versus a depth+stencil aspect. `writeTaskSet` is not used yet.
VkImageView getImageView(TaskSet &readTaskSet, TaskSet &writeTaskSet,
                         VkCommandBuffer readCommandBuffer,
                         std::uint64_t address, SurfaceFormat dataFormat,
                         TextureChannelType channelType, TileMode tileMode,
                         std::uint32_t width, std::uint32_t height,
                         std::uint32_t depth, std::uint32_t pitch,
                         shader::AccessOp access, VkImageLayout layout,
                         bool isColor = true) {
  auto &cached = getImageInternal(readTaskSet, readCommandBuffer, address,
                                  dataFormat, channelType, tileMode, width,
                                  height, depth, pitch, access, isColor);
  if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) {
    // TODO
  }

  const auto viewFormat = surfaceFormatToVkFormat(dataFormat, channelType);
  vk::ImageRef(cached.image).transitionLayout(readCommandBuffer, layout);

  // The view list is shared state, so it is searched and extended under the
  // cache mutex.
  std::lock_guard guard(mtx);

  for (const auto &existing : cached.views) {
    if (existing.format == viewFormat) {
      return existing.view;
    }
  }

  VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  if (!isColor) {
    aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
  }

  VkImageViewCreateInfo viewInfo{
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = cached.image.getHandle(),
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = viewFormat,
      .components = {}, // TODO
      .subresourceRange =
          {
              .aspectMask = aspectMask,
              .baseMipLevel = 0, // TODO
              .levelCount = 1,
              .baseArrayLayer = 0,
              .layerCount = 1,
          },
  };

  VkImageView createdView;
  Verify() << vkCreateImageView(vk::g_vkDevice, &viewInfo, nullptr,
                                &createdView);

  cached.views.push_back({.view = createdView, .format = viewFormat});
  return createdView;
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
std::tuple<vk::Buffer *, BufferRef, void *>
|
|
|
|
|
getBufferInternal(TaskSet &readTaskSet, std::uint64_t address,
|
|
|
|
|
std::uint64_t size, shader::AccessOp access) {
|
|
|
|
|
BufferKey key{.address = address, .size = size};
|
|
|
|
|
|
|
|
|
|
decltype(buffers)::iterator it;
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard lock(mtx);
|
|
|
|
|
it = buffers.lower_bound(key);
|
|
|
|
|
|
|
|
|
|
auto isCompatiableKey = [&] {
|
|
|
|
|
if (it == buffers.end()) {
|
|
|
|
|
// std::printf(
|
|
|
|
|
// "Buffer cache miss: address: %lx, end address: %lx: not
|
|
|
|
|
// found\n", address, address + size);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (it->first.address + it->first.size < key.address + key.size) {
|
|
|
|
|
// std::printf("Buffer cache miss: address: %lx, end address: %lx: "
|
|
|
|
|
// "exists smaller %lx-%lx\n",
|
|
|
|
|
// address, address + size, it->first.address,
|
|
|
|
|
// it->first.address + it->first.size);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!isAligned(key.address - it->first.address,
|
|
|
|
|
vk::g_physicalDeviceProperties.limits
|
|
|
|
|
.minStorageBufferOffsetAlignment)) {
|
|
|
|
|
// std::printf("Buffer cache miss: address: %lx, end address: %lx: "
|
|
|
|
|
// "incompatiable offset %lx-%lx\n",
|
|
|
|
|
// address, address + size, it->first.address,
|
|
|
|
|
// it->first.address + it->first.size);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (!isCompatiableKey()) {
|
|
|
|
|
it = buffers.emplace_hint(it, key, CachedBuffer{});
|
|
|
|
|
} else {
|
|
|
|
|
assert(it->first.address <= address);
|
|
|
|
|
assert(it->first.address + it->first.size >= address + size);
|
|
|
|
|
|
|
|
|
|
// if (it->first.address != address || it->first.size != size) {
|
|
|
|
|
// std::printf("cache: reusing buffer %lx-%lx for %lx-%lx\n",
|
|
|
|
|
// it->first.address, it->first.address + it->first.size,
|
|
|
|
|
// address, address + size);
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
BufferRef ref{.buffer = it->second.buffer.getHandle(),
|
|
|
|
|
.offset =
|
|
|
|
|
it->second.ref.offset + (address - it->first.address),
|
|
|
|
|
.size = size};
|
|
|
|
|
|
|
|
|
|
auto data = reinterpret_cast<char *>(it->second.data) + ref.offset;
|
|
|
|
|
return {&it->second.buffer, ref, data};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto usage =
|
|
|
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
|
|
|
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
|
|
|
|
auto buffer = vk::Buffer::Allocate(getHostVisibleMemory(), size, usage);
|
|
|
|
|
|
|
|
|
|
it->second.buffer = std::move(buffer);
|
|
|
|
|
it->second.ref = {
|
|
|
|
|
.buffer = it->second.buffer.getHandle(), .offset = 0, .size = size};
|
|
|
|
|
it->second.data = reinterpret_cast<char *>(it->second.buffer.getData());
|
|
|
|
|
|
|
|
|
|
if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
|
|
|
|
|
g_scheduler.enqueue(
|
|
|
|
|
readTaskSet, [=, data = it->second.data](const AsyncTaskCtl &) {
|
|
|
|
|
std::memcpy(data, g_hostMemory.getPointer(address), size);
|
|
|
|
|
return true;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return {&it->second.buffer, it->second.ref, it->second.data};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Finds or creates the cached image for the given guest surface description.
//
// An existing entry (same address, formats, tile mode and dimensions) is
// returned immediately. On a miss a device-local 2D image is allocated with
// sampled + transfer usage, plus color or depth/stencil attachment usage for
// non block-compressed formats. If `access` includes Load, a task that reads
// the surface from host memory into a staging buffer is enqueued on
// `readTaskSet` and a buffer-to-image copy is recorded into
// `readCommandBuffer`.
CachedImage &getImageInternal(TaskSet &readTaskSet,
                              VkCommandBuffer readCommandBuffer,
                              std::uint64_t address, SurfaceFormat dataFormat,
                              TextureChannelType channelType,
                              TileMode tileMode, std::uint32_t width,
                              std::uint32_t height, std::uint32_t depth,
                              std::uint32_t pitch, shader::AccessOp access,
                              bool isColor = true) {
  ImageKey key{
      .address = address,
      .dataFormat = dataFormat,
      .channelType = channelType,
      .tileMode = tileMode,
      .width = width,
      .height = height,
      .depth = depth,
      .pitch = pitch,
  };

  decltype(images)::iterator it;
  {
    std::lock_guard guard(mtx);

    auto [emplacedIt, inserted] = images.try_emplace(key, CachedImage{});
    if (!inserted) {
      return emplacedIt->second;
    }

    it = emplacedIt;
  }

  std::printf(
      "Image cache miss: address: %lx, dataFormat: %u, channelType: %u, "
      "tileMode: %u, width: %u, height: %u, depth: %u, pitch: %u\n",
      address, dataFormat, channelType, tileMode, width, height, depth,
      pitch);

  auto colorFormat = surfaceFormatToVkFormat(dataFormat, channelType);
  auto usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
               VK_IMAGE_USAGE_TRANSFER_SRC_BIT;

  const bool isCompressed =
      dataFormat == kSurfaceFormatBc1 || dataFormat == kSurfaceFormatBc2 ||
      dataFormat == kSurfaceFormatBc3 || dataFormat == kSurfaceFormatBc4 ||
      dataFormat == kSurfaceFormatBc5 || dataFormat == kSurfaceFormatBc6 ||
      dataFormat == kSurfaceFormatBc7;

  // Block-compressed images get no attachment usage.
  if (!isCompressed) {
    usage |= isColor ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
                     : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
  }

  // The image itself is allocated with the original (texel) dimensions; the
  // block-space adjustment below only affects the upload.
  auto imageHandle = vk::Image2D::Allocate(getDeviceLocalMemory(), width,
                                           height, colorFormat, usage);

  if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) {
    auto bpp = getBitWidthOfSurfaceFormat(dataFormat) / 8;

    if (isCompressed) {
      // Convert texel dimensions to 4x4 block dimensions.
      // NOTE(review): every BC format is treated as 16 bytes per block here;
      // confirm this is intended for BC1/BC4.
      width = (width + 3) / 4;
      height = (height + 3) / 4;
      pitch = (pitch + 3) / 4;
      bpp = 16;
    }

    auto image = vk::ImageRef(imageHandle);
    auto memSize = image.getMemoryRequirements().size;
    auto [buffer, ref, data] = getBufferInternal(
        readTaskSet, address, memSize, shader::AccessOp::None);

    g_scheduler.enqueue(readTaskSet, [=](const AsyncTaskCtl &) {
      buffer->readFromImage(g_hostMemory.getPointer(address), bpp, tileMode,
                            width, height, 1, pitch);
      return true;
    });

    image.readFromBuffer(readCommandBuffer, ref.buffer,
                         isColor ? VK_IMAGE_ASPECT_COLOR_BIT
                                 : VK_IMAGE_ASPECT_DEPTH_BIT,
                         ref.offset);
  }

  it->second.image = std::move(imageHandle);
  return it->second;
}
|
|
|
|
|
} g_cache;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// Returns the geometry shader object built from the embedded
// `spirv_rect_list_geom` SPIR-V blob. The shader is created on the first call
// using the graphics descriptor set layout and kept in a function-local
// static for later calls.
static VkShaderEXT getPrimTypeRectGeomShader() {
  static VkShaderEXT shader = VK_NULL_HANDLE;

  if (shader == VK_NULL_HANDLE) {
    auto layout = getGraphicsLayout().first;

    VkShaderCreateInfoEXT createInfo{
        .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
        .flags = 0,
        .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
        .nextStage = 0,
        .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
        .codeSize = sizeof(spirv_rect_list_geom),
        .pCode = spirv_rect_list_geom,
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &layout,
    };

    Verify() << _vkCreateShadersEXT(vk::g_vkDevice, 1, &createInfo,
                                    vk::g_vkAllocator, &shader);
  }

  return shader;
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
struct RenderState {
|
|
|
|
|
DrawContext &ctxt;
|
|
|
|
|
QueueRegisters ®s;
|
|
|
|
|
|
|
|
|
|
// Binds every uniform declared by `shader` into `descSet`.
//
// - Buffer uniforms: the V# descriptor is resolved through the buffer cache
//   (a zero size is bumped to 0x10) and written as a storage buffer.
// - Image uniforms: the T# descriptor is resolved to a cached image view in
//   SHADER_READ_ONLY_OPTIMAL layout and written as a sampled image.
// - Sampler uniforms: the shared cache sampler is written.
//
// Loads are scheduled on `readTaskSet`, write-backs on `storeTaskSet`, and
// image transitions/uploads are recorded into `cmdBuffer`. `isCompute` only
// enables a debug print of compute buffer ranges.
void loadShaderBindings(TaskSet &readTaskSet, TaskSet &storeTaskSet,
                        VkDescriptorSet descSet, const shader::Shader &shader,
                        VkCommandBuffer cmdBuffer, bool isCompute = false) {
  // Writes a single descriptor of `type` at `binding`; exactly one of
  // `imageInfo` / `bufferInfo` is non-null depending on the descriptor type.
  const auto writeDescriptor = [descSet](
                                   std::uint32_t binding,
                                   VkDescriptorType type,
                                   const VkDescriptorImageInfo *imageInfo,
                                   const VkDescriptorBufferInfo *bufferInfo) {
    VkWriteDescriptorSet writeDescSet{
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = descSet,
        .dstBinding = binding,
        .descriptorCount = 1,
        .descriptorType = type,
        .pImageInfo = imageInfo,
        .pBufferInfo = bufferInfo,
    };

    vkUpdateDescriptorSets(vk::g_vkDevice, 1, &writeDescSet, 0, nullptr);
  };

  for (auto &uniform : shader.uniforms) {
    switch (uniform.kind) {
    case shader::Shader::UniformKind::Buffer: {
      auto &vbuffer = *reinterpret_cast<const GnmVBuffer *>(uniform.buffer);
      auto size = vbuffer.getSize();
      if (size == 0) {
        size = 0x10;
      }

      if (isCompute) {
        std::printf("compute buffer %lx-%lx\n", vbuffer.getAddress(),
                    vbuffer.getAddress() + vbuffer.getSize());
      }

      auto storageBuffer =
          g_cache.getBuffer(readTaskSet, storeTaskSet, vbuffer.getAddress(),
                            size, uniform.accessOp);

      VkDescriptorBufferInfo bufferInfo{
          .buffer = storageBuffer.buffer,
          .offset = storageBuffer.offset,
          .range = size,
      };

      writeDescriptor(uniform.binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                      nullptr, &bufferInfo);
      break;
    }

    case shader::Shader::UniformKind::Image: {
      auto &tbuffer = *reinterpret_cast<const GnmTBuffer *>(uniform.buffer);
      auto dataFormat = tbuffer.dfmt;
      auto channelType = tbuffer.nfmt;

      // assert(tbuffer->width == tbuffer->pitch);
      // The descriptor fields are stored minus one.
      std::size_t width = tbuffer.width + 1;
      std::size_t height = tbuffer.height + 1;
      std::size_t depth = tbuffer.depth + 1;
      std::size_t pitch = tbuffer.pitch + 1;
      auto tileMode = (TileMode)tbuffer.tiling_idx;

      auto imageView = g_cache.getImageView(
          readTaskSet, storeTaskSet, cmdBuffer, tbuffer.getAddress(),
          dataFormat, channelType, tileMode, width, height, depth, pitch,
          uniform.accessOp, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

      VkDescriptorImageInfo imageInfo{
          .imageView = imageView,
          .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
      };

      writeDescriptor(uniform.binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                      &imageInfo, nullptr);
      break;
    }

    case shader::Shader::UniformKind::Sampler: {
      // TODO: load S# sampler
      auto sampler = g_cache.getSampler();

      VkDescriptorImageInfo imageInfo{
          .sampler = sampler,
          .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
      };

      writeDescriptor(uniform.binding, VK_DESCRIPTOR_TYPE_SAMPLER, &imageInfo,
                      nullptr);
      break;
    }
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Placeholder for the EliminateFastClear color-buffer operation; currently
// does nothing.
void eliminateFastClear() {
  // TODO
  // util::unreachable();
}
|
|
|
|
|
|
|
|
|
|
void resolve() {
|
|
|
|
|
// TODO: when texture cache will be implemented it MSAA should be done by
|
|
|
|
|
// GPU
|
2023-07-19 14:16:38 +02:00
|
|
|
auto srcBuffer = regs.colorBuffers[0];
|
|
|
|
|
auto dstBuffer = regs.colorBuffers[1];
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
const auto src = g_hostMemory.getPointer(srcBuffer.base);
|
|
|
|
|
auto dst = g_hostMemory.getPointer(dstBuffer.base);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
if (src == nullptr || dst == nullptr) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
std::memcpy(dst, src, regs.screenScissorH * regs.screenScissorW * 4);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void draw(std::uint32_t count, std::uint64_t indeciesAddress,
|
|
|
|
|
std::uint32_t indexCount) {
|
2023-07-19 14:16:38 +02:00
|
|
|
if (regs.cbColorFormat == CbColorFormat::Disable) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
if (regs.cbColorFormat == CbColorFormat::EliminateFastClear) {
|
2023-06-24 14:59:27 +02:00
|
|
|
eliminateFastClear();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
if (regs.cbColorFormat == CbColorFormat::Resolve) {
|
2023-06-24 14:59:27 +02:00
|
|
|
resolve();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
if (regs.pgmVsAddress == 0 || regs.pgmPsAddress == 0) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
if (regs.cbRenderTargetMask == 0 || regs.colorBuffers[0].base == 0) {
|
2023-06-24 14:59:27 +02:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// regs.depthClearEnable = true;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
TaskSet loadTaskSet, storeTaskSet;
|
|
|
|
|
auto [desriptorSetLayout, pipelineLayout] = getGraphicsLayout();
|
|
|
|
|
auto &vertexShader = g_cache.getShader(
|
|
|
|
|
loadTaskSet, desriptorSetLayout, shader::Stage::Vertex,
|
|
|
|
|
regs.pgmVsAddress, regs.userVsData, regs.vsUserSpgrs);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto &fragmentShader = g_cache.getShader(
|
|
|
|
|
loadTaskSet, desriptorSetLayout, shader::Stage::Fragment,
|
|
|
|
|
regs.pgmPsAddress, regs.userPsData, regs.psUserSpgrs);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto &sem = g_cache.getSemaphore();
|
|
|
|
|
loadTaskSet.wait();
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
VkCommandBufferBeginInfo beginInfo{};
|
|
|
|
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
|
|
|
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
|
|
|
|
|
|
|
|
|
VkCommandBuffer transferCommandBuffers[2];
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
VkCommandBufferAllocateInfo allocInfo{};
|
|
|
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
|
|
|
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
|
|
|
allocInfo.commandPool = ctxt.commandPool;
|
|
|
|
|
allocInfo.commandBufferCount = std::size(transferCommandBuffers);
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocInfo,
|
2023-06-25 17:37:20 +02:00
|
|
|
transferCommandBuffers);
|
2023-06-25 15:54:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VkCommandBuffer readCommandBuffer = transferCommandBuffers[0];
|
|
|
|
|
Verify() << vkBeginCommandBuffer(readCommandBuffer, &beginInfo);
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
auto primType = static_cast<PrimitiveType>(regs.vgtPrimitiveType);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
std::vector<vk::Image2D> colorImages;
|
2023-07-24 05:03:55 +02:00
|
|
|
std::vector<VkRenderingAttachmentInfo> colorAttachments;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
for (auto targetMask = regs.cbRenderTargetMask;
|
|
|
|
|
auto &colorBuffer : regs.colorBuffers) {
|
2023-06-24 14:59:27 +02:00
|
|
|
if (targetMask == 0 || colorBuffer.base == 0) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((targetMask & 0xf) == 0) {
|
|
|
|
|
targetMask >>= 4;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
targetMask >>= 4;
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
shader::AccessOp access =
|
|
|
|
|
shader::AccessOp::Load | shader::AccessOp::Store;
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto dataFormat = (SurfaceFormat)colorBuffer.format;
|
|
|
|
|
auto channelType = kTextureChannelTypeSrgb; // TODO
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto colorImageView = g_cache.getImageView(
|
|
|
|
|
loadTaskSet, storeTaskSet, readCommandBuffer, colorBuffer.base,
|
|
|
|
|
dataFormat, channelType, (TileMode)colorBuffer.tileModeIndex,
|
|
|
|
|
regs.screenScissorW, regs.screenScissorH, 1, regs.screenScissorW,
|
|
|
|
|
access, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
colorAttachments.push_back({
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
|
|
|
|
.imageView = colorImageView,
|
|
|
|
|
.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
2023-07-17 14:35:53 +02:00
|
|
|
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
|
|
|
|
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
|
|
|
|
});
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto descSet = g_cache.getGraphicsDescriptorSet();
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
loadShaderBindings(loadTaskSet, storeTaskSet, descSet, vertexShader.info,
|
|
|
|
|
readCommandBuffer);
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
loadShaderBindings(loadTaskSet, storeTaskSet, descSet, fragmentShader.info,
|
|
|
|
|
readCommandBuffer);
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
shader::AccessOp depthAccess = shader::AccessOp::None;
|
2023-07-17 14:35:53 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
if (!regs.depthClearEnable && regs.zReadBase != 0) {
|
|
|
|
|
depthAccess |= shader::AccessOp::Load;
|
2023-07-17 14:35:53 +02:00
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
if (regs.depthWriteEnable && regs.zWriteBase != 0) {
|
|
|
|
|
depthAccess |= shader::AccessOp::Store;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto depthImageView = g_cache.getImageView(
|
|
|
|
|
loadTaskSet, storeTaskSet, readCommandBuffer, regs.zReadBase,
|
|
|
|
|
kSurfaceFormat24_8, kTextureChannelTypeUNorm,
|
|
|
|
|
kTileModeDisplay_LinearAligned, regs.screenScissorW,
|
|
|
|
|
regs.screenScissorH, 1, regs.screenScissorW, depthAccess,
|
|
|
|
|
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false);
|
|
|
|
|
|
|
|
|
|
VkRenderingAttachmentInfo depthAttachment{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
|
|
|
|
.imageView = depthImageView,
|
|
|
|
|
.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
|
|
|
|
|
.loadOp = !regs.depthClearEnable && regs.zReadBase
|
|
|
|
|
? VK_ATTACHMENT_LOAD_OP_LOAD
|
|
|
|
|
: VK_ATTACHMENT_LOAD_OP_CLEAR,
|
|
|
|
|
.storeOp = regs.depthWriteEnable && regs.zWriteBase
|
|
|
|
|
? VK_ATTACHMENT_STORE_OP_STORE
|
|
|
|
|
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
|
|
|
|
.clearValue = {.depthStencil = {.depth = regs.depthClear}}};
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
VkCommandBuffer drawCommandBuffer;
|
|
|
|
|
{
|
|
|
|
|
VkCommandBufferAllocateInfo allocInfo{};
|
|
|
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
|
|
|
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
|
|
|
allocInfo.commandPool = ctxt.commandPool;
|
|
|
|
|
allocInfo.commandBufferCount = 1;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocInfo,
|
2023-06-25 15:54:49 +02:00
|
|
|
&drawCommandBuffer);
|
|
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
Verify() << vkBeginCommandBuffer(drawCommandBuffer, &beginInfo);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
VkRenderingInfo renderInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
|
|
|
|
|
.renderArea =
|
|
|
|
|
{
|
|
|
|
|
.offset = {.x = static_cast<int32_t>(regs.screenScissorX),
|
|
|
|
|
.y = static_cast<int32_t>(regs.screenScissorY)},
|
|
|
|
|
.extent =
|
|
|
|
|
{
|
|
|
|
|
.width = regs.screenScissorW,
|
|
|
|
|
.height = regs.screenScissorH,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
.layerCount = 1,
|
|
|
|
|
.colorAttachmentCount = static_cast<uint32_t>(colorAttachments.size()),
|
|
|
|
|
.pColorAttachments = colorAttachments.data(),
|
|
|
|
|
.pDepthAttachment = &depthAttachment,
|
|
|
|
|
.pStencilAttachment = &depthAttachment,
|
|
|
|
|
};
|
|
|
|
|
vkCmdBeginRendering(drawCommandBuffer, &renderInfo);
|
|
|
|
|
|
|
|
|
|
// std::printf("viewport: %ux%u, %ux%u\n", regs.screenScissorX,
|
|
|
|
|
// regs.screenScissorY, regs.screenScissorW,
|
|
|
|
|
// regs.screenScissorH);
|
2023-06-24 14:59:27 +02:00
|
|
|
VkViewport viewport{};
|
2023-07-19 14:16:38 +02:00
|
|
|
viewport.x = regs.screenScissorX;
|
|
|
|
|
viewport.y = (float)regs.screenScissorH - regs.screenScissorY;
|
|
|
|
|
viewport.width = regs.screenScissorW;
|
|
|
|
|
viewport.height = -(float)regs.screenScissorH;
|
2023-07-24 05:03:55 +02:00
|
|
|
viewport.minDepth = -1.0f;
|
2023-06-24 14:59:27 +02:00
|
|
|
viewport.maxDepth = 1.0f;
|
2023-06-25 15:54:49 +02:00
|
|
|
vkCmdSetViewport(drawCommandBuffer, 0, 1, &viewport);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
VkRect2D scissor{};
|
2023-07-19 14:16:38 +02:00
|
|
|
scissor.extent.width = regs.screenScissorW;
|
|
|
|
|
scissor.extent.height = regs.screenScissorH;
|
|
|
|
|
scissor.offset.x = regs.screenScissorX;
|
|
|
|
|
scissor.offset.y = regs.screenScissorY;
|
2023-06-25 15:54:49 +02:00
|
|
|
vkCmdSetScissor(drawCommandBuffer, 0, 1, &scissor);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// vkCmdSetDepthClampEnableEXT(drawCommandBuffer, VK_TRUE);
|
|
|
|
|
vkCmdSetDepthCompareOp(drawCommandBuffer, (VkCompareOp)regs.zFunc);
|
|
|
|
|
vkCmdSetDepthTestEnable(drawCommandBuffer,
|
|
|
|
|
regs.depthEnable ? VK_TRUE : VK_FALSE);
|
|
|
|
|
vkCmdSetDepthWriteEnable(drawCommandBuffer,
|
|
|
|
|
regs.depthWriteEnable ? VK_TRUE : VK_FALSE);
|
|
|
|
|
// vkCmdSetDepthBounds(drawCommandBuffer, -1.f, 1.f);
|
|
|
|
|
// vkCmdSetDepthBoundsTestEnable(drawCommandBuffer,
|
|
|
|
|
// regs.depthBoundsEnable ? VK_TRUE :
|
|
|
|
|
// VK_FALSE);
|
|
|
|
|
// vkCmdSetCullMode(
|
|
|
|
|
// drawCommandBuffer,
|
|
|
|
|
// (regs.cullBack ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE) |
|
|
|
|
|
// (regs.cullFront ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_NONE));
|
|
|
|
|
vkCmdSetFrontFace(drawCommandBuffer, regs.face
|
|
|
|
|
? VK_FRONT_FACE_CLOCKWISE
|
|
|
|
|
: VK_FRONT_FACE_COUNTER_CLOCKWISE);
|
|
|
|
|
|
|
|
|
|
vkCmdSetPrimitiveTopology(drawCommandBuffer, getVkPrimitiveType(primType));
|
|
|
|
|
vkCmdSetStencilTestEnable(drawCommandBuffer, VK_FALSE);
|
|
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
vkCmdBindDescriptorSets(drawCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
2023-07-24 05:03:55 +02:00
|
|
|
pipelineLayout, 0, 1, &descSet, 0, nullptr);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
vk::Buffer indexBufferStorage;
|
2023-06-24 14:59:27 +02:00
|
|
|
BufferRef indexBuffer;
|
|
|
|
|
auto needConversion = isPrimRequiresConversion(primType);
|
2023-07-19 14:16:38 +02:00
|
|
|
VkIndexType vkIndexType = (regs.indexType & 0x1f) == 0
|
|
|
|
|
? VK_INDEX_TYPE_UINT16
|
|
|
|
|
: VK_INDEX_TYPE_UINT32;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
if (needConversion) {
|
2023-07-17 14:35:53 +02:00
|
|
|
auto indecies = g_hostMemory.getPointer(indeciesAddress);
|
2023-06-24 14:59:27 +02:00
|
|
|
if (indecies == nullptr) {
|
|
|
|
|
indexCount = count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned origIndexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32;
|
|
|
|
|
auto converterFn = getPrimConverterFn(primType, &indexCount);
|
|
|
|
|
|
|
|
|
|
if (indecies == nullptr) {
|
|
|
|
|
if (indexCount < 0x10000) {
|
|
|
|
|
vkIndexType = VK_INDEX_TYPE_UINT16;
|
|
|
|
|
} else if (indecies) {
|
|
|
|
|
vkIndexType = VK_INDEX_TYPE_UINT32;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32;
|
|
|
|
|
auto indexBufferSize = indexSize * indexCount;
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
indexBufferStorage = vk::Buffer::Allocate(
|
2023-06-25 15:54:49 +02:00
|
|
|
getHostVisibleMemory(), indexBufferSize,
|
2023-06-24 14:59:27 +02:00
|
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
void *data = indexBufferStorage.getData();
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
if (indecies == nullptr) {
|
|
|
|
|
if (indexSize == 16) {
|
|
|
|
|
for (std::uint32_t i = 0; i < indexCount; ++i) {
|
|
|
|
|
auto [dstIndex, srcIndex] = converterFn(i);
|
|
|
|
|
((std::uint16_t *)data)[dstIndex] = srcIndex;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
for (std::uint32_t i = 0; i < indexCount; ++i) {
|
|
|
|
|
auto [dstIndex, srcIndex] = converterFn(i);
|
|
|
|
|
((std::uint32_t *)data)[dstIndex] = srcIndex;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if (indexSize == 16) {
|
|
|
|
|
for (std::uint32_t i = 0; i < indexCount; ++i) {
|
|
|
|
|
auto [dstIndex, srcIndex] = converterFn(i);
|
|
|
|
|
std::uint32_t origIndex =
|
|
|
|
|
origIndexSize == 16 ? ((std::uint16_t *)indecies)[srcIndex]
|
|
|
|
|
: ((std::uint32_t *)indecies)[srcIndex];
|
|
|
|
|
((std::uint16_t *)data)[dstIndex] = origIndex;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
for (std::uint32_t i = 0; i < indexCount; ++i) {
|
|
|
|
|
auto [dstIndex, srcIndex] = converterFn(i);
|
|
|
|
|
std::uint32_t origIndex =
|
|
|
|
|
origIndexSize == 16 ? ((std::uint16_t *)indecies)[srcIndex]
|
|
|
|
|
: ((std::uint32_t *)indecies)[srcIndex];
|
|
|
|
|
((std::uint32_t *)data)[dstIndex] = origIndex;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-25 23:12:44 +02:00
|
|
|
indexBuffer = {indexBufferStorage.getHandle(), 0, indexBufferSize};
|
2023-06-24 14:59:27 +02:00
|
|
|
} else if (indeciesAddress != 0) {
|
2023-07-17 14:35:53 +02:00
|
|
|
unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 2 : 4;
|
2023-06-24 14:59:27 +02:00
|
|
|
auto indexBufferSize = indexSize * indexCount;
|
2023-07-17 14:35:53 +02:00
|
|
|
|
|
|
|
|
indexBuffer =
|
2023-07-24 05:03:55 +02:00
|
|
|
g_cache.getBuffer(loadTaskSet, storeTaskSet, indeciesAddress,
|
|
|
|
|
indexBufferSize, shader::AccessOp::Load);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_VERTEX_BIT,
|
|
|
|
|
VK_SHADER_STAGE_FRAGMENT_BIT};
|
|
|
|
|
_vkCmdBindShadersEXT(drawCommandBuffer, 1, stages + 0,
|
|
|
|
|
&vertexShader.shader);
|
|
|
|
|
_vkCmdBindShadersEXT(drawCommandBuffer, 1, stages + 1,
|
|
|
|
|
&fragmentShader.shader);
|
|
|
|
|
|
|
|
|
|
if (primType == kPrimitiveTypeRectList) {
|
|
|
|
|
VkShaderStageFlagBits stage = VK_SHADER_STAGE_GEOMETRY_BIT;
|
|
|
|
|
auto shader = getPrimTypeRectGeomShader();
|
|
|
|
|
_vkCmdBindShadersEXT(drawCommandBuffer, 1, &stage, &shader);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (indexBuffer.buffer == nullptr) {
|
2023-06-25 15:54:49 +02:00
|
|
|
vkCmdDraw(drawCommandBuffer, count, 1, 0, 0);
|
2023-06-24 14:59:27 +02:00
|
|
|
} else {
|
2023-06-26 12:14:07 +02:00
|
|
|
vkCmdBindIndexBuffer(drawCommandBuffer, indexBuffer.buffer,
|
2023-06-24 14:59:27 +02:00
|
|
|
indexBuffer.offset, vkIndexType);
|
2023-06-25 15:54:49 +02:00
|
|
|
vkCmdDrawIndexed(drawCommandBuffer, indexCount, 1, 0, 0, 0);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
vkCmdEndRendering(drawCommandBuffer);
|
2023-06-25 15:54:49 +02:00
|
|
|
vkEndCommandBuffer(drawCommandBuffer);
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
vkEndCommandBuffer(readCommandBuffer);
|
2023-07-24 05:03:55 +02:00
|
|
|
|
|
|
|
|
loadTaskSet.wait();
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
submitToQueue(ctxt.queue, readCommandBuffer, sem, 1);
|
|
|
|
|
submitToQueue(ctxt.queue, drawCommandBuffer, sem, 1, 2,
|
|
|
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT);
|
2023-06-25 15:54:49 +02:00
|
|
|
|
|
|
|
|
VkCommandBuffer writeCommandBuffer = transferCommandBuffers[1];
|
|
|
|
|
Verify() << vkBeginCommandBuffer(writeCommandBuffer, &beginInfo);
|
2023-07-17 14:35:53 +02:00
|
|
|
sem.wait(2, UINTMAX_MAX);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
g_cache.releaseGraphicsDescriptorSet(descSet);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
vkEndCommandBuffer(writeCommandBuffer);
|
|
|
|
|
submitToQueue(ctxt.queue, writeCommandBuffer, sem, 2, 3,
|
|
|
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT);
|
2023-06-25 15:54:49 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
sem.wait(3, UINTMAX_MAX);
|
2023-07-24 05:03:55 +02:00
|
|
|
storeTaskSet.enqueue(g_scheduler);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void dispatch(std::size_t dimX, std::size_t dimY, std::size_t dimZ) {
|
2023-07-24 05:03:55 +02:00
|
|
|
VkCommandBufferAllocateInfo allocInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
|
|
|
|
.commandPool = ctxt.commandPool,
|
|
|
|
|
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
|
|
|
|
.commandBufferCount = 1,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
VkCommandBufferBeginInfo beginInfo{
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
|
|
|
|
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
|
|
|
|
};
|
2023-06-25 15:54:49 +02:00
|
|
|
|
|
|
|
|
VkCommandBuffer commandBuffer;
|
2023-07-17 14:35:53 +02:00
|
|
|
vkAllocateCommandBuffers(vk::g_vkDevice, &allocInfo, &commandBuffer);
|
2023-06-25 15:54:49 +02:00
|
|
|
vkBeginCommandBuffer(commandBuffer, &beginInfo);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
TaskSet loadTaskSet, storeTaskSet;
|
|
|
|
|
auto [desriptorSetLayout, pipelineLayout] = getComputeLayout();
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto &computeShader = g_cache.getShader(
|
|
|
|
|
loadTaskSet, desriptorSetLayout, shader::Stage::Compute,
|
|
|
|
|
regs.pgmComputeAddress, regs.userComputeData, regs.computeUserSpgrs,
|
|
|
|
|
regs.computeNumThreadX, regs.computeNumThreadY, regs.computeNumThreadZ);
|
|
|
|
|
loadTaskSet.wait();
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto descSet = g_cache.getComputeDescriptorSet();
|
|
|
|
|
loadShaderBindings(loadTaskSet, storeTaskSet, descSet, computeShader.info,
|
|
|
|
|
commandBuffer);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
|
|
|
|
|
_vkCmdBindShadersEXT(commandBuffer, 1, stages + 0, &computeShader.shader);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
loadTaskSet.wait();
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
2023-07-24 05:03:55 +02:00
|
|
|
pipelineLayout, 0, 1, &descSet, 0, nullptr);
|
|
|
|
|
|
2023-06-24 14:59:27 +02:00
|
|
|
vkCmdDispatch(commandBuffer, dimX, dimY, dimZ);
|
|
|
|
|
vkEndCommandBuffer(commandBuffer);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
auto &sem = g_cache.getSemaphore();
|
|
|
|
|
submitToQueue(ctxt.queue, commandBuffer, sem, 1);
|
|
|
|
|
sem.wait(1, UINTMAX_MAX);
|
|
|
|
|
// storeTaskSet.enqueue(g_scheduler);
|
|
|
|
|
// storeTaskSet.wait();
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
static void draw(DrawContext &ctxt, QueueRegisters ®s, std::uint32_t count,
|
2023-07-17 14:35:53 +02:00
|
|
|
std::uint64_t indeciesAddress, std::uint32_t indexCount) {
|
2023-07-19 14:16:38 +02:00
|
|
|
RenderState{.ctxt = ctxt, .regs = regs}.draw(count, indeciesAddress,
|
|
|
|
|
indexCount);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
static void dispatch(DrawContext &ctxt, QueueRegisters ®s, std::size_t dimX,
|
|
|
|
|
std::size_t dimY, std::size_t dimZ) {
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
RenderState{.ctxt = ctxt, .regs = regs}.dispatch(dimX, dimY, dimZ);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Selects which value writeEop() stores at the event's destination address.
// The numeric values are the raw field values decoded from the command
// stream, so they must not be renumbered.
enum class EventWriteSource : std::uint8_t {
  Immediate32 = 1,         // low 32 bits of the packet's immediate value
  Immediate64 = 2,         // full 64-bit immediate value
  GlobalClockCounter = 3,  // result of globalClock()
  GpuCoreClockCounter = 4, // result of gpuCoreClock()
};
|
|
|
|
|
|
|
|
|
|
struct EopData {
|
|
|
|
|
std::uint32_t eventType;
|
|
|
|
|
std::uint32_t eventIndex;
|
|
|
|
|
std::uint64_t address;
|
|
|
|
|
std::uint64_t value;
|
|
|
|
|
std::uint8_t dstSel;
|
|
|
|
|
std::uint8_t intSel;
|
|
|
|
|
EventWriteSource eventSource;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Value stored for EventWriteSource::GlobalClockCounter.
// TODO: not implemented yet; always reports zero.
static std::uint64_t globalClock() {
  constexpr std::uint64_t kPlaceholderTicks = 0;
  return kPlaceholderTicks;
}
|
|
|
|
|
|
|
|
|
|
// Value stored for EventWriteSource::GpuCoreClockCounter.
// TODO: not implemented yet; always reports zero.
static std::uint64_t gpuCoreClock() {
  constexpr std::uint64_t kPlaceholderTicks = 0;
  return kPlaceholderTicks;
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
// Performs the memory write requested by an end-of-pipe event: stores the
// value selected by data.eventSource at guest address data.address.
// Immediate32 writes 32 bits; every other known source writes 64 bits.
// An unrecognised source writes nothing.
static void writeEop(EopData data) {
  const EventWriteSource source = data.eventSource;

  if (source == EventWriteSource::Immediate32) {
    // Only the low 32 bits of the immediate are stored.
    auto *target32 = g_hostMemory.getPointer<std::uint32_t>(data.address);
    *target32 = data.value;
    return;
  }

  std::uint64_t payload;
  if (source == EventWriteSource::Immediate64) {
    payload = data.value;
  } else if (source == EventWriteSource::GlobalClockCounter) {
    payload = globalClock();
  } else if (source == EventWriteSource::GpuCoreClockCounter) {
    payload = gpuCoreClock();
  } else {
    return;
  }

  auto *target64 = g_hostMemory.getPointer<std::uint64_t>(data.address);
  *target64 = payload;
}
|
|
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
static void drawIndexAuto(amdgpu::device::DrawContext &ctxt,
|
2023-07-19 14:16:38 +02:00
|
|
|
QueueRegisters ®s, std::uint32_t count) {
|
|
|
|
|
draw(ctxt, regs, count, 0, 0);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
static void drawIndex2(amdgpu::device::DrawContext &ctxt, QueueRegisters ®s,
|
|
|
|
|
std::uint32_t maxSize, std::uint64_t address,
|
|
|
|
|
std::uint32_t count) {
|
|
|
|
|
draw(ctxt, regs, count, address, maxSize);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
static void handleCommandBuffer(DrawContext &ctxt, QueueRegisters ®s,
|
|
|
|
|
std::uint32_t *cmds, std::uint32_t count) {
|
2023-07-24 05:03:55 +02:00
|
|
|
bool log = false;
|
2023-06-24 14:59:27 +02:00
|
|
|
for (std::uint32_t cmdOffset = 0; cmdOffset < count; ++cmdOffset) {
|
|
|
|
|
auto cmd = cmds[cmdOffset];
|
|
|
|
|
auto type = getBits(cmd, 31, 30);
|
|
|
|
|
|
|
|
|
|
if (type == 0) {
|
|
|
|
|
std::printf("!packet type 0!\n");
|
|
|
|
|
auto baseIndex = getBits(cmd, 15, 0);
|
|
|
|
|
auto count = getBits(cmd, 29, 16);
|
|
|
|
|
std::printf("-- %04x: %08x: baseIndex=%x, count=%d\n", cmdOffset, cmd,
|
|
|
|
|
baseIndex, count);
|
|
|
|
|
cmdOffset += count;
|
|
|
|
|
} else if (type == 1) {
|
|
|
|
|
std::printf("Unexpected packet type 1!\n");
|
|
|
|
|
} else if (type == 2) {
|
|
|
|
|
std::printf("!packet type 2!\n");
|
|
|
|
|
continue;
|
|
|
|
|
} else if (type == 3) {
|
|
|
|
|
auto predicate = getBit(cmd, 0);
|
|
|
|
|
auto shaderType = getBit(cmd, 1);
|
|
|
|
|
auto op = getBits(cmd, 15, 8);
|
|
|
|
|
auto len = getBits(cmd, 29, 16) + 1;
|
|
|
|
|
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf("-- %04x: %08x: %s len=%d, shaderType = %cX\n", cmdOffset,
|
|
|
|
|
cmd, opcodeToString(op).c_str(), len,
|
|
|
|
|
'G' ^ (shaderType << 1));
|
|
|
|
|
|
|
|
|
|
for (std::uint32_t offset = 0; offset < len; ++offset) {
|
|
|
|
|
std::printf(" %04x: %08x\n", cmdOffset + 1 + offset,
|
|
|
|
|
cmds[cmdOffset + 1 + offset]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
|
case kOpcodeLOAD_CONST_RAM: {
|
|
|
|
|
std::uint64_t addressLo = cmds[cmdOffset + 1];
|
|
|
|
|
std::uint64_t addressHi = cmds[cmdOffset + 2];
|
|
|
|
|
std::uint32_t numDw = getBits(cmds[cmdOffset + 3], 14, 0);
|
|
|
|
|
std::uint32_t offset = getBits(cmds[cmdOffset + 4], 15, 0);
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" ` address=%lx, numDw = %x, offset=%x\n",
|
|
|
|
|
addressLo | (addressHi << 32), numDw, offset);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeSET_UCONFIG_REG: {
|
|
|
|
|
std::uint32_t baseRegOffset = 0xc000 + cmds[cmdOffset + 1];
|
|
|
|
|
|
|
|
|
|
for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) {
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" %04x: %04x: %s = 0x%08x\n",
|
|
|
|
|
cmdOffset + 2 + regOffset,
|
|
|
|
|
(baseRegOffset + regOffset) << 2,
|
|
|
|
|
registerToString(baseRegOffset + regOffset).c_str(),
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
regs.setRegister(baseRegOffset + regOffset,
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeSET_CONTEXT_REG: {
|
|
|
|
|
std::uint32_t baseRegOffset = 0xa000 + cmds[cmdOffset + 1];
|
|
|
|
|
|
|
|
|
|
for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) {
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" %04x: %04x: %s = 0x%08x\n",
|
|
|
|
|
cmdOffset + 2 + regOffset,
|
|
|
|
|
(baseRegOffset + regOffset) << 2,
|
|
|
|
|
registerToString(baseRegOffset + regOffset).c_str(),
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
|
|
|
|
}
|
2023-07-19 14:16:38 +02:00
|
|
|
regs.setRegister(baseRegOffset + regOffset,
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeSET_SH_REG: {
|
|
|
|
|
std::uint32_t baseRegOffset = 0x2c00 + cmds[cmdOffset + 1];
|
|
|
|
|
|
|
|
|
|
for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) {
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" %04x: %04x: %s = 0x%08x\n",
|
|
|
|
|
cmdOffset + 2 + regOffset,
|
|
|
|
|
(baseRegOffset + regOffset) << 2,
|
|
|
|
|
registerToString(baseRegOffset + regOffset).c_str(),
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
regs.setRegister(baseRegOffset + regOffset,
|
|
|
|
|
cmds[cmdOffset + 2 + regOffset]);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeWRITE_DATA: {
|
|
|
|
|
auto control = cmds[cmdOffset + 1];
|
|
|
|
|
auto destAddrLo = cmds[cmdOffset + 2];
|
|
|
|
|
auto destAddrHi = cmds[cmdOffset + 3];
|
|
|
|
|
auto data = cmds + cmdOffset + 4;
|
|
|
|
|
auto size = len - 3;
|
|
|
|
|
|
|
|
|
|
// 0 - Micro Engine - ME
|
|
|
|
|
// 1 - Prefetch parser - PFP
|
|
|
|
|
// 2 - Constant engine - CE
|
|
|
|
|
// 3 - Dispatch engine - DE
|
|
|
|
|
auto engineSel = getBits(control, 31, 30);
|
|
|
|
|
|
|
|
|
|
// wait for confirmation that write complete
|
|
|
|
|
auto wrConfirm = getBit(control, 20);
|
|
|
|
|
|
|
|
|
|
// do not increment address
|
|
|
|
|
auto wrOneAddr = getBit(control, 16);
|
|
|
|
|
|
|
|
|
|
// 0 - mem-mapped register
|
|
|
|
|
// 1 - memory sync
|
|
|
|
|
// 2 - tc/l2
|
|
|
|
|
// 3 - gds
|
|
|
|
|
// 4 - reserved
|
|
|
|
|
// 5 - memory async
|
|
|
|
|
auto dstSel = getBits(control, 11, 8);
|
|
|
|
|
|
|
|
|
|
auto memMappedRegisterAddress = getBits(destAddrLo, 15, 0);
|
|
|
|
|
auto memory32bit = getBits(destAddrLo, 31, 2);
|
|
|
|
|
auto memory64bit = getBits(destAddrLo, 31, 3);
|
|
|
|
|
auto gdsOffset = getBits(destAddrLo, 15, 0);
|
|
|
|
|
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" %04x: control=%x [engineSel=%d, "
|
|
|
|
|
"wrConfirm=%d,wrOneAddr=%d, dstSel=%d]\n",
|
|
|
|
|
cmdOffset + 1, control, engineSel, wrConfirm, wrOneAddr,
|
|
|
|
|
dstSel);
|
|
|
|
|
|
|
|
|
|
std::printf(" %04x: destAddrLo=%x "
|
|
|
|
|
"[memory32bit=%x,memory64bit=%x,gdsOffset=%x]\n",
|
|
|
|
|
cmdOffset + 2, destAddrLo, memory32bit, memory64bit,
|
|
|
|
|
gdsOffset);
|
|
|
|
|
|
|
|
|
|
std::printf(" %04x: destAddrHi=%x\n", cmdOffset + 3, destAddrHi);
|
|
|
|
|
|
|
|
|
|
for (std::uint32_t offset = 4; offset < len; ++offset) {
|
|
|
|
|
std::printf(" %04x: %08x\n", cmdOffset + offset,
|
|
|
|
|
cmds[cmdOffset + offset]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
auto address =
|
|
|
|
|
destAddrLo | (static_cast<std::uint64_t>(destAddrHi) << 32);
|
2023-07-17 14:35:53 +02:00
|
|
|
auto dest = g_hostMemory.getPointer<std::uint32_t>(address);
|
2023-06-24 14:59:27 +02:00
|
|
|
if (log) {
|
|
|
|
|
std::printf(" address=%lx\n", address);
|
|
|
|
|
}
|
|
|
|
|
for (unsigned i = 0; i < size; ++i) {
|
|
|
|
|
dest[i] = data[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeINDEX_TYPE: {
|
2023-07-19 14:16:38 +02:00
|
|
|
regs.indexType = cmds[cmdOffset + 1];
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeDRAW_INDEX_AUTO: {
|
2023-07-19 14:16:38 +02:00
|
|
|
drawIndexAuto(ctxt, regs, cmds[cmdOffset + 1]);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeDRAW_INDEX_2: {
|
|
|
|
|
auto maxSize = cmds[cmdOffset + 1];
|
|
|
|
|
auto address = cmds[cmdOffset + 2] |
|
|
|
|
|
(static_cast<std::uint64_t>(cmds[cmdOffset + 3]) << 32);
|
|
|
|
|
auto count = cmds[cmdOffset + 4];
|
2023-07-19 14:16:38 +02:00
|
|
|
drawIndex2(ctxt, regs, maxSize, address, count);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeDISPATCH_DIRECT: {
|
|
|
|
|
auto dimX = cmds[cmdOffset + 1];
|
|
|
|
|
auto dimY = cmds[cmdOffset + 2];
|
|
|
|
|
auto dimZ = cmds[cmdOffset + 3];
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" %04x: DIM X=%u\n", cmdOffset + 1, dimX);
|
|
|
|
|
std::printf(" %04x: DIM Y=%u\n", cmdOffset + 2, dimY);
|
|
|
|
|
std::printf(" %04x: DIM Z=%u\n", cmdOffset + 3, dimZ);
|
|
|
|
|
}
|
2023-07-19 14:16:38 +02:00
|
|
|
dispatch(ctxt, regs, dimX, dimY, dimZ);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeEVENT_WRITE_EOP: {
|
|
|
|
|
EopData eopData{};
|
|
|
|
|
eopData.eventType = getBits(cmds[cmdOffset + 1], 6, 0);
|
|
|
|
|
eopData.eventIndex = getBits(cmds[cmdOffset + 1], 12, 8);
|
|
|
|
|
eopData.address =
|
|
|
|
|
cmds[cmdOffset + 2] |
|
|
|
|
|
(static_cast<std::uint64_t>(getBits(cmds[cmdOffset + 3], 16, 0))
|
|
|
|
|
<< 32);
|
|
|
|
|
eopData.value = cmds[cmdOffset + 4] |
|
|
|
|
|
(static_cast<std::uint64_t>(cmds[cmdOffset + 5]) << 32);
|
|
|
|
|
eopData.dstSel = 0;
|
|
|
|
|
eopData.intSel = getBits(cmds[cmdOffset + 3], 26, 24);
|
|
|
|
|
eopData.eventSource =
|
|
|
|
|
static_cast<EventWriteSource>(getBits(cmds[cmdOffset + 3], 32, 29));
|
2023-07-17 14:35:53 +02:00
|
|
|
writeEop(eopData);
|
2023-06-24 14:59:27 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeEVENT_WRITE_EOS: {
|
|
|
|
|
std::uint32_t eventType = getBits(cmds[cmdOffset + 1], 6, 0);
|
|
|
|
|
std::uint32_t eventIndex = getBits(cmds[cmdOffset + 1], 12, 8);
|
|
|
|
|
std::uint64_t address =
|
|
|
|
|
cmds[cmdOffset + 2] |
|
|
|
|
|
(static_cast<std::uint64_t>(getBits(cmds[cmdOffset + 3], 16, 0))
|
|
|
|
|
<< 32);
|
|
|
|
|
std::uint32_t command = getBits(cmds[cmdOffset + 3], 32, 16);
|
|
|
|
|
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf("address = %#lx\n", address);
|
|
|
|
|
std::printf("command = %#x\n", command);
|
|
|
|
|
}
|
|
|
|
|
if (command == 0x4000) { // store 32bit data
|
2023-07-17 14:35:53 +02:00
|
|
|
*g_hostMemory.getPointer<std::uint32_t>(address) =
|
|
|
|
|
cmds[cmdOffset + 4];
|
2023-06-24 14:59:27 +02:00
|
|
|
} else {
|
|
|
|
|
util::unreachable();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeWAIT_REG_MEM: {
|
|
|
|
|
auto function = cmds[cmdOffset + 1] & 7;
|
|
|
|
|
auto pollAddressLo = cmds[cmdOffset + 2];
|
|
|
|
|
auto pollAddressHi = cmds[cmdOffset + 3];
|
|
|
|
|
auto reference = cmds[cmdOffset + 4];
|
|
|
|
|
auto mask = cmds[cmdOffset + 5];
|
|
|
|
|
auto pollInterval = cmds[cmdOffset + 5];
|
|
|
|
|
|
|
|
|
|
auto pollAddress =
|
|
|
|
|
pollAddressLo | (static_cast<std::uint64_t>(pollAddressHi) << 32);
|
2023-07-17 14:35:53 +02:00
|
|
|
auto pointer =
|
|
|
|
|
g_hostMemory.getPointer<volatile std::uint32_t>(pollAddress);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
|
|
|
|
reference &= mask;
|
|
|
|
|
|
|
|
|
|
auto compare = [&](std::uint32_t value, std::uint32_t reference,
|
|
|
|
|
int function) {
|
|
|
|
|
switch (function) {
|
|
|
|
|
case 0:
|
|
|
|
|
return true;
|
|
|
|
|
case 1:
|
|
|
|
|
return value < reference;
|
|
|
|
|
case 2:
|
|
|
|
|
return value <= reference;
|
|
|
|
|
case 3:
|
|
|
|
|
return value == reference;
|
|
|
|
|
case 4:
|
|
|
|
|
return value != reference;
|
|
|
|
|
case 5:
|
|
|
|
|
return value >= reference;
|
|
|
|
|
case 6:
|
|
|
|
|
return value > reference;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
util::unreachable();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (log) {
|
|
|
|
|
std::printf(" polling address %lx, reference = %x, function = %u\n",
|
|
|
|
|
pollAddress, reference, function);
|
|
|
|
|
std::fflush(stdout);
|
|
|
|
|
}
|
|
|
|
|
while (true) {
|
|
|
|
|
auto value = *pointer & mask;
|
|
|
|
|
|
|
|
|
|
if (compare(value, reference, function)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case kOpcodeNOP:
|
|
|
|
|
if (log) {
|
|
|
|
|
for (std::uint32_t offset = 0; offset < len; ++offset) {
|
|
|
|
|
std::printf(" %04x: %08x\n", cmdOffset + 1 + offset,
|
|
|
|
|
cmds[cmdOffset + 1 + offset]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
2023-07-24 05:03:55 +02:00
|
|
|
if (log) {
|
|
|
|
|
for (std::uint32_t offset = 0; offset < len; ++offset) {
|
|
|
|
|
std::printf(" %04x: %08x\n", cmdOffset + 1 + offset,
|
|
|
|
|
cmds[cmdOffset + 1 + offset]);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cmdOffset += len;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
void amdgpu::device::AmdgpuDevice::handleProtectMemory(std::uint64_t address,
|
|
|
|
|
std::uint64_t size,
|
|
|
|
|
std::uint32_t prot) {
|
|
|
|
|
auto beginPage = address / kPageSize;
|
|
|
|
|
auto endPage = (address + size + kPageSize - 1) / kPageSize;
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-17 14:35:53 +02:00
|
|
|
::mprotect(g_hostMemory.getPointer(address), size, prot >> 4);
|
2023-07-14 03:33:45 +02:00
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
if (prot >> 4) {
|
2023-07-17 14:35:53 +02:00
|
|
|
memoryAreaTable.map(beginPage, endPage);
|
2023-07-14 03:33:45 +02:00
|
|
|
const char *protStr;
|
|
|
|
|
switch (prot >> 4) {
|
|
|
|
|
case PROT_READ:
|
|
|
|
|
protStr = "R";
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PROT_WRITE:
|
|
|
|
|
protStr = "W";
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PROT_WRITE | PROT_READ:
|
|
|
|
|
protStr = "W";
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
protStr = "unknown";
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
std::printf("Allocated area at %zx, size %lx, prot %s\n", address, size,
|
|
|
|
|
protStr);
|
2023-06-25 15:54:49 +02:00
|
|
|
} else {
|
2023-07-17 14:35:53 +02:00
|
|
|
memoryAreaTable.unmap(beginPage, endPage);
|
2023-07-14 03:33:45 +02:00
|
|
|
std::printf("Unmapped area at %zx, size %lx\n", address, size);
|
2023-06-25 15:54:49 +02:00
|
|
|
}
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
2023-07-19 14:16:38 +02:00
|
|
|
|
|
|
|
|
// Register state per command queue, keyed by queue id. Entries are created
// on first use by AmdgpuDevice::handleCommandBuffer via operator[].
static std::unordered_map<std::uint64_t, QueueRegisters> queueRegs;
|
|
|
|
|
|
|
|
|
|
void amdgpu::device::AmdgpuDevice::handleCommandBuffer(std::uint64_t queueId,
|
|
|
|
|
std::uint64_t address,
|
2023-06-25 15:54:49 +02:00
|
|
|
std::uint64_t size) {
|
2023-06-24 14:59:27 +02:00
|
|
|
auto count = size / sizeof(std::uint32_t);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::printf("address = %lx, count = %lx\n", address, count);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-19 14:16:38 +02:00
|
|
|
::handleCommandBuffer(dc, queueRegs[queueId],
|
|
|
|
|
g_hostMemory.getPointer<std::uint32_t>(address), count);
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-06-25 17:37:20 +02:00
|
|
|
bool amdgpu::device::AmdgpuDevice::handleFlip(
|
|
|
|
|
std::uint32_t bufferIndex, std::uint64_t arg, VkCommandBuffer cmd,
|
|
|
|
|
VkImage targetImage, VkExtent2D targetExtent,
|
2023-06-25 21:58:15 +02:00
|
|
|
std::vector<VkBuffer> &usedBuffers, std::vector<VkImage> &usedImages) {
|
2023-06-24 14:59:27 +02:00
|
|
|
std::printf("requested flip %d\n", bufferIndex);
|
|
|
|
|
|
2023-06-30 16:10:22 +02:00
|
|
|
if (bufferIndex == ~static_cast<std::uint32_t>(0)) {
|
2023-07-20 02:14:52 +02:00
|
|
|
bridge->flipBuffer = bufferIndex;
|
|
|
|
|
bridge->flipArg = arg;
|
|
|
|
|
bridge->flipCount = bridge->flipCount + 1;
|
|
|
|
|
|
2023-06-30 16:10:22 +02:00
|
|
|
// black surface, ignore for now
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
// std::fprintf(stderr, "device local memory: ");
|
|
|
|
|
// getDeviceLocalMemory().dump();
|
|
|
|
|
// std::fprintf(stderr, "host visible memory: ");
|
|
|
|
|
// getHostVisibleMemory().dump();
|
|
|
|
|
|
2023-07-20 02:14:52 +02:00
|
|
|
auto buffer = bridge->buffers[bufferIndex];
|
|
|
|
|
|
2023-06-25 15:54:49 +02:00
|
|
|
if (buffer.pitch == 0 || buffer.height == 0 || buffer.address == 0) {
|
|
|
|
|
std::printf("Attempt to flip unallocated buffer\n");
|
|
|
|
|
return false;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|
|
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
std::printf("flip: address=%lx, buffer=%ux%u, target=%ux%u\n", buffer.address,
|
|
|
|
|
buffer.width, buffer.height, targetExtent.width,
|
|
|
|
|
targetExtent.height);
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
TaskSet readTask;
|
|
|
|
|
auto image =
|
|
|
|
|
g_cache.getImage(readTask, cmd, buffer.address, kSurfaceFormat8_8_8_8,
|
|
|
|
|
kTextureChannelTypeSrgb,
|
|
|
|
|
buffer.tilingMode == 1 ? kTileModeDisplay_2dThin
|
|
|
|
|
: kTileModeDisplay_LinearAligned,
|
|
|
|
|
buffer.width, buffer.height, buffer.pitch,
|
|
|
|
|
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
2023-07-19 14:02:57 +02:00
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
readTask.wait();
|
2023-06-25 21:58:15 +02:00
|
|
|
|
|
|
|
|
transitionImageLayout(cmd, targetImage, VK_IMAGE_ASPECT_COLOR_BIT,
|
|
|
|
|
VK_IMAGE_LAYOUT_UNDEFINED,
|
|
|
|
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
|
|
|
|
|
|
|
|
|
VkImageBlit region{
|
|
|
|
|
.srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
|
|
|
|
.mipLevel = 0,
|
|
|
|
|
.baseArrayLayer = 0,
|
|
|
|
|
.layerCount = 1},
|
|
|
|
|
.srcOffsets = {{},
|
|
|
|
|
{static_cast<int32_t>(buffer.width),
|
|
|
|
|
static_cast<int32_t>(buffer.height), 1}},
|
|
|
|
|
.dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
|
|
|
|
.mipLevel = 0,
|
|
|
|
|
.baseArrayLayer = 0,
|
|
|
|
|
.layerCount = 1},
|
|
|
|
|
.dstOffsets = {{},
|
|
|
|
|
{static_cast<int32_t>(targetExtent.width),
|
|
|
|
|
static_cast<int32_t>(targetExtent.height), 1}},
|
|
|
|
|
};
|
|
|
|
|
|
2023-07-24 05:03:55 +02:00
|
|
|
vkCmdBlitImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, targetImage,
|
2023-06-25 21:58:15 +02:00
|
|
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion,
|
|
|
|
|
VK_FILTER_LINEAR);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-06-25 21:58:15 +02:00
|
|
|
transitionImageLayout(cmd, targetImage, VK_IMAGE_ASPECT_COLOR_BIT,
|
|
|
|
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
|
|
|
|
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);
|
2023-06-24 14:59:27 +02:00
|
|
|
|
2023-07-20 02:14:52 +02:00
|
|
|
bridge->flipBuffer = bufferIndex;
|
|
|
|
|
bridge->flipArg = arg;
|
|
|
|
|
bridge->flipCount = bridge->flipCount + 1;
|
|
|
|
|
auto bufferInUse =
|
|
|
|
|
g_hostMemory.getPointer<std::uint64_t>(bridge->bufferInUseAddress);
|
|
|
|
|
if (bufferInUse != nullptr) {
|
|
|
|
|
bufferInUse[bufferIndex] = 0;
|
|
|
|
|
}
|
2023-06-25 15:54:49 +02:00
|
|
|
return true;
|
2023-06-24 14:59:27 +02:00
|
|
|
}
|