diff --git a/.gitmodules b/.gitmodules index 99ec5e665..a73061e22 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,9 +7,6 @@ [submodule "third_party/binutils-ppc-cygwin"] path = third_party/binutils-ppc-cygwin url = https://github.com/benvanik/binutils-ppc-cygwin.git -[submodule "third_party/spirv-tools"] - path = third_party/spirv-tools - url = https://github.com/xenia-project/SPIRV-Tools.git [submodule "third_party/catch"] path = third_party/catch url = https://github.com/catchorg/Catch2.git @@ -22,12 +19,6 @@ [submodule "third_party/premake-export-compile-commands"] path = third_party/premake-export-compile-commands url = https://github.com/xenia-project/premake-export-compile-commands.git -[submodule "third_party/spirv-headers"] - path = third_party/spirv-headers - url = https://github.com/KhronosGroup/SPIRV-Headers.git -[submodule "third_party/volk"] - path = third_party/volk - url = https://github.com/zeux/volk.git [submodule "third_party/discord-rpc"] path = third_party/discord-rpc url = https://github.com/discordapp/discord-rpc.git @@ -85,6 +76,12 @@ [submodule "third_party/Vulkan-Headers"] path = third_party/Vulkan-Headers url = https://github.com/KhronosGroup/Vulkan-Headers.git +[submodule "third_party/glslang"] + path = third_party/glslang + url = https://github.com/KhronosGroup/glslang.git +[submodule "third_party/SPIRV-Tools"] + path = third_party/SPIRV-Tools + url = https://github.com/KhronosGroup/SPIRV-Tools.git [submodule "third_party/VulkanMemoryAllocator"] path = third_party/VulkanMemoryAllocator url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git diff --git a/premake5.lua b/premake5.lua index 3659c683a..e685fe899 100644 --- a/premake5.lua +++ b/premake5.lua @@ -247,7 +247,6 @@ workspace("xenia") include("third_party/imgui.lua") include("third_party/mspack.lua") include("third_party/snappy.lua") - include("third_party/spirv-tools.lua") include("third_party/xxhash.lua") if not os.istarget("android") then @@ -288,7 +287,6 @@ workspace("xenia") include("src/xenia/kernel") include("src/xenia/patcher") include("src/xenia/ui") - include("src/xenia/ui/spirv") include("src/xenia/ui/vulkan") include("src/xenia/vfs") diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 091384c52..09c2d2a50 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -27,7 +27,6 @@ project("xenia-app") "xenia-kernel", "xenia-patcher", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-patcher", "xenia-vfs", @@ -44,7 +43,6 @@ project("xenia-app") "libavutil", "mspack", "snappy", - "spirv-tools", "xxhash", }) defines({ diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 1c3af47e6..3e2729993 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -59,7 +59,7 @@ #include "third_party/fmt/include/fmt/format.h" DEFINE_string(apu, "any", "Audio system. Use: [any, nop, sdl, xaudio2]", "APU"); -DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, null]", +DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, vulkan, null]", "GPU"); DEFINE_string(hid, "any", "Input system. 
Use: [any, nop, sdl, winkey, xinput]", "HID"); @@ -259,11 +259,82 @@ std::unique_ptr<apu::AudioSystem> EmulatorApp::CreateAudioSystem( } std::unique_ptr<gpu::GraphicsSystem> EmulatorApp::CreateGraphicsSystem() { + // While Vulkan is supported by a large variety of operating systems (Windows, + // GNU/Linux, Android, also via the MoltenVK translation layer on top of Metal + // on macOS and iOS), please don't remove platform-specific GPU backends from + // Xenia. + // + // Regardless of the operating system, having multiple options provides more + // stability to users. In case of driver issues, users may try switching + // between the available backends. For example, in June 2022, on Nvidia Ampere + // (RTX 30xx), Xenia had synchronization issues that resulted in flickering, + // most prominently in 4D5307E6, on Direct3D 12 - but the same issue was not + // reproducible in the Vulkan backend. That backend, however, used + // ImageSampleExplicitLod with explicit gradients for cubemaps, which triggered + // a different driver bug on Nvidia (every 1 out of 2x2 pixels receiving junk). + // + // Specifically on Microsoft platforms, there are a few reasons why supporting + // Direct3D 12 is desirable rather than limiting Xenia to Vulkan only: + // - Wider hardware support for Direct3D 12 on x86 Windows desktops. + // Direct3D 12 requires at minimum Nvidia Fermi, or, with a pre-2021 + // driver version, Intel HD Graphics 4200. Vulkan, however, is supported + // only starting with Nvidia Kepler and a much more recent Intel UHD + // Graphics generation. + // - Wider hardware support on other kinds of Microsoft devices. The Xbox One + // and the Xbox Series X|S only support Direct3D as the GPU API in their UWP + // runtime, and only version 12 can be granted expanded resource access. + // Qualcomm, as of June 2022, also doesn't provide a Vulkan implementation + // for their Arm-based Windows devices, while Direct3D 12 is available. + // - Both older Intel GPUs and apparently the Xbox One, as well as earlier + // Windows 10 versions, also require Shader Model 5.1 DXBC shaders rather + // than Shader Model 6 DXIL ones, so a DXBC shader translator should remain + // available in Xenia too - a DXIL one doesn't fully replace it. + // - As of June 2022, AMD also refuses to implement the + // VK_EXT_fragment_shader_interlock Vulkan extension in their drivers, as + // well as its OpenGL counterpart, which is heavily utilized for accurate + // support of Xenos render target formats that don't have PC equivalents + // (8_8_8_8_GAMMA, 2_10_10_10_FLOAT, 16_16 and 16_16_16_16 with -32 to 32 + // range, D24FS8) with correct blending. Direct3D 12, however, requires + // support for similar functionality (rasterizer-ordered views) on the + // feature level 12_1, and the AMD driver implements it on Direct3D, as well + // as raster order groups in their Metal driver. + // + // Additionally, different host GPU APIs receive feature support at different + // paces. VK_EXT_fragment_shader_interlock first appeared in 2019, for + // instance, while Xenia had been taking advantage of rasterizer-ordered views + // on Direct3D 12 for over half a year at that point (they have existed in + // Direct3D 12 since the first version). + // + // MoltenVK on top of Metal also has its flaws and limitations. 
Metal, for + // instance, as of June 2022, doesn't provide a switch for primitive restart, + // while Vulkan does - so MoltenVK is not completely transparent to Xenia, and + // many of its issues that may not be very obvious (unlike when the Metal API + // is used directly) should be taken into account in Xenia. Also, as of June + // 2022, MoltenVK translates SPIR-V shaders into the C++-based Metal Shading + // Language rather than AIR directly, which likely massively increases + // pipeline object creation time - and Xenia translates shaders and creates + // pipelines when they're first actually used for a draw command by the game, + // thus it can't precompile anything that hasn't ever been encountered before - + // by the time a new shader is encountered, there's already no time to waste. + // + // Very old hardware (Direct3D 10 level) is also not supported by most Vulkan + // drivers. However, in the future, Xenia may be ported to it using the + // Direct3D 11 API with the feature level 10_1 or 10_0. OpenGL, meanwhile, had + // been lagging behind Direct3D prior to versions 4.x, and didn't receive + // compute shaders until a 4.2 extension (while 4.2 already corresponds + // roughly to Direct3D 11 features) - and replacing Xenia compute shaders with + // transform feedback / stream output is not always trivial (in particular, it + // would need to rely on GL_ARB_transform_feedback3 for skipping over memory + // locations that shouldn't be overwritten). + // + // For maintainability, as much implementation code as possible should be + // placed in `xe::gpu` and shared between the backends rather than duplicated + // between them. Factory<gpu::GraphicsSystem> factory; #if XE_PLATFORM_WIN32 factory.Add<gpu::d3d12::D3D12GraphicsSystem>("d3d12"); #endif // XE_PLATFORM_WIN32 - //factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan"); + factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan"); factory.Add<gpu::null::NullGraphicsSystem>("null"); return factory.Create(cvars::gpu); } diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index bfdb70789..971d6ef70 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -11,20 +11,14 @@ project("xenia-gpu") "fmt", "glslang-spirv", "snappy", - "spirv-tools", "xenia-base", "xenia-ui", - "xenia-ui-spirv", "xxhash", }) - defines({ - }) includedirs({ - project_root.."/third_party/spirv-tools/external/include", + project_root.."/third_party/Vulkan-Headers/include", }) local_platform_files() - -- local_platform_files("spirv") - -- local_platform_files("spirv/passes") group("src") project("xenia-gpu-shader-compiler") @@ -36,13 +30,13 @@ project("xenia-gpu-shader-compiler") "fmt", "glslang-spirv", "snappy", - "spirv-tools", "xenia-base", "xenia-gpu", "xenia-ui", - "xenia-ui-spirv", + "xenia-ui-vulkan", }) - defines({ + includedirs({ + project_root.."/third_party/Vulkan-Headers/include", }) files({ "shader_compiler_main.cc", diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 31f9de372..fc08b2fd0 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -807,6 +807,9 @@ class Shader { Translation(Shader& shader, uint64_t modification) : shader_(shader), modification_(modification) {} + // If there was some failure during preparation on the implementation side. 
+ void MakeInvalid() { is_valid_ = false; } + private: friend class Shader; friend class ShaderTranslator; diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index 6c539ec6e..ec2e20184 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -9,9 +9,12 @@ #include #include +#include #include +#include #include +#include "third_party/glslang/SPIRV/disassemble.h" #include "xenia/base/assert.h" #include "xenia/base/console_app_main.h" #include "xenia/base/cvar.h" @@ -23,7 +26,7 @@ #include "xenia/gpu/shader_translator.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/xenos.h" -#include "xenia/ui/spirv/spirv_disassembler.h" +#include "xenia/ui/vulkan/spirv_tools_context.h" // For D3DDisassemble: #if XE_PLATFORM_WIN32 @@ -118,9 +121,10 @@ int shader_compiler_main(const std::vector<std::string>& args) { shader->AnalyzeUcode(ucode_disasm_buffer); std::unique_ptr<ShaderTranslator> translator; + SpirvShaderTranslator::Features spirv_features(true); if (cvars::shader_output_type == "spirv" || cvars::shader_output_type == "spirvtext") { - translator = std::make_unique<SpirvShaderTranslator>(); + translator = std::make_unique<SpirvShaderTranslator>(spirv_features); } else if (cvars::shader_output_type == "dxbc" || cvars::shader_output_type == "dxbctext") { translator = std::make_unique<DxbcShaderTranslator>( @@ -183,13 +187,30 @@ int shader_compiler_main(const std::vector<std::string>& args) { const void* source_data = translation->translated_binary().data(); size_t source_data_size = translation->translated_binary().size(); - std::unique_ptr<xe::ui::spirv::SpirvDisassembler::Result> spirv_disasm_result; + std::string spirv_disasm; if (cvars::shader_output_type == "spirvtext") { - // Disassemble SPIRV. - spirv_disasm_result = xe::ui::spirv::SpirvDisassembler().Disassemble( - reinterpret_cast<const uint32_t*>(source_data), source_data_size / 4); - source_data = spirv_disasm_result->text(); - source_data_size = std::strlen(spirv_disasm_result->text()) + 1; + std::ostringstream spirv_disasm_stream; + std::vector<unsigned int> spirv_source; + spirv_source.reserve(source_data_size / sizeof(unsigned int)); + spirv_source.insert(spirv_source.cend(), + reinterpret_cast<const unsigned int*>(source_data), + reinterpret_cast<const unsigned int*>(source_data) + + source_data_size / sizeof(unsigned int)); + spv::Disassemble(spirv_disasm_stream, spirv_source); + spirv_disasm = std::move(spirv_disasm_stream.str()); + ui::vulkan::SpirvToolsContext spirv_tools_context; + if (spirv_tools_context.Initialize(spirv_features.spirv_version)) { + std::string spirv_validation_error; + spirv_tools_context.Validate( + reinterpret_cast<const uint32_t*>(spirv_source.data()), + spirv_source.size(), &spirv_validation_error); + if (!spirv_validation_error.empty()) { + spirv_disasm.append(1, '\n'); + spirv_disasm.append(spirv_validation_error); + } + } + source_data = spirv_disasm.c_str(); + source_data_size = spirv_disasm.size(); } #if XE_PLATFORM_WIN32 ID3DBlob* dxbc_disasm_blob = nullptr; diff --git a/src/xenia/gpu/spirv/compiler.cc b/src/xenia/gpu/spirv/compiler.cc deleted file mode 100644 index d31b36996..000000000 --- a/src/xenia/gpu/spirv/compiler.cc +++ /dev/null @@ -1,36 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/compiler.h" - -namespace xe { -namespace gpu { -namespace spirv { - -Compiler::Compiler() {} - -void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) { - compiler_passes_.push_back(std::move(pass)); -} - -bool Compiler::Compile(spv::Module* module) { - for (auto& pass : compiler_passes_) { - if (!pass->Run(module)) { - return false; - } - } - - return true; -} - -void Compiler::Reset() { compiler_passes_.clear(); } - -} // namespace spirv -} // namespace gpu -} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler.h b/src/xenia/gpu/spirv/compiler.h deleted file mode 100644 index fd27969ee..000000000 --- a/src/xenia/gpu/spirv/compiler.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_COMPILER_H_ -#define XENIA_GPU_SPIRV_COMPILER_H_ - -#include "xenia/base/arena.h" -#include "xenia/gpu/spirv/compiler_pass.h" - -#include "third_party/glslang-spirv/SpvBuilder.h" -#include "third_party/spirv/GLSL.std.450.hpp11" - -namespace xe { -namespace gpu { -namespace spirv { - -// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the -// drivers. -class Compiler { - public: - Compiler(); - - void AddPass(std::unique_ptr<CompilerPass> pass); - void Reset(); - bool Compile(spv::Module* module); - - private: - std::vector<std::unique_ptr<CompilerPass>> compiler_passes_; -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_COMPILER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler_pass.h b/src/xenia/gpu/spirv/compiler_pass.h deleted file mode 100644 index 0d81aeeee..000000000 --- a/src/xenia/gpu/spirv/compiler_pass.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_ -#define XENIA_GPU_SPIRV_COMPILER_PASS_H_ - -#include "xenia/base/arena.h" - -#include "third_party/glslang-spirv/SpvBuilder.h" -#include "third_party/spirv/GLSL.std.450.hpp11" - -namespace xe { -namespace gpu { -namespace spirv { - -class CompilerPass { - public: - CompilerPass() = default; - virtual ~CompilerPass() {} - - virtual bool Run(spv::Module* module) = 0; - - private: - xe::Arena ir_arena_; -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cc deleted file mode 100644 index 55506365a..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cc +++ /dev/null @@ -1,30 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -ControlFlowAnalysisPass::ControlFlowAnalysisPass() {} - -bool ControlFlowAnalysisPass::Run(spv::Module* module) { - for (auto function : module->getFunctions()) { - // For each OpBranchConditional, see if we can find a point where control - // flow converges and then append an OpSelectionMerge. - // Potential problems: while loops constructed from branch instructions - } - - return true; -} - -} // namespace spirv -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h deleted file mode 100644 index 6b279e251..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ -#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ - -#include "xenia/gpu/spirv/compiler_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -// Control-flow analysis pass. Runs through control-flow and adds merge opcodes -// where necessary. 
-class ControlFlowAnalysisPass : public CompilerPass { - public: - ControlFlowAnalysisPass(); - - bool Run(spv::Module* module) override; - - private: -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc deleted file mode 100644 index d32997d47..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc +++ /dev/null @@ -1,48 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -ControlFlowSimplificationPass::ControlFlowSimplificationPass() {} - -bool ControlFlowSimplificationPass::Run(spv::Module* module) { - for (auto function : module->getFunctions()) { - // Walk through the blocks in the function and merge any blocks which are - // unconditionally dominated. - for (auto it = function->getBlocks().end() - 1; - it != function->getBlocks().begin();) { - auto block = *it; - if (!block->isUnreachable() && block->getPredecessors().size() == 1) { - auto prev_block = block->getPredecessors()[0]; - auto last_instr = - prev_block->getInstruction(prev_block->getInstructionCount() - 1); - if (last_instr->getOpCode() == spv::Op::OpBranch) { - if (prev_block->getSuccessors().size() == 1 && - prev_block->getSuccessors()[0] == block) { - // We're dominated by this block. Merge into it. - prev_block->merge(block); - block->setUnreachable(); - } - } - } - - --it; - } - } - - return true; -} - -} // namespace spirv -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h deleted file mode 100644 index f851d24f1..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ -#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ - -#include "xenia/gpu/spirv/compiler_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -// Control-flow simplification pass. Combines adjacent blocks and marks -// any unreachable blocks. 
-class ControlFlowSimplificationPass : public CompilerPass { - public: - ControlFlowSimplificationPass(); - - bool Run(spv::Module* module) override; - - private: -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv_shader.cc b/src/xenia/gpu/spirv_shader.cc new file mode 100644 index 000000000..db3ebd0da --- /dev/null +++ b/src/xenia/gpu/spirv_shader.cc @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader.h" + +#include + +namespace xe { +namespace gpu { + +SpirvShader::SpirvShader(xenos::ShaderType shader_type, + uint64_t ucode_data_hash, const uint32_t* ucode_dwords, + size_t ucode_dword_count, + std::endian ucode_source_endian) + : Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count, + ucode_source_endian) {} + +Shader::Translation* SpirvShader::CreateTranslationInstance( + uint64_t modification) { + return new SpirvTranslation(*this, modification); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader.h b/src/xenia/gpu/spirv_shader.h new file mode 100644 index 000000000..7eba372fa --- /dev/null +++ b/src/xenia/gpu/spirv_shader.h @@ -0,0 +1,81 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_SHADER_H_ +#define XENIA_GPU_SPIRV_SHADER_H_ + +#include +#include + +#include "xenia/gpu/shader.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { + +class SpirvShader : public Shader { + public: + class SpirvTranslation : public Translation { + public: + explicit SpirvTranslation(SpirvShader& shader, uint64_t modification) + : Translation(shader, modification) {} + }; + + explicit SpirvShader(xenos::ShaderType shader_type, uint64_t ucode_data_hash, + const uint32_t* ucode_dwords, size_t ucode_dword_count, + std::endian ucode_source_endian = std::endian::big); + + // Resource bindings are gathered after the successful translation of any + // modification for simplicity of translation (and they don't depend on + // modification bits). + + struct TextureBinding { + uint32_t fetch_constant : 5; + // Stacked and 3D are separate TextureBindings. + xenos::FetchOpDimension dimension : 2; + uint32_t is_signed : 1; + }; + // Safe to hash and compare with memcmp for layout hashing. 
+ const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation() + const { + return texture_bindings_; + } + uint32_t GetUsedTextureMaskAfterTranslation() const { + return used_texture_mask_; + } + + struct SamplerBinding { + uint32_t fetch_constant : 5; + xenos::TextureFilter mag_filter : 2; + xenos::TextureFilter min_filter : 2; + xenos::TextureFilter mip_filter : 2; + xenos::AnisoFilter aniso_filter : 3; + }; + const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation() + const { + return sampler_bindings_; + } + + protected: + Translation* CreateTranslationInstance(uint64_t modification) override; + + private: + friend class SpirvShaderTranslator; + + std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT; + std::vector<TextureBinding> texture_bindings_; + std::vector<SamplerBinding> sampler_bindings_; + uint32_t used_texture_mask_ = 0; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_SHADER_H_ diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index cf1298e7d..cfbbd28e4 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,665 +10,645 @@ #include "xenia/gpu/spirv_shader_translator.h" #include -#include #include -#include +#include +#include +#include +#include #include -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/cvar.h" -#include "xenia/base/logging.h" +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" #include "xenia/base/math.h" - -DEFINE_bool(spv_validate, false, "Validate SPIR-V shaders after generation", - "GPU"); -DEFINE_bool(spv_disasm, false, "Disassemble SPIR-V shaders after generation", - "GPU"); +#include "xenia/gpu/spirv_shader.h" namespace xe { namespace gpu { -using namespace ucode; -constexpr uint32_t kMaxInterpolators = 16; -constexpr uint32_t kMaxTemporaryRegisters = 64; +SpirvShaderTranslator::Features::Features(bool all) + : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), + max_storage_buffer_range(all ? 
UINT32_MAX : (128 * 1024 * 1024)), + clip_distance(all), + cull_distance(all), + image_view_format_swizzle(all), + signed_zero_inf_nan_preserve_float32(all), + denorm_flush_to_zero_float32(all) {} -using spv::GLSLstd450; -using spv::Id; -using spv::Op; +SpirvShaderTranslator::Features::Features( + const ui::vulkan::VulkanProvider& provider) + : max_storage_buffer_range( + provider.device_properties().limits.maxStorageBufferRange), + clip_distance(provider.device_features().shaderClipDistance), + cull_distance(provider.device_features().shaderCullDistance) { + uint32_t device_version = provider.device_properties().apiVersion; + const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions = + provider.device_extensions(); + if (device_version >= VK_MAKE_VERSION(1, 2, 0)) { + spirv_version = spv::Spv_1_5; + } else if (device_extensions.khr_spirv_1_4) { + spirv_version = spv::Spv_1_4; + } else if (device_version >= VK_MAKE_VERSION(1, 1, 0)) { + spirv_version = spv::Spv_1_3; + } else { + spirv_version = spv::Spv_1_0; + } + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features) { + image_view_format_swizzle = + bool(device_portability_subset_features->imageViewFormatSwizzle); + } else { + image_view_format_swizzle = true; + } + if (spirv_version >= spv::Spv_1_4 || + device_extensions.khr_shader_float_controls) { + const VkPhysicalDeviceFloatControlsPropertiesKHR& + float_controls_properties = provider.device_float_controls_properties(); + signed_zero_inf_nan_preserve_float32 = + bool(float_controls_properties.shaderSignedZeroInfNanPreserveFloat32); + denorm_flush_to_zero_float32 = + bool(float_controls_properties.shaderDenormFlushToZeroFloat32); + } else { + signed_zero_inf_nan_preserve_float32 = false; + denorm_flush_to_zero_float32 = false; + } +} -SpirvShaderTranslator::SpirvShaderTranslator() {} -SpirvShaderTranslator::~SpirvShaderTranslator() = default; +const std::string SpirvShaderTranslator::kInterpolatorNamePrefix = + "xe_interpolator_"; + +SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) + : features_(features) {} + +uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification( + uint32_t dynamic_addressable_register_count, + Shader::HostVertexShaderType host_vertex_shader_type) const { + Modification shader_modification; + shader_modification.vertex.dynamic_addressable_register_count = + dynamic_addressable_register_count; + shader_modification.vertex.host_vertex_shader_type = host_vertex_shader_type; + return shader_modification.value; +} + +uint64_t SpirvShaderTranslator::GetDefaultPixelShaderModification( + uint32_t dynamic_addressable_register_count) const { + Modification shader_modification; + shader_modification.pixel.dynamic_addressable_register_count = + dynamic_addressable_register_count; + return shader_modification.value; +} + +void SpirvShaderTranslator::Reset() { + ShaderTranslator::Reset(); + + builder_.reset(); + + uniform_float_constants_ = spv::NoResult; + + input_fragment_coord_ = spv::NoResult; + input_front_facing_ = spv::NoResult; + + sampler_bindings_.clear(); + texture_bindings_.clear(); + + main_interface_.clear(); + var_main_registers_ = spv::NoResult; + + main_switch_op_.reset(); + main_switch_next_pc_phi_operands_.clear(); + + cf_exec_conditional_merge_ = nullptr; + cf_instruction_predicate_merge_ = nullptr; +} + +uint32_t SpirvShaderTranslator::GetModificationRegisterCount() const { + 
Modification modification = GetSpirvShaderModification(); + return is_vertex_shader() + ? modification.vertex.dynamic_addressable_register_count + : modification.pixel.dynamic_addressable_register_count; +} void SpirvShaderTranslator::StartTranslation() { - // Create a new builder. - builder_ = std::make_unique<spv::Builder>(0x10000, 0xFFFFFFFF, nullptr); - auto& b = *builder_; + // TODO(Triang3l): Logger. + builder_ = std::make_unique<spv::Builder>( + features_.spirv_version, (kSpirvMagicToolId << 16) | 1, nullptr); - // Import required modules. - glsl_std_450_instruction_set_ = b.import("GLSL.std.450"); - - // Configure environment. - b.setSource(spv::SourceLanguage::SourceLanguageUnknown, 0); - b.setMemoryModel(spv::AddressingModel::AddressingModelLogical, - spv::MemoryModel::MemoryModelGLSL450); - b.addCapability(spv::Capability::CapabilityShader); - b.addCapability(spv::Capability::CapabilityImageQuery); - - if (is_vertex_shader()) { - b.addCapability(spv::Capability::CapabilityClipDistance); - b.addCapability(spv::Capability::CapabilityCullDistance); + builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation : spv::CapabilityShader); + if (features_.spirv_version < spv::Spv_1_4) { + if (features_.signed_zero_inf_nan_preserve_float32 || + features_.denorm_flush_to_zero_float32) { + builder_->addExtension("SPV_KHR_float_controls"); + } } - if (is_pixel_shader()) { - b.addCapability(spv::Capability::CapabilityDerivativeControl); + ext_inst_glsl_std_450_ = builder_->import("GLSL.std.450"); + builder_->setMemoryModel(spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + builder_->setSource(spv::SourceLanguageUnknown, 0); + + type_void_ = builder_->makeVoidType(); + type_bool_ = builder_->makeBoolType(); + type_bool2_ = builder_->makeVectorType(type_bool_, 2); + type_bool3_ = builder_->makeVectorType(type_bool_, 3); + type_bool4_ = builder_->makeVectorType(type_bool_, 4); + type_int_ = builder_->makeIntType(32); + type_int2_ = builder_->makeVectorType(type_int_, 2); + type_int3_ = builder_->makeVectorType(type_int_, 3); + type_int4_ = builder_->makeVectorType(type_int_, 4); + type_uint_ = builder_->makeUintType(32); + type_uint2_ = builder_->makeVectorType(type_uint_, 2); + type_uint3_ = builder_->makeVectorType(type_uint_, 3); + type_uint4_ = builder_->makeVectorType(type_uint_, 4); + type_float_ = builder_->makeFloatType(32); + type_float2_ = builder_->makeVectorType(type_float_, 2); + type_float3_ = builder_->makeVectorType(type_float_, 3); + type_float4_ = builder_->makeVectorType(type_float_, 4); + + const_int_0_ = builder_->makeIntConstant(0); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_int_0_); + } + const_int4_0_ = builder_->makeCompositeConstant(type_int4_, id_vector_temp_); + const_uint_0_ = builder_->makeUintConstant(0); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_uint_0_); + } + const_uint4_0_ = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); + const_float_0_ = builder_->makeFloatConstant(0.0f); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(const_float_0_); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp_.push_back(const_float_0_); + const_float_vectors_0_[i] = builder_->makeCompositeConstant( + type_float_vectors_[i], id_vector_temp_); + } + const_float_1_ = builder_->makeFloatConstant(1.0f); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + 
id_vector_temp_.push_back(const_float_1_); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp_.push_back(const_float_1_); + const_float_vectors_1_[i] = builder_->makeCompositeConstant( + type_float_vectors_[i], id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(const_float_0_); + id_vector_temp_.push_back(const_float_1_); + const_float2_0_1_ = + builder_->makeCompositeConstant(type_float2_, id_vector_temp_); + + // Common uniform buffer - system constants. + struct SystemConstant { + const char* name; + size_t offset; + spv::Id type; + }; + spv::Id type_uint4_array_2 = builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4); + builder_->addDecoration(type_uint4_array_2, spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_uint4_array_4 = builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(4), sizeof(uint32_t) * 4); + builder_->addDecoration(type_uint4_array_4, spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + const SystemConstant system_constants[] = { + {"flags", offsetof(SystemConstants, flags), type_uint_}, + {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), + type_uint_}, + {"vertex_base_index", offsetof(SystemConstants, vertex_base_index), + type_int_}, + {"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_}, + {"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_}, + {"texture_swizzled_signs", + offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, + {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), + type_uint4_array_4}, + {"alpha_test_reference", offsetof(SystemConstants, alpha_test_reference), + type_float_}, + {"color_exp_bias", offsetof(SystemConstants, color_exp_bias), + type_float4_}, + }; + id_vector_temp_.clear(); + id_vector_temp_.reserve(xe::countof(system_constants)); + for (size_t i = 0; i < xe::countof(system_constants); ++i) { + id_vector_temp_.push_back(system_constants[i].type); + } + spv::Id type_system_constants = + builder_->makeStructType(id_vector_temp_, "XeSystemConstants"); + for (size_t i = 0; i < xe::countof(system_constants); ++i) { + const SystemConstant& system_constant = system_constants[i]; + builder_->addMemberName(type_system_constants, static_cast<int>(i), + system_constant.name); + builder_->addMemberDecoration( + type_system_constants, static_cast<int>(i), + spv::DecorationOffset, int(system_constant.offset)); + } + builder_->addDecoration(type_system_constants, spv::DecorationBlock); + uniform_system_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_system_constants, + "xe_uniform_system_constants"); + builder_->addDecoration(uniform_system_constants_, + spv::DecorationDescriptorSet, + kDescriptorSetSystemConstants); + builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_system_constants_); } - bool_type_ = b.makeBoolType(); - float_type_ = b.makeFloatType(32); - int_type_ = b.makeIntType(32); - uint_type_ = b.makeUintType(32); - vec2_int_type_ = b.makeVectorType(int_type_, 2); - vec2_uint_type_ = b.makeVectorType(uint_type_, 2); - vec2_float_type_ = b.makeVectorType(float_type_, 2); - vec3_int_type_ = b.makeVectorType(int_type_, 3); - vec3_float_type_ = b.makeVectorType(float_type_, 3); - vec4_float_type_ = b.makeVectorType(float_type_, 4); - vec4_int_type_ = b.makeVectorType(int_type_, 4); - 
vec4_uint_type_ = b.makeVectorType(uint_type_, 4); - vec2_bool_type_ = b.makeVectorType(bool_type_, 2); - vec3_bool_type_ = b.makeVectorType(bool_type_, 3); - vec4_bool_type_ = b.makeVectorType(bool_type_, 4); + // Common uniform buffer - float constants. + uint32_t float_constant_count = + current_shader().constant_register_map().float_count; + if (float_constant_count) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(float_constant_count), + sizeof(float) * 4)); + // Currently (as of October 24, 2020) makeArrayType only uses the stride to + // check if deduplication can be done - the array stride decoration needs to + // be applied explicitly. + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(float) * 4); + spv::Id type_float_constants = + builder_->makeStructType(id_vector_temp_, "XeFloatConstants"); + builder_->addMemberName(type_float_constants, 0, "float_constants"); + builder_->addMemberDecoration(type_float_constants, 0, + spv::DecorationOffset, 0); + builder_->addDecoration(type_float_constants, spv::DecorationBlock); + uniform_float_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_float_constants, + "xe_uniform_float_constants"); + builder_->addDecoration( + uniform_float_constants_, spv::DecorationDescriptorSet, + int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel + : kDescriptorSetFloatConstantsVertex)); + builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, + 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_float_constants_); + } + } - vec4_float_one_ = b.makeCompositeConstant( - vec4_float_type_, - std::vector<Id>({b.makeFloatConstant(1.f), b.makeFloatConstant(1.f), - b.makeFloatConstant(1.f), b.makeFloatConstant(1.f)})); - vec4_float_zero_ = b.makeCompositeConstant( - vec4_float_type_, - std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), - b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); + // Common uniform buffer - bool and loop constants. + // Uniform buffers must have std140 packing, so using arrays of 4-component + // vectors instead of scalar arrays because the latter would have padding to + // 16 bytes in each element. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + // 256 bool constants. + id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4)); + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + // 32 loop constants. 
+ id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(8), sizeof(uint32_t) * 4)); + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_bool_loop_constants = + builder_->makeStructType(id_vector_temp_, "XeBoolLoopConstants"); + builder_->addMemberName(type_bool_loop_constants, 0, "bool_constants"); + builder_->addMemberDecoration(type_bool_loop_constants, 0, + spv::DecorationOffset, 0); + builder_->addMemberName(type_bool_loop_constants, 1, "loop_constants"); + builder_->addMemberDecoration(type_bool_loop_constants, 1, + spv::DecorationOffset, sizeof(uint32_t) * 8); + builder_->addDecoration(type_bool_loop_constants, spv::DecorationBlock); + uniform_bool_loop_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_bool_loop_constants, + "xe_uniform_bool_loop_constants"); + builder_->addDecoration(uniform_bool_loop_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetBoolLoopConstants)); + builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, + 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_bool_loop_constants_); + } - cube_function_ = CreateCubeFunction(); + // Common uniform buffer - fetch constants (32 x 6 uints packed in std140 as + // 4-component vectors). + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(32 * 6 / 4), + sizeof(uint32_t) * 4)); + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_fetch_constants = + builder_->makeStructType(id_vector_temp_, "XeFetchConstants"); + builder_->addMemberName(type_fetch_constants, 0, "fetch_constants"); + builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset, + 0); + builder_->addDecoration(type_fetch_constants, spv::DecorationBlock); + uniform_fetch_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants, + "xe_uniform_fetch_constants"); + builder_->addDecoration(uniform_fetch_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetFetchConstants)); + builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_fetch_constants_); + } - spv::Block* function_block = nullptr; - translated_main_ = - b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", - {}, {}, &function_block); + // Common storage buffers - shared memory uint[], each 128 MB or larger, + // depending on what's possible on the device. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); + // Storage buffers have std430 packing, no padding to 4-component vectors. + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t)); + spv::Id type_shared_memory = + builder_->makeStructType(id_vector_temp_, "XeSharedMemory"); + builder_->addMemberName(type_shared_memory, 0, "shared_memory"); + // TODO(Triang3l): Make writable when memexport is implemented. + builder_->addMemberDecoration(type_shared_memory, 0, + spv::DecorationNonWritable); + builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, + 0); + builder_->addDecoration(type_shared_memory, + features_.spirv_version >= spv::Spv_1_3 + ? 
spv::DecorationBlock + : spv::DecorationBufferBlock); + unsigned int shared_memory_binding_count = + 1 << GetSharedMemoryStorageBufferCountLog2(); + if (shared_memory_binding_count > 1) { + type_shared_memory = builder_->makeArrayType( + type_shared_memory, + builder_->makeUintConstant(shared_memory_binding_count), 0); + } + buffers_shared_memory_ = builder_->createVariable( + spv::NoPrecision, + features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + type_shared_memory, "xe_shared_memory"); + builder_->addDecoration(buffers_shared_memory_, spv::DecorationDescriptorSet, + int(kDescriptorSetSharedMemoryAndEdram)); + builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(buffers_shared_memory_); + } - assert_not_zero(register_count()); - registers_type_ = b.makeArrayType(vec4_float_type_, - b.makeUintConstant(register_count()), 0); - registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, - registers_type_, "r"); - - aL_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_uint_type_, "aL"); - - loop_count_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_uint_type_, "loop_count"); - p0_ = b.createVariable(spv::StorageClass::StorageClassFunction, bool_type_, - "p0"); - ps_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_, - "ps"); - pv_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_float_type_, "pv"); - pc_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, - "pc"); - a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, - "a0"); - lod_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_, - "lod"); - - // Uniform constants. - Id float_consts_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1); - Id loop_consts_type = - b.makeArrayType(vec4_uint_type_, b.makeUintConstant(8), 1); - Id bool_consts_type = - b.makeArrayType(vec4_uint_type_, b.makeUintConstant(2), 1); - - // Strides - b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(float)); - b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(uint32_t)); - b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(uint32_t)); - - Id consts_struct_type = b.makeStructType( - {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); - b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); - - // Constants member decorations. 
- b.addMemberDecoration(consts_struct_type, 0, - spv::Decoration::DecorationOffset, 0); - b.addMemberName(consts_struct_type, 0, "float_consts"); - - b.addMemberDecoration(consts_struct_type, 1, - spv::Decoration::DecorationOffset, - 512 * 4 * sizeof(float)); - b.addMemberName(consts_struct_type, 1, "loop_consts"); - - b.addMemberDecoration(consts_struct_type, 2, - spv::Decoration::DecorationOffset, - 512 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); - b.addMemberName(consts_struct_type, 2, "bool_consts"); - - consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, - consts_struct_type, "consts"); - - b.addDecoration(consts_, spv::Decoration::DecorationDescriptorSet, 0); if (is_vertex_shader()) { - b.addDecoration(consts_, spv::Decoration::DecorationBinding, 0); + StartVertexOrTessEvalShaderBeforeMain(); } else if (is_pixel_shader()) { - b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); + StartFragmentShaderBeforeMain(); } - // Push constants, represented by SpirvPushConstants. - Id push_constants_type = - b.makeStructType({vec4_float_type_, vec4_float_type_, vec4_float_type_, - vec4_float_type_, vec4_float_type_, uint_type_}, - "push_consts_type"); - b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock); + // Begin the main function. + std::vector<spv::Id> main_param_types; + std::vector<std::vector<spv::Decoration>> main_precisions; + spv::Block* function_main_entry; + function_main_ = builder_->makeFunctionEntry( + spv::NoPrecision, type_void_, "main", main_param_types, main_precisions, + &function_main_entry); - // float4 window_scale; - b.addMemberDecoration( - push_constants_type, 0, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, window_scale))); - b.addMemberName(push_constants_type, 0, "window_scale"); - // float4 vtx_fmt; - b.addMemberDecoration( - push_constants_type, 1, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, vtx_fmt))); - b.addMemberName(push_constants_type, 1, "vtx_fmt"); - // float4 vtx_fmt; - b.addMemberDecoration( - push_constants_type, 2, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, point_size))); - b.addMemberName(push_constants_type, 2, "point_size"); - // float4 alpha_test; - b.addMemberDecoration( - push_constants_type, 3, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, alpha_test))); - b.addMemberName(push_constants_type, 3, "alpha_test"); - // float4 color_exp_bias; - b.addMemberDecoration( - push_constants_type, 4, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, color_exp_bias))); - b.addMemberName(push_constants_type, 4, "color_exp_bias"); - // uint ps_param_gen; - b.addMemberDecoration( - push_constants_type, 5, spv::Decoration::DecorationOffset, - static_cast<int>(offsetof(SpirvPushConstants, ps_param_gen))); - b.addMemberName(push_constants_type, 5, "ps_param_gen"); - push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant, - push_constants_type, "push_consts"); + // Load the flags system constant since it may be used in many places. 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); + main_system_constant_flags_ = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); - const std::vector& texture_bindings = - current_shader().texture_bindings(); - if (!texture_bindings.empty()) { - image_2d_type_ = - b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - image_3d_type_ = - b.makeImageType(float_type_, spv::Dim::Dim3D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - image_cube_type_ = - b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - - // Texture bindings - Id tex_t[] = {b.makeSampledImageType(image_2d_type_), - b.makeSampledImageType(image_3d_type_), - b.makeSampledImageType(image_cube_type_)}; - - uint32_t num_tex_bindings = 0; - for (const auto& binding : texture_bindings) { - // Calculate the highest binding index. - num_tex_bindings = - std::max(num_tex_bindings, uint32_t(binding.binding_index + 1)); - } - - Id tex_a_t[] = { - b.makeArrayType(tex_t[0], b.makeUintConstant(num_tex_bindings), 0), - b.makeArrayType(tex_t[1], b.makeUintConstant(num_tex_bindings), 0), - b.makeArrayType(tex_t[2], b.makeUintConstant(num_tex_bindings), 0)}; - - // Create 3 texture types, all aliased on the same binding - for (int i = 0; i < 3; i++) { - tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - tex_a_t[i], - fmt::format("textures{}D", i + 2).c_str()); - b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, 0); - } - - // Set up the map from binding -> ssbo index - for (const auto& binding : texture_bindings) { - tex_binding_map_[binding.fetch_constant] = - uint32_t(binding.binding_index); + // Begin ucode translation. Initialize everything, even without defined + // defaults, for safety. 
+ var_main_predicate_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_bool_, + "xe_var_predicate", builder_->makeBoolConstant(false)); + var_main_loop_count_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_uint4_, + "xe_var_loop_count", const_uint4_0_); + var_main_address_register_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int_, + "xe_var_address_register", const_int_0_); + var_main_loop_address_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int4_, + "xe_var_loop_address", const_int4_0_); + var_main_previous_scalar_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float_, + "xe_var_previous_scalar", const_float_0_); + var_main_vfetch_address_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int_, + "xe_var_vfetch_address", const_int_0_); + var_main_tfetch_lod_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float_, + "xe_var_tfetch_lod", const_float_0_); + var_main_tfetch_gradients_h_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_tfetch_gradients_h", const_float3_0_); + var_main_tfetch_gradients_v_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_tfetch_gradients_v", const_float3_0_); + uint32_t register_array_size = register_count(); + if (register_array_size) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(register_array_size); + // TODO(Triang3l): In PS, only need to initialize starting from the + // interpolators, probably manually. But likely not very important - the + // compiler in the driver will likely eliminate that write. + for (uint32_t i = 0; i < register_array_size; ++i) { + id_vector_temp_.push_back(const_float4_0_); } + spv::Id type_register_array = builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(register_array_size), 0); + var_main_registers_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_register_array, + "xe_var_registers", + builder_->makeCompositeConstant(type_register_array, id_vector_temp_)); } - // Interpolators. - Id interpolators_type = b.makeArrayType( - vec4_float_type_, b.makeUintConstant(kMaxInterpolators), 0); + // Write the execution model-specific prologue with access to variables in the + // main function. if (is_vertex_shader()) { - // Vertex inputs/outputs - // Inputs: 32 SSBOs on DS 2 binding 0 - - const std::vector& vertex_bindings = - current_shader().vertex_bindings(); - if (!vertex_bindings.empty()) { - // Runtime array for vertex data - Id vtx_t = b.makeRuntimeArray(uint_type_); - b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride, - sizeof(uint32_t)); - - Id vtx_s = b.makeStructType({vtx_t}, "vertex_type"); - b.addDecoration(vtx_s, spv::Decoration::DecorationBufferBlock); - - // Describe the actual data - b.addMemberName(vtx_s, 0, "data"); - b.addMemberDecoration(vtx_s, 0, spv::Decoration::DecorationOffset, 0); - - // Create the vertex bindings variable. 
- Id vtx_a_t = b.makeArrayType( - vtx_s, b.makeUintConstant(uint32_t(vertex_bindings.size())), 0); - vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t, - "vertex_bindings"); - - // DS 2 binding 0 - b.addDecoration(vtx_, spv::Decoration::DecorationDescriptorSet, 2); - b.addDecoration(vtx_, spv::Decoration::DecorationBinding, 0); - b.addDecoration(vtx_, spv::Decoration::DecorationNonWritable); - - // Set up the map from binding -> ssbo index - for (const auto& binding : vertex_bindings) { - vtx_binding_map_[binding.fetch_constant] = binding.binding_index; - } - } - - // Outputs - interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, - interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - // Zero interpolators. - auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput, - interpolators_, - std::vector<Id>({b.makeUintConstant(i)})); - b.createStore(vec4_float_zero_, ptr); - } - - point_size_ = b.createVariable(spv::StorageClass::StorageClassOutput, - float_type_, "point_size"); - b.addDecoration(point_size_, spv::Decoration::DecorationLocation, 17); - // Set default point-size value (-1.0f, indicating to the geometry shader - // that the register value should be used instead of the per-vertex value) - b.createStore(b.makeFloatConstant(-1.0f), point_size_); - - point_coord_ = b.createVariable(spv::StorageClass::StorageClassOutput, - vec2_float_type_, "point_coord"); - b.addDecoration(point_coord_, spv::Decoration::DecorationLocation, 16); - // point_coord is only ever populated in a geometry shader. Just write - // zero to it in the vertex shader. - b.createStore( - b.makeCompositeConstant(vec2_float_type_, - std::vector<Id>({b.makeFloatConstant(0.0f), - b.makeFloatConstant(0.0f)})), - point_coord_); - - pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, - vec4_float_type_, "gl_Position"); - b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInPosition); - - vertex_idx_ = b.createVariable(spv::StorageClass::StorageClassInput, - int_type_, "gl_VertexIndex"); - b.addDecoration(vertex_idx_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInVertexIndex); - - interface_ids_.push_back(interpolators_); - interface_ids_.push_back(point_coord_); - interface_ids_.push_back(point_size_); - interface_ids_.push_back(pos_); - interface_ids_.push_back(vertex_idx_); - - auto vertex_idx = b.createLoad(vertex_idx_); - vertex_idx = - b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_idx); - auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, - std::vector<Id>({b.makeUintConstant(0)})); - auto r0 = b.createLoad(r0_ptr); - r0 = b.createCompositeInsert(vertex_idx, r0, vec4_float_type_, 0); - b.createStore(r0, r0_ptr); - } else { - // Pixel inputs from vertex shader. - interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, - interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - - point_coord_ = b.createVariable(spv::StorageClass::StorageClassInput, - vec2_float_type_, "point_coord"); - b.addDecoration(point_coord_, spv::Decoration::DecorationLocation, 16); - - // Pixel fragment outputs (one per render target). 
- Id frag_outputs_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0); - frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput, - frag_outputs_type, "oC"); - b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); - - frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput, - float_type_, "gl_FragDepth"); - b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInFragDepth); - - interface_ids_.push_back(interpolators_); - interface_ids_.push_back(point_coord_); - interface_ids_.push_back(frag_outputs_); - interface_ids_.push_back(frag_depth_); - // TODO(benvanik): frag depth, etc. - - // TODO(DrChat): Verify this naive, stupid approach to uninitialized values. - for (uint32_t i = 0; i < 4; i++) { - auto idx = b.makeUintConstant(i); - auto oC = b.createAccessChain(spv::StorageClass::StorageClassOutput, - frag_outputs_, std::vector({idx})); - b.createStore(vec4_float_zero_, oC); - } - - // Copy interpolators to r[0..16]. - // TODO: Need physical addressing in order to do this. - // b.createNoResultOp(spv::Op::OpCopyMemorySized, - // {registers_ptr_, interpolators_, - // b.makeUintConstant(16 * 4 * sizeof(float))}); - for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - // For now, copy interpolators register-by-register :/ - auto idx = b.makeUintConstant(i); - auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput, - interpolators_, std::vector({idx})); - auto r_a = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, std::vector({idx})); - b.createNoResultOp(spv::Op::OpCopyMemory, std::vector({r_a, i_a})); - } - - // Setup ps_param_gen - auto ps_param_gen_idx_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(5)})); - auto ps_param_gen_idx = b.createLoad(ps_param_gen_idx_ptr); - - auto frag_coord = b.createVariable(spv::StorageClass::StorageClassInput, - vec4_float_type_, "gl_FragCoord"); - b.addDecoration(frag_coord, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInFragCoord); - - interface_ids_.push_back(frag_coord); - - auto param = b.createOp( - spv::Op::OpVectorShuffle, vec4_float_type_, - {b.createLoad(frag_coord), b.createLoad(point_coord_), 0, 1, 4, 5}); - /* - // TODO: gl_FrontFacing - auto param_x = b.createCompositeExtract(param, float_type_, 0); - auto param_x_inv = b.createBinOp(spv::Op::OpFMul, float_type_, param_x, - b.makeFloatConstant(-1.f)); - param_x = b.createCompositeInsert(param_x_inv, param, vec4_float_type_, 0); - */ - - auto cond = b.createBinOp(spv::Op::OpINotEqual, bool_type_, - ps_param_gen_idx, b.makeUintConstant(-1)); - spv::Builder::If ifb(cond, 0, b); - - // FYI: We do this instead of r[ps_param_gen_idx] because that causes - // nvidia to move all registers into local memory (slow!) 
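The FYI comment above describes a real pitfall: storing through a dynamically indexed register array can make a GPU compiler demote the whole array to slow local memory. A standalone sketch of the select-based workaround the removed code uses, with illustrative names only:

    #include <cstdio>

    // Instead of regs[idx] = v (a dynamic index), rewrite every potentially
    // affected register with a select, keeping the array in registers.
    static void WriteRegisterViaSelects(float regs[][4], int reg_count,
                                        int dynamic_index,
                                        const float value[4]) {
      for (int i = 0; i < reg_count; ++i) {
        bool is_target = (i == dynamic_index);  // OpIEqual, smeared to bvec4.
        for (int c = 0; c < 4; ++c) {
          // OpSelect: keep the old value unless this is the targeted register.
          regs[i][c] = is_target ? value[c] : regs[i][c];
        }
      }
    }

    int main() {
      float regs[4][4] = {};
      const float param[4] = {0.5f, 0.25f, 0.0f, 1.0f};
      WriteRegisterViaSelects(regs, 4, 2, param);
      std::printf("r2 = %g %g %g %g\n", regs[2][0], regs[2][1], regs[2][2],
                  regs[2][3]);
      return 0;
    }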
- for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - auto reg_ptr = b.createAccessChain( - spv::StorageClass::StorageClassFunction, registers_ptr_, - std::vector({b.makeUintConstant(i)})); - - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx, - b.makeUintConstant(i)); - cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_); - auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param, - b.createLoad(reg_ptr)); - b.createStore(reg, reg_ptr); - } - - ifb.makeEndIf(); + StartVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + StartFragmentShaderInMain(); } - b.createStore(b.makeIntConstant(0x0), pc_); + // Open the main loop. + spv::Block& main_loop_pre_header = *builder_->getBuildPoint(); + main_loop_header_ = &builder_->makeNewBlock(); + spv::Block& main_loop_body = builder_->makeNewBlock(); + // Added later because the body has nested control flow, but according to the + // specification: + // "The order of blocks in a function must satisfy the rule that blocks appear + // before all blocks they dominate." + main_loop_continue_ = + new spv::Block(builder_->getUniqueId(), *function_main_); + main_loop_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); + builder_->createBranch(main_loop_header_); - loop_head_block_ = &b.makeNewBlock(); - auto block = &b.makeNewBlock(); - loop_body_block_ = &b.makeNewBlock(); - loop_cont_block_ = &b.makeNewBlock(); - loop_exit_block_ = &b.makeNewBlock(); - b.createBranch(loop_head_block_); + // If no jumps, don't create a switch, but still create a loop so exece can + // break. + bool has_main_switch = !current_shader().label_addresses().empty(); - // Setup continue block - b.setBuildPoint(loop_cont_block_); - b.createBranch(loop_head_block_); + // Main loop header - based on whether it's the first iteration (entered from + // the function or from the continuation), choose the program counter. + builder_->setBuildPoint(main_loop_header_); + spv::Id main_loop_pc_current = spv::NoResult; + if (has_main_switch) { + // OpPhi must be the first in the block. + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(main_loop_pre_header.getId()); + main_loop_pc_next_ = builder_->getUniqueId(); + id_vector_temp_.push_back(main_loop_pc_next_); + id_vector_temp_.push_back(main_loop_continue_->getId()); + main_loop_pc_current = + builder_->createOp(spv::OpPhi, type_int_, id_vector_temp_); + } + uint_vector_temp_.clear(); + builder_->createLoopMerge(main_loop_merge_, main_loop_continue_, + spv::LoopControlDontUnrollMask, uint_vector_temp_); + builder_->createBranch(&main_loop_body); - // While loop header block - b.setBuildPoint(loop_head_block_); - b.createLoopMerge(loop_exit_block_, loop_cont_block_, - spv::LoopControlMask::LoopControlDontUnrollMask, 0); - b.createBranch(block); - - // Condition block - b.setBuildPoint(block); - - // while (pc != 0xFFFF) - auto c = b.createBinOp(spv::Op::OpINotEqual, bool_type_, b.createLoad(pc_), - b.makeIntConstant(0xFFFF)); - b.createConditionalBranch(c, loop_body_block_, loop_exit_block_); - b.setBuildPoint(loop_body_block_); + // Main loop body. + builder_->setBuildPoint(&main_loop_body); + if (has_main_switch) { + // Create the program counter switch with cases for every label and for + // label 0. 
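Both the removed and the new translator lower the ucode's jump-driven control flow into one structured loop whose body dispatches on a program counter; the new code builds it from an OpPhi over the loop header, an OpLoopMerge, and an OpSwitch. Roughly, the generated structure corresponds to this standalone C++ sketch (the labels are hypothetical):

    #include <cstdio>

    // Each ucode label becomes a switch case; a jump feeds the next PC to the
    // phi and continues the loop; exece breaks from the switch and the loop.
    int main() {
      int pc = 0;  // Corresponds to the OpPhi in the main loop header.
      for (;;) {
        switch (pc) {
          case 0:
            std::printf("exec block at label 0\n");
            pc = 2;    // jmp 2 - set the next PC, take the loop continue path.
            continue;
          case 2:
            std::printf("exec block at label 2\n");
            break;     // exece - break from the switch, then from the loop.
          default:
            break;     // Impossible PC (-1) - fall through to the final break.
        }
        break;  // The main switch merge branches to the main loop merge.
      }
      return 0;
    }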
+ main_switch_header_ = builder_->getBuildPoint(); + main_switch_merge_ = + new spv::Block(builder_->getUniqueId(), *function_main_); + SpirvCreateSelectionMerge(main_switch_merge_->getId(), + spv::SelectionControlDontFlattenMask); + main_switch_op_ = std::make_unique<spv::Instruction>(spv::OpSwitch); + main_switch_op_->addIdOperand(main_loop_pc_current); + main_switch_op_->addIdOperand(main_switch_merge_->getId()); + // The default case (the merge here) must have the header as a predecessor. + main_switch_merge_->addPredecessor(main_switch_header_); + // The instruction will be inserted later, when all cases are filled. + // Insert and enter case 0. + spv::Block* main_switch_case_0_block = + new spv::Block(builder_->getUniqueId(), *function_main_); + main_switch_op_->addImmediateOperand(0); + main_switch_op_->addIdOperand(main_switch_case_0_block->getId()); + // Every switch case must have the OpSelectionMerge/OpSwitch block as a + // predecessor. + main_switch_case_0_block->addPredecessor(main_switch_header_); + function_main_->addBlock(main_switch_case_0_block); + builder_->setBuildPoint(main_switch_case_0_block); + } } std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() { - auto& b = *builder_; + // Close flow control within the last switch case. + CloseExecConditionals(); + bool has_main_switch = !current_shader().label_addresses().empty(); + // After the final exec (if it happened not to be exece, which would already + // have a break branch), break from the switch if it exists, or from the + // loop if it doesn't. + if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(has_main_switch ? main_switch_merge_ + : main_loop_merge_); + } + if (has_main_switch) { + // Insert the switch instruction with all cases added as operands. + builder_->setBuildPoint(main_switch_header_); + builder_->getBuildPoint()->addInstruction(std::move(main_switch_op_)); + // Build the main switch merge, breaking out of the loop after falling + // through the end or breaking from exece (only continuing if a jump - from + // a guest loop or from jmp/call - was made). + function_main_->addBlock(main_switch_merge_); + builder_->setBuildPoint(main_switch_merge_); + builder_->createBranch(main_loop_merge_); + } - assert_false(open_predicated_block_); - b.setBuildPoint(loop_exit_block_); - b.makeReturn(false); - exec_cond_ = false; - exec_skip_block_ = nullptr; + // Main loop continuation - choose the program counter based on the path + // taken (-1 if not from a jump as a safe fallback, which would result in not + // hitting any switch case and reaching the final break in the body). + function_main_->addBlock(main_loop_continue_); + builder_->setBuildPoint(main_loop_continue_); + if (has_main_switch) { + // OpPhi, if added, must be the first in the block. + // If labels were added, but not jumps (for example, due to the call + // instruction not being implemented as of October 18, 2020), send an + // impossible program counter value (-1) to the OpPhi at the next iteration. + if (main_switch_next_pc_phi_operands_.empty()) { + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(-1)); + } + std::unique_ptr<spv::Instruction> main_loop_pc_next_op = + std::make_unique<spv::Instruction>( + main_loop_pc_next_, type_int_, + main_switch_next_pc_phi_operands_.size() >= 2 ? 
spv::OpPhi + : spv::OpCopyObject); + for (spv::Id operand : main_switch_next_pc_phi_operands_) { + main_loop_pc_next_op->addIdOperand(operand); + } + builder_->getBuildPoint()->addInstruction(std::move(main_loop_pc_next_op)); + } + builder_->createBranch(main_loop_header_); + + // Add the main loop merge block and go back to the function. + function_main_->addBlock(main_loop_merge_); + builder_->setBuildPoint(main_loop_merge_); - // main() entry point. - spv::Block* entry_block; - auto mainFn = b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "main", - {}, {}, &entry_block); if (is_vertex_shader()) { - auto entry = b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, - mainFn, "main"); + CompleteVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + CompleteFragmentShaderInMain(); + } - for (auto id : interface_ids_) { - entry->addIdOperand(id); + // End the main function. + builder_->leaveFunction(); + + // Make the main function the entry point. + spv::ExecutionModel execution_model; + if (is_pixel_shader()) { + execution_model = spv::ExecutionModelFragment; + builder_->addExecutionMode(function_main_, + spv::ExecutionModeOriginUpperLeft); + if (IsExecutionModeEarlyFragmentTests()) { + builder_->addExecutionMode(function_main_, + spv::ExecutionModeEarlyFragmentTests); } } else { - auto entry = b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, - mainFn, "main"); - b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); - - // If we write a new depth value, we must declare this mode! - if (current_shader().writes_depth()) { - b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing); - } - - for (auto id : interface_ids_) { - entry->addIdOperand(id); - } + assert_true(is_vertex_shader()); + execution_model = IsSpirvTessEvalShader() + ? spv::ExecutionModelTessellationEvaluation + : spv::ExecutionModelVertex; + } + if (features_.denorm_flush_to_zero_float32) { + // Flush to zero, similar to the real hardware, also for things like Shader + // Model 3 multiplication emulation. + builder_->addCapability(spv::CapabilityDenormFlushToZero); + builder_->addExecutionMode(function_main_, + spv::ExecutionModeDenormFlushToZero, 32); + } + if (features_.signed_zero_inf_nan_preserve_float32) { + // Signed zero used to get VFACE from ps_param_gen, also special behavior + // for infinity in certain instructions (such as logarithm, reciprocal, + // muls_prev2). + builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); + builder_->addExecutionMode(function_main_, + spv::ExecutionModeSignedZeroInfNanPreserve, 32); + } + spv::Instruction* entry_point = + builder_->addEntryPoint(execution_model, function_main_, "main"); + for (spv::Id interface_id : main_interface_) { + entry_point->addIdOperand(interface_id); } - // TODO(benvanik): transform feedback. 
- if (false) { - b.addCapability(spv::Capability::CapabilityTransformFeedback); - b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); - } - - b.createFunctionCall(translated_main_, std::vector({})); - if (is_vertex_shader()) { - // gl_Position transform - auto vtx_fmt_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(1)})); - auto window_scale_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(0)})); - auto vtx_fmt = b.createLoad(vtx_fmt_ptr); - auto window_scale = b.createLoad(window_scale_ptr); - - auto p = b.createLoad(pos_); - auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt, - vec4_float_zero_); - - // pos.w = vtx_fmt.w == 0.0 ? 1.0 / pos.w : pos.w - auto c_w = b.createCompositeExtract(c, bool_type_, 3); - auto p_w = b.createCompositeExtract(p, float_type_, 3); - auto p_w_inv = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), p_w); - p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv); - - // pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz - auto p_all_w = b.smearScalar(spv::NoPrecision, p_w, vec4_float_type_); - auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w); - p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p); - - // Reinsert w - p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3); - - // Apply window offset - // pos.xy += window_scale.zw - auto window_offset = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {window_scale, window_scale, 2, 3, 0, 1}); - auto p_offset = - b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, p, window_offset); - - // Apply window scaling - // pos.xy *= window_scale.xy - auto p_scaled = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p_offset, - window_scale); - - p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {p, p_scaled, 4, 5, 2, 3}); - - b.createStore(p, pos_); - } else { - // Color exponent bias - { - auto bias_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(4)})); - auto bias = b.createLoad(bias_ptr); - for (uint32_t i = 0; i < 4; i++) { - auto bias_value = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {bias, bias, i, i, i, i}); - auto oC_ptr = b.createAccessChain( - spv::StorageClass::StorageClassOutput, frag_outputs_, - std::vector({b.makeUintConstant(i)})); - auto oC_biased = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, - b.createLoad(oC_ptr), bias_value); - b.createStore(oC_biased, oC_ptr); - } - } - - // Alpha test - { - auto alpha_test_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(3)})); - auto alpha_test = b.createLoad(alpha_test_ptr); - - auto alpha_test_enabled = - b.createCompositeExtract(alpha_test, float_type_, 0); - auto alpha_test_func = - b.createCompositeExtract(alpha_test, float_type_, 1); - auto alpha_test_ref = - b.createCompositeExtract(alpha_test, float_type_, 2); - - alpha_test_func = - b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func); - - auto oC0_ptr = b.createAccessChain( - spv::StorageClass::StorageClassOutput, frag_outputs_, - std::vector({b.makeUintConstant(0)})); - auto oC0_alpha = - b.createCompositeExtract(b.createLoad(oC0_ptr), float_type_, 3); - - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, - alpha_test_enabled, 
b.makeFloatConstant(1.f)); - spv::Builder::If alpha_if(cond, 0, b); - - std::vector<spv::Block*> switch_segments; - b.makeSwitch( - alpha_test_func, 0, 8, std::vector<int>({0, 1, 2, 3, 4, 5, 6, 7}), - std::vector<unsigned int>({0, 1, 2, 3, 4, 5, 6, 7}), 7, switch_segments); - - const static spv::Op alpha_op_map[] = { - spv::Op::OpNop, - spv::Op::OpFOrdGreaterThanEqual, - spv::Op::OpFOrdNotEqual, - spv::Op::OpFOrdGreaterThan, - spv::Op::OpFOrdLessThanEqual, - spv::Op::OpFOrdEqual, - spv::Op::OpFOrdLessThan, - spv::Op::OpNop, - }; - - // if (alpha_func == 0) passes = false; - b.nextSwitchSegment(switch_segments, 0); - b.makeDiscard(); - b.addSwitchBreak(); - - for (int i = 1; i < 7; i++) { - b.nextSwitchSegment(switch_segments, i); - auto cond = b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, - alpha_test_ref); - spv::Builder::If discard_if(cond, 0, b); - b.makeDiscard(); - discard_if.makeEndIf(); - b.addSwitchBreak(); - } - - // if (alpha_func == 7) passes = true; - b.nextSwitchSegment(switch_segments, 7); - b.endSwitch(switch_segments); - - alpha_if.makeEndIf(); - } - } - - b.makeReturn(false); - - // Compile the spv IR - // compiler_.Compile(b.getModule()); - - std::vector<uint32_t> spirv_words; - b.dump(spirv_words); - - // Cleanup builder. - cf_blocks_.clear(); - loop_head_block_ = nullptr; - loop_body_block_ = nullptr; - loop_cont_block_ = nullptr; - loop_exit_block_ = nullptr; - builder_.reset(); - - interface_ids_.clear(); - - // Copy bytes out. - // TODO(benvanik): avoid copy? - std::vector<uint8_t> spirv_bytes; - spirv_bytes.resize(spirv_words.size() * 4); - std::memcpy(spirv_bytes.data(), spirv_words.data(), spirv_bytes.size()); - return spirv_bytes; + // TODO(Triang3l): Avoid copy? + std::vector<unsigned int> module_uints; + builder_->dump(module_uints); + std::vector<uint8_t> module_bytes; + module_bytes.reserve(sizeof(unsigned int) * module_uints.size()); + module_bytes.insert(module_bytes.cend(), + reinterpret_cast<const uint8_t*>(module_uints.data()), + reinterpret_cast<const uint8_t*>(module_uints.data()) + + sizeof(unsigned int) * module_uints.size()); + return module_bytes; } void SpirvShaderTranslator::PostTranslation() { @@ -676,2806 +656,1806 @@ void SpirvShaderTranslator::PostTranslation() { if (!translation.is_valid()) { return; } - - // Validation. - if (cvars::spv_validate) { - auto validation = validator_.Validate( - reinterpret_cast<const uint32_t*>( - translation.translated_binary().data()), - translation.translated_binary().size() / sizeof(uint32_t)); - if (validation->has_error()) { - XELOGE("SPIR-V Shader Validation failed! Error: {}", - validation->error_string()); + SpirvShader* spirv_shader = dynamic_cast<SpirvShader*>(&translation.shader()); + if (spirv_shader && !spirv_shader->bindings_setup_entered_.test_and_set( + std::memory_order_relaxed)) { + spirv_shader->texture_bindings_.clear(); + spirv_shader->texture_bindings_.reserve(texture_bindings_.size()); + for (const TextureBinding& translator_binding : texture_bindings_) { + SpirvShader::TextureBinding& shader_binding = + spirv_shader->texture_bindings_.emplace_back(); + // For a stable hash. 
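The memset below exists because these binding structs are later hashed as raw bytes; without it, compiler-inserted padding bytes would hold indeterminate values, and two identical bindings could hash differently. A self-contained illustration (the struct here is made up, not the real SpirvShader::TextureBinding layout):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct ExampleBinding {
      uint8_t fetch_constant;  // Padding bytes likely follow on most ABIs.
      uint32_t dimension;
    };

    int main() {
      ExampleBinding binding;
      // Zero everything first so the padding bytes are deterministic and the
      // struct can be hashed or memcmp'd as raw memory.
      std::memset(&binding, 0, sizeof(binding));
      binding.fetch_constant = 1;
      binding.dimension = 2;
      const unsigned char* bytes =
          reinterpret_cast<const unsigned char*>(&binding);
      for (unsigned i = 0; i < sizeof(binding); ++i) {
        std::printf("%02X ", static_cast<unsigned>(bytes[i]));
      }
      std::printf("\n");
      return 0;
    }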
+ std::memset(&shader_binding, 0, sizeof(shader_binding)); + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.dimension = translator_binding.dimension; + shader_binding.is_signed = translator_binding.is_signed; + spirv_shader->used_texture_mask_ |= UINT32_C(1) + << translator_binding.fetch_constant; + } + spirv_shader->sampler_bindings_.clear(); + spirv_shader->sampler_bindings_.reserve(sampler_bindings_.size()); + for (const SamplerBinding& translator_binding : sampler_bindings_) { + SpirvShader::SamplerBinding& shader_binding = + spirv_shader->sampler_bindings_.emplace_back(); + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.mag_filter = translator_binding.mag_filter; + shader_binding.min_filter = translator_binding.min_filter; + shader_binding.mip_filter = translator_binding.mip_filter; + shader_binding.aniso_filter = translator_binding.aniso_filter; } } - - if (cvars::spv_disasm) { - // TODO(benvanik): only if needed? could be slowish. - auto disasm = disassembler_.Disassemble( - reinterpret_cast( - translation.translated_binary().data()), - translation.translated_binary().size() / sizeof(uint32_t)); - if (disasm->has_error()) { - XELOGE("Failed to disassemble SPIRV - invalid?"); - } else { - set_host_disassembly(translation, disasm->to_string()); - } - } -} - -void SpirvShaderTranslator::PreProcessControlFlowInstructions( - std::vector instrs) { - auto& b = *builder_; - - auto default_block = &b.makeNewBlock(); - switch_break_block_ = &b.makeNewBlock(); - - b.setBuildPoint(default_block); - b.createStore(b.makeIntConstant(0xFFFF), pc_); - b.createBranch(switch_break_block_); - - b.setBuildPoint(switch_break_block_); - b.createBranch(loop_cont_block_); - - // Now setup the switch. - default_block->addPredecessor(loop_body_block_); - b.setBuildPoint(loop_body_block_); - - cf_blocks_.resize(instrs.size()); - for (size_t i = 0; i < cf_blocks_.size(); i++) { - cf_blocks_[i].block = &b.makeNewBlock(); - cf_blocks_[i].labelled = false; - } - - std::vector operands; - operands.push_back(b.createLoad(pc_)); // Selector - operands.push_back(default_block->getId()); // Default - - // Always have a case for block 0. - operands.push_back(0); - operands.push_back(cf_blocks_[0].block->getId()); - cf_blocks_[0].block->addPredecessor(loop_body_block_); - cf_blocks_[0].labelled = true; - - for (size_t i = 0; i < instrs.size(); i++) { - auto& instr = instrs[i]; - if (instr.opcode() == ucode::ControlFlowOpcode::kCondJmp) { - uint32_t address = instr.cond_jmp.address(); - - if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - - if (!cf_blocks_[i + 1].labelled) { - cf_blocks_[i + 1].labelled = true; - operands.push_back(uint32_t(i + 1)); - operands.push_back(cf_blocks_[i + 1].block->getId()); - cf_blocks_[i + 1].block->addPredecessor(loop_body_block_); - } - } else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopStart) { - uint32_t address = instr.loop_start.address(); - - // Label the body - if (!cf_blocks_[i + 1].labelled) { - cf_blocks_[i + 1].labelled = true; - operands.push_back(uint32_t(i + 1)); - operands.push_back(cf_blocks_[i + 1].block->getId()); - cf_blocks_[i + 1].block->addPredecessor(loop_body_block_); - } - - // Label the loop skip address. 
- if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - } else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopEnd) { - uint32_t address = instr.loop_end.address(); - - if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - } - } - - b.createSelectionMerge(switch_break_block_, 0); - b.createNoResultOp(spv::Op::OpSwitch, operands); } void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( - uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( - uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowNopInstruction( - uint32_t cf_index) { - auto& b = *builder_; - - auto head = cf_blocks_[cf_index].block; - b.setBuildPoint(head); - b.createNoResultOp(spv::Op::OpNop); - if (cf_blocks_.size() > cf_index + 1) { - b.createBranch(cf_blocks_[cf_index + 1].block); - } else { - b.makeReturn(false); + if (cf_index == 0) { + // 0 already added in the beginning. + return; } + + assert_false(current_shader().label_addresses().empty()); + + // Close flow control within the previous switch case. + CloseExecConditionals(); + + spv::Function& function = builder_->getBuildPoint()->getParent(); + // Create the next switch case and fall through to it. + spv::Block* new_case = new spv::Block(builder_->getUniqueId(), function); + main_switch_op_->addImmediateOperand(cf_index); + main_switch_op_->addIdOperand(new_case->getId()); + // Every switch case must have the OpSelectionMerge/OpSwitch block as a + // predecessor. + new_case->addPredecessor(main_switch_header_); + // The previous block may have already been terminated if it was exece. + if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(new_case); + } + function.addBlock(new_case); + builder_->setBuildPoint(new_case); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { - auto& b = *builder_; - - assert_false(open_predicated_block_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - - // Head has the logic to check if the body should execute. - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - auto body = head; - switch (instr.type) { - case ParsedExecInstruction::Type::kUnconditional: { - // No need to do anything. - exec_cond_ = false; - } break; - case ParsedExecInstruction::Type::kConditional: { - // Based off of bool_consts - std::vector<Id> offsets; - offsets.push_back(b.makeUintConstant(2)); // bool_consts - uint32_t bitfield_index = instr.bool_constant_index / 32; - offsets.push_back(b.makeUintConstant(bitfield_index / 4)); - auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - v = b.createLoad(v); - v = b.createCompositeExtract(v, uint_type_, bitfield_index % 4); - - // Bitfield extract the bool constant. - // FIXME: NVidia's compiler seems to be broken on this instruction? 
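For reference, the two formulations the FIXME above contrasts are equivalent: the removed code falls back from a 1-bit OpBitFieldUExtract to a plain AND with a mask to dodge the driver issue. A standalone C++ restatement of both paths:

    #include <cstdint>
    #include <cstdio>

    // What OpBitFieldUExtract computes (count is 1 here, so no overflow).
    static uint32_t BitFieldUExtract(uint32_t value, uint32_t offset,
                                     uint32_t count) {
      return (value >> offset) & ((1u << count) - 1u);
    }

    int main() {
      uint32_t bool_constants = 0x00000010u;  // Bit 4 set.
      uint32_t index = 4;
      bool via_extract = BitFieldUExtract(bool_constants, index, 1) != 0;
      // The workaround: mask and compare against zero instead of extracting.
      bool via_mask = (bool_constants & (1u << index)) != 0;
      std::printf("extract=%d mask=%d\n", via_extract, via_mask);
      return 0;
    }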
- /* - v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, - b.makeUintConstant(instr.bool_constant_index % 32), - b.makeUintConstant(1)); - - // Conditional branch - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(instr.condition ? 1 : 0)); - */ - v = b.createBinOp( - spv::Op::OpBitwiseAnd, uint_type_, v, - b.makeUintConstant(1 << (instr.bool_constant_index % 32))); - auto cond = b.createBinOp( - instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual, - bool_type_, v, b.makeUintConstant(0)); - - // Conditional branch - body = &b.makeNewBlock(); - exec_cond_ = true; - exec_skip_block_ = &b.makeNewBlock(); - - b.createSelectionMerge( - exec_skip_block_, - spv::SelectionControlMask::SelectionControlMaskNone); - b.createConditionalBranch(cond, body, exec_skip_block_); - - b.setBuildPoint(exec_skip_block_); - if (!instr.is_end || cf_blocks_.size() > instr.dword_index + 1) { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); - } else { - b.makeReturn(false); - } - } break; - case ParsedExecInstruction::Type::kPredicated: { - // Branch based on p0. - body = &b.makeNewBlock(); - exec_cond_ = true; - exec_skip_block_ = &b.makeNewBlock(); - - auto cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.condition)); - b.createSelectionMerge( - exec_skip_block_, - spv::SelectionControlMask::SelectionControlMaskNone); - b.createConditionalBranch(cond, body, exec_skip_block_); - - b.setBuildPoint(exec_skip_block_); - if (!instr.is_end || cf_blocks_.size() > instr.dword_index + 1) { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); - } else { - b.makeReturn(false); - } - } break; - } - b.setBuildPoint(body); + UpdateExecConditionals(instr.type, instr.bool_constant_index, + instr.condition); } void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { - auto& b = *builder_; - - if (open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - if (instr.is_end) { - b.makeReturn(false); - } else if (exec_cond_) { - b.createBranch(exec_skip_block_); - } else { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); + // Break out of the main switch (if exists) and the main loop. + CloseInstructionPredication(); + if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(current_shader().label_addresses().empty() + ? 
main_loop_merge_ + : main_switch_merge_); + } } + UpdateExecConditionals(instr.type, instr.bool_constant_index, + instr.condition); } void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - // loop il, L - loop with loop data il, end @ L - std::vector offsets; - offsets.push_back(b.makeUintConstant(1)); // loop_consts - offsets.push_back(b.makeUintConstant(instr.loop_constant_index / 4)); - auto loop_const = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - loop_const = b.createLoad(loop_const); - loop_const = b.createCompositeExtract(loop_const, uint_type_, - instr.loop_constant_index % 4); + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); - // uint loop_count_value = loop_const & 0xFF; - auto loop_count_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_const, b.makeUintConstant(0xFF)); + EnsureBuildPointAvailable(); - // uint loop_aL_value = (loop_const >> 8) & 0xFF; - auto loop_aL_value = b.createBinOp(spv::Op::OpShiftRightLogical, uint_type_, - loop_const, b.makeUintConstant(8)); - loop_aL_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_aL_value, b.makeUintConstant(0xFF)); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Loop constants (member 1). + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + // 4-component vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index >> 2))); + // Scalar within the vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index & 3))); + // Count (unsigned) in bits 0:7 of the loop constant (struct member 1), + // initial aL (unsigned) in 8:15. + spv::Id loop_constant = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); - // loop_count_ = uvec4(loop_count_value, loop_count_.xyz); - auto loop_count = b.createLoad(loop_count_); - loop_count = - b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, loop_count, - std::vector({0, 0, 1, 2})); - loop_count = - b.createCompositeInsert(loop_count_value, loop_count, vec4_uint_type_, 0); - b.createStore(loop_count, loop_count_); + spv::Id const_int_8 = builder_->makeIntConstant(8); - // aL = aL.xxyz; - auto aL = b.createLoad(aL_); - aL = b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, aL, - std::vector({0, 0, 1, 2})); - if (!instr.is_repeat) { - // aL.x = loop_aL_value; - aL = b.createCompositeInsert(loop_aL_value, aL, vec4_uint_type_, 0); + // Push the count to the loop count stack - move XYZ to YZW and set X to the + // new iteration count (swizzling the way glslang does it for similar GLSL). 
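The loop state being maintained here is a pair of 4-deep stacks held in uvec4/ivec4 variables. A CPU-side model of the push performed below, with the field layout taken from the comments above and everything else illustrative:

    #include <cstdint>
    #include <cstdio>

    struct LoopStacks {
      uint32_t count[4];  // Remaining iterations, innermost loop in [0].
      int32_t a_l[4];     // aL (relative addressing) values, innermost in [0].
    };

    static void PushLoop(LoopStacks& s, uint32_t loop_constant, bool is_repeat) {
      uint32_t count = loop_constant & 0xFFu;               // Bits 0:7.
      int32_t a_l = int32_t((loop_constant >> 8) & 0xFFu);  // Bits 8:15.
      // Move XYZ to YZW, then write the new entry into X.
      for (int i = 3; i > 0; --i) {
        s.count[i] = s.count[i - 1];
        s.a_l[i] = s.a_l[i - 1];
      }
      s.count[0] = count;
      // `rep` keeps the aL of the enclosing loop instead of loading a new one.
      s.a_l[0] = is_repeat ? s.a_l[1] : a_l;
    }

    int main() {
      LoopStacks stacks = {};
      PushLoop(stacks, (3u << 8) | 8u, false);  // 8 iterations, initial aL = 3.
      std::printf("count=%u aL=%d\n", stacks.count[0], stacks.a_l[0]);
      return 0;
    }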
+ spv::Id loop_count_stack_old = + builder_->createLoad(var_main_loop_count_, spv::NoPrecision); + spv::Id loop_count_new = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, loop_constant, + const_int_0_, const_int_8); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(loop_count_new); + for (unsigned int i = 0; i < 3; ++i) { + id_vector_temp_.push_back( + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); } - b.createStore(aL, aL_); + builder_->createStore( + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), + var_main_loop_count_); - // Short-circuit if loop counter is 0 - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, loop_count_value, - b.makeUintConstant(0)); - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.loop_skip_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); + // Push aL - keep the same value as in the previous loop if repeating, or the + // new one otherwise. + spv::Id address_relative_stack_old = + builder_->createLoad(var_main_loop_address_, spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + if (instr.is_repeat) { + id_vector_temp_.emplace_back(); + } else { + id_vector_temp_.push_back(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + loop_constant, const_int_8, const_int_8))); + } + for (unsigned int i = 0; i < 3; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + address_relative_stack_old, type_int_, i)); + } + if (instr.is_repeat) { + id_vector_temp_[0] = id_vector_temp_[1]; + } + builder_->createStore( + builder_->createCompositeConstruct(type_int4_, id_vector_temp_), + var_main_loop_address_); + + // Break (jump to the skip label) if the loop counter is 0 (since the + // condition is checked at the end). + spv::Block& head_block = *builder_->getBuildPoint(); + spv::Id loop_count_zero = builder_->createBinOp( + spv::OpIEqual, type_bool_, loop_count_new, const_uint_0_); + spv::Block& skip_block = builder_->makeNewBlock(); + spv::Block& body_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(body_block.getId()); + { + std::unique_ptr<spv::Instruction> branch_conditional_op = + std::make_unique<spv::Instruction>(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(loop_count_zero); + branch_conditional_op->addIdOperand(skip_block.getId()); + branch_conditional_op->addIdOperand(body_block.getId()); + // More likely to enter than to skip. 
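The 1 and 2 added below are the optional literal weight operands of OpBranchConditional, a likelihood hint for the driver. At the binary level such an instruction looks like this (the IDs are made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t kOpBranchConditional = 250;  // SPIR-V opcode.
      uint32_t condition = 10, skip_label = 11, body_label = 12;
      uint32_t words[6] = {
          (6u << 16) | kOpBranchConditional,  // Word count | opcode.
          condition, skip_label, body_label,
          // Weights: taking `skip` is half as likely as taking `body`.
          1u, 2u,
      };
      for (uint32_t w : words) std::printf("0x%08X\n", w);
      return 0;
    }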
+ branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + head_block.addInstruction(std::move(branch_conditional_op)); + } + skip_block.addPredecessor(&head_block); + body_block.addPredecessor(&head_block); + builder_->setBuildPoint(&skip_block); + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.loop_skip_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); + builder_->setBuildPoint(&body_block); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - // endloop il, L - end loop w/ data il, head @ L - auto loop_count = b.createLoad(loop_count_); - auto count = b.createCompositeExtract(loop_count, uint_type_, 0); - count = - b.createBinOp(spv::Op::OpISub, uint_type_, count, b.makeUintConstant(1)); - loop_count = b.createCompositeInsert(count, loop_count, vec4_uint_type_, 0); - b.createStore(loop_count, loop_count_); - // if (--loop_count_.x == 0 || [!]p0) - auto c1 = b.createBinOp(spv::Op::OpIEqual, bool_type_, count, - b.makeUintConstant(0)); - auto c2 = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto cond = b.createBinOp(spv::Op::OpLogicalOr, bool_type_, c1, c2); + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); - auto loop = &b.makeNewBlock(); - auto end = &b.makeNewBlock(); - auto tail = &b.makeNewBlock(); - b.createSelectionMerge(tail, spv::SelectionControlMaskNone); - b.createConditionalBranch(cond, end, loop); + EnsureBuildPointAvailable(); - // ================================================ - // Loop completed - pop the current loop off the stack and exit - b.setBuildPoint(end); - loop_count = b.createLoad(loop_count_); - auto aL = b.createLoad(aL_); + // Subtract 1 from the loop counter (will store later). + spv::Id loop_count_stack_old = + builder_->createLoad(var_main_loop_count_, spv::NoPrecision); + spv::Id loop_count = builder_->createBinOp( + spv::OpISub, type_uint_, + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, 0), + builder_->makeUintConstant(1)); + spv::Id address_relative_stack_old = + builder_->createLoad(var_main_loop_address_, spv::NoPrecision); - // loop_count = loop_count.yzw0 - loop_count = - b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, loop_count, - std::vector({1, 2, 3, 3})); - loop_count = b.createCompositeInsert(b.makeUintConstant(0), loop_count, - vec4_uint_type_, 3); - b.createStore(loop_count, loop_count_); + // Predicated break works like break if (loop_count == 0 || [!]p0). + // Three options, due to logical operations usage (so OpLogicalNot is not + // required): + // - Continue if (loop_count != 0). + // - Continue if (loop_count != 0 && p0), if breaking if !p0. + // - Break if (loop_count == 0 || p0), if breaking if p0. + bool break_is_true = instr.is_predicated_break && instr.predicate_condition; + spv::Id condition = + builder_->createBinOp(break_is_true ? spv::OpIEqual : spv::OpINotEqual, + type_bool_, loop_count, const_uint_0_); + if (instr.is_predicated_break) { + condition = builder_->createBinOp( + instr.predicate_condition ? 
spv::OpLogicalOr : spv::OpLogicalAnd, + type_bool_, condition, + builder_->createLoad(var_main_predicate_, spv::NoPrecision)); + } - // aL = aL.yzw0 - aL = b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, aL, - std::vector<uint32_t>({1, 2, 3, 3})); - aL = b.createCompositeInsert(b.makeUintConstant(0), aL, vec4_uint_type_, 3); - b.createStore(aL, aL_); + spv::Block& body_block = *builder_->getBuildPoint(); + spv::Block& continue_block = builder_->makeNewBlock(); + spv::Block& break_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(break_block.getId()); + { + std::unique_ptr<spv::Instruction> branch_conditional_op = + std::make_unique<spv::Instruction>(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(condition); + // More likely to continue than to break. + if (break_is_true) { + branch_conditional_op->addIdOperand(break_block.getId()); + branch_conditional_op->addIdOperand(continue_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + } else { + branch_conditional_op->addIdOperand(continue_block.getId()); + branch_conditional_op->addIdOperand(break_block.getId()); + branch_conditional_op->addImmediateOperand(2); + branch_conditional_op->addImmediateOperand(1); + } + body_block.addInstruction(std::move(branch_conditional_op)); + } + continue_block.addPredecessor(&body_block); + break_block.addPredecessor(&body_block); - // Update pc with the next block - // pc_ = instr.dword_index + 1 - b.createStore(b.makeIntConstant(instr.dword_index + 1), pc_); - b.createBranch(tail); + // Continue case. + builder_->setBuildPoint(&continue_block); + // Store the loop count with 1 subtracted. + builder_->createStore(builder_->createCompositeInsert( + loop_count, loop_count_stack_old, type_uint4_, 0), + var_main_loop_count_); + // Extract the value to add to aL (signed, in bits 16:23 of the loop + // constant). + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Loop constants (member 1). + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + // 4-component vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index >> 2))); + // Scalar within the vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index & 3))); + spv::Id loop_constant = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id address_relative_old = builder_->createCompositeExtract( + address_relative_stack_old, type_int_, 0); + builder_->createStore( + builder_->createCompositeInsert( + builder_->createBinOp( + spv::OpIAdd, type_int_, address_relative_old, + builder_->createTriOp( + spv::OpBitFieldSExtract, type_int_, + builder_->createUnaryOp(spv::OpBitcast, type_int_, + loop_constant), + builder_->makeIntConstant(16), builder_->makeIntConstant(8))), + address_relative_stack_old, type_int4_, 0), + var_main_loop_address_); + // Jump back to the beginning of the loop body. 
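The OpBitFieldSExtract above pulls a signed 8-bit aL step out of bits 16:23 of the loop constant. The same extraction in plain C++, assuming two's complement (guaranteed since C++20):

    #include <cstdint>
    #include <cstdio>

    // BitFieldSExtract(x, 16, 8): shift the field to the top byte, then use
    // an arithmetic right shift to replicate its sign bit downward.
    static int32_t ExtractSignedLoopStep(uint32_t loop_constant) {
      return int32_t(loop_constant << 8) >> 24;
    }

    int main() {
      // 0xFF in bits 16:23 encodes a step of -1; 0x02 encodes +2.
      std::printf("step=%d\n", ExtractSignedLoopStep(0x00FF0000u));
      std::printf("step=%d\n", ExtractSignedLoopStep(0x00020000u));
      return 0;
    }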
+ main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.loop_body_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); - // ================================================ - // Still looping - increment aL and loop - b.setBuildPoint(loop); - aL = b.createLoad(aL_); - auto aL_x = b.createCompositeExtract(aL, uint_type_, 0); - - std::vector offsets; - offsets.push_back(b.makeUintConstant(1)); // loop_consts - offsets.push_back(b.makeUintConstant(instr.loop_constant_index / 4)); - auto loop_const = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - loop_const = b.createLoad(loop_const); - loop_const = b.createCompositeExtract(loop_const, uint_type_, - instr.loop_constant_index % 4); - - // uint loop_aL_value = (loop_const >> 16) & 0xFF; - auto loop_aL_value = b.createBinOp(spv::Op::OpShiftRightLogical, uint_type_, - loop_const, b.makeUintConstant(16)); - loop_aL_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_aL_value, b.makeUintConstant(0xFF)); - - aL_x = b.createBinOp(spv::Op::OpIAdd, uint_type_, aL_x, loop_aL_value); - aL = b.createCompositeInsert(aL_x, aL, vec4_uint_type_, 0); - b.createStore(aL, aL_); - - // pc_ = instr.loop_body_address; - b.createStore(b.makeIntConstant(instr.loop_body_address), pc_); - b.createBranch(tail); - - // ================================================ - b.setBuildPoint(tail); - b.createBranch(switch_break_block_); + // Break case. + builder_->setBuildPoint(&break_block); + // Pop the current loop off the loop counter and the relative address stacks - + // move YZW to XYZ and set W to 0. + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (unsigned int i = 1; i < 4; ++i) { + id_vector_temp_.push_back( + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); + } + id_vector_temp_.push_back(const_uint_0_); + builder_->createStore( + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), + var_main_loop_count_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (unsigned int i = 1; i < 4; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + address_relative_stack_old, type_int_, i)); + } + id_vector_temp_.push_back(const_int_0_); + builder_->createStore( + builder_->createCompositeConstruct(type_int4_, id_vector_temp_), + var_main_loop_address_); + // Now going to fall through to the next control flow instruction. } -void SpirvShaderTranslator::ProcessCallInstruction( - const ParsedCallInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // Unused instruction(?) - assert_always(); - EmitTranslationError("call is unimplemented", false); - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -void SpirvShaderTranslator::ProcessReturnInstruction( - const ParsedReturnInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // Unused instruction(?) 
- assert_always(); - EmitTranslationError("ret is unimplemented", false); - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -// CF jump void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - switch (instr.type) { - case ParsedJumpInstruction::Type::kUnconditional: { - b.createStore(b.makeIntConstant(instr.target_address), pc_); - b.createBranch(switch_break_block_); - } break; - case ParsedJumpInstruction::Type::kConditional: { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - - // Based off of bool_consts - std::vector offsets; - offsets.push_back(b.makeUintConstant(2)); // bool_consts - uint32_t bitfield_index = instr.bool_constant_index / 32; - offsets.push_back(b.makeUintConstant(bitfield_index / 4)); - auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - v = b.createLoad(v); - v = b.createCompositeExtract(v, uint_type_, bitfield_index % 4); - - // Bitfield extract the bool constant. - // FIXME: NVidia's compiler seems to be broken on this instruction? - /* - v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, - b.makeUintConstant(instr.bool_constant_index % 32), - b.makeUintConstant(1)); - - // Conditional branch - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(instr.condition ? 1 : 0)); - */ - v = b.createBinOp( - spv::Op::OpBitwiseAnd, uint_type_, v, - b.makeUintConstant(1 << (instr.bool_constant_index % 32))); - auto cond = b.createBinOp( - instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual, - bool_type_, v, b.makeUintConstant(0)); - - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.target_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); - } break; - case ParsedJumpInstruction::Type::kPredicated: { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - - auto cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.condition)); - - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.target_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); - } break; - } -} - -void SpirvShaderTranslator::ProcessAllocInstruction( - const ParsedAllocInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - switch (instr.type) { - case AllocType::kNone: { - // ? - } break; - case AllocType::kVsPosition: { - assert_true(is_vertex_shader()); - } break; - // Also PS Colors - case AllocType::kVsInterpolators: { - // Already included, nothing to do here. - } break; - case AllocType::kMemory: { - // Nothing to do for this. 
- } break; - default: - break; - } - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -spv::Id SpirvShaderTranslator::BitfieldExtract(spv::Id result_type, - spv::Id base, bool is_signed, - uint32_t offset, - uint32_t count) { - auto& b = *builder_; - - spv::Id base_type = b.getTypeId(base); - - // <-- 32 - (offset + count) ------ [bits] -?- - if (32 - (offset + count) > 0) { - base = b.createBinOp(spv::Op::OpShiftLeftLogical, base_type, base, - b.makeUintConstant(32 - (offset + count))); - } - // [bits] -?-?-?--------------------------- - auto op = is_signed ? spv::Op::OpShiftRightArithmetic - : spv::Op::OpShiftRightLogical; - base = b.createBinOp(op, base_type, base, b.makeUintConstant(32 - count)); - - return base; -} - -spv::Id SpirvShaderTranslator::ConvertNormVar(spv::Id var, spv::Id result_type, - uint32_t bits, bool is_signed) { - auto& b = *builder_; - if (is_signed) { - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, var, - b.makeFloatConstant(-float(1 << (bits - 1)))); - auto v = b.createBinOp(spv::Op::OpFDiv, result_type, var, - b.makeFloatConstant(float((1 << (bits - 1)) - 1))); - var = b.createTriOp(spv::Op::OpSelect, result_type, c, - b.makeFloatConstant(-1.f), v); + // Treat like exec, merge with execs if possible, since it's an if too. + ParsedExecInstruction::Type type; + if (instr.type == ParsedJumpInstruction::Type::kConditional) { + type = ParsedExecInstruction::Type::kConditional; + } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) { + type = ParsedExecInstruction::Type::kPredicated; } else { - var = b.createBinOp(spv::Op::OpFDiv, result_type, var, - b.makeFloatConstant(float((1 << bits) - 1))); + type = ParsedExecInstruction::Type::kUnconditional; } + UpdateExecConditionals(type, instr.bool_constant_index, instr.condition); - return var; + // UpdateExecConditionals may not necessarily close the instruction-level + // predicate check (it's not necessary if the execs are merged), but here the + // instruction itself is on the control flow level, so the predicate check is + // on the control flow level too. + CloseInstructionPredication(); + + if (builder_->getBuildPoint()->isTerminated()) { + // Unreachable for some reason. + return; + } + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.target_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); } -void SpirvShaderTranslator::ProcessVertexFetchInstruction( - const ParsedVertexFetchInstruction& instr) { - auto& b = *builder_; - assert_true(is_vertex_shader()); - assert_not_zero(vertex_idx_); - - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. 
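The removed code below tracked a single open predicated block and closed it whenever the next instruction's predication differed; the new UpdateExecConditionals generalizes this to bool-constant conditions and to jumps treated as execs. A sketch of the merging idea only - the names and structure here are illustrative, not the real API:

    #include <cstdio>

    struct ExecConditional {
      bool open = false;
      int bool_constant_index = -1;
      bool condition = false;
    };

    static void UpdateExecConditional(ExecConditional& state, int index,
                                      bool condition) {
      if (state.open && state.bool_constant_index == index &&
          state.condition == condition) {
        return;  // Same check as the previous exec - keep the block open.
      }
      if (state.open) {
        std::printf("close if (b%d == %d)\n", state.bool_constant_index,
                    state.condition);
      }
      std::printf("open if (b%d == %d)\n", index, condition);
      state = {true, index, condition};
    }

    int main() {
      ExecConditional state;
      UpdateExecConditional(state, 3, true);   // Opens the check.
      UpdateExecConditional(state, 3, true);   // Merged - nothing emitted.
      UpdateExecConditional(state, 3, false);  // Condition changed - reopen.
      return 0;
    }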
- if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; +spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( + spv::Id scalar, spv::Id vector_type) { + bool is_constant = builder_->isConstant(scalar); + bool is_spec_constant = builder_->isSpecConstant(scalar); + if (!is_constant && !is_spec_constant) { + return builder_->smearScalar(spv::NoPrecision, scalar, vector_type); } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); + assert_true(builder_->getTypeClass(builder_->getTypeId(scalar)) == + builder_->getTypeClass(builder_->getScalarTypeId(vector_type))); + if (!builder_->isVectorType(vector_type)) { + assert_true(builder_->isScalarType(vector_type)); + return scalar; } - - // Operand 0 is the index - // Operand 1 is the binding - // TODO: Indexed fetch - auto vertex_idx = LoadFromOperand(instr.operands[0]); - vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx); - - // vertex_idx * stride + offset - vertex_idx = b.createBinOp(spv::Op::OpIMul, int_type_, vertex_idx, - b.makeUintConstant(instr.attributes.stride)); - vertex_idx = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(instr.attributes.offset)); - - auto data_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, vtx_, - {b.makeUintConstant(vtx_binding_map_[instr.operands[1].storage_index]), - b.makeUintConstant(0)}); - - spv::Id vertex = 0; - switch (instr.attributes.data_format) { - case xenos::VertexFormat::k_8_8_8_8: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id components[4] = {}; - - auto op = instr.attributes.is_signed ? spv::Op::OpConvertSToF - : spv::Op::OpConvertUToF; - auto comp_type = instr.attributes.is_signed ? 
int_type_ : uint_type_; - - for (int i = 0; i < 4; i++) { - components[i] = BitfieldExtract(comp_type, vertex_data, - instr.attributes.is_signed, 8 * i, 8); - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, - {components[0], components[1], components[2], components[3]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm4x8; - } else { - op = spv::GLSLstd450::kUnpackUnorm4x8; - } - vertex = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec4_float_type_, op, {vertex_data}); - } - } break; - - case xenos::VertexFormat::k_16_16: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id comp[2] = {}; - - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - comp[0] = BitfieldExtract(comp_type, vertex_data, is_signed, 0, 16); - comp[1] = BitfieldExtract(comp_type, vertex_data, is_signed, 16, 16); - - auto op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(comp); i++) { - comp[i] = b.createUnaryOp(op, float_type_, comp[i]); - } - - components[i] = - b.createCompositeConstruct(vec2_float_type_, {comp[0], comp[1]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm2x16; - } else { - op = spv::GLSLstd450::kUnpackUnorm2x16; - } - - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, op, {vertex_data}); - } - } - - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_16_16_16_16: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id comp[2] = {}; - - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - comp[0] = BitfieldExtract(comp_type, vertex_data, is_signed, 0, 16); - comp[1] = BitfieldExtract(comp_type, vertex_data, is_signed, 16, 16); - - auto op = is_signed ? 
spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(comp); i++) { - comp[i] = b.createUnaryOp(op, float_type_, comp[i]); - } - - components[i] = - b.createCompositeConstruct(vec2_float_type_, {comp[0], comp[1]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm2x16; - } else { - op = spv::GLSLstd450::kUnpackUnorm2x16; - } - - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, op, {vertex_data}); - } - } - - vertex = b.createConstructor( - spv::NoPrecision, {components[0], components[1]}, vec4_float_type_); - } break; - - case xenos::VertexFormat::k_16_16_FLOAT: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - assert_true(instr.attributes.is_integer); - assert_true(instr.attributes.is_signed); - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, - spv::GLSLstd450::kUnpackHalf2x16, {vertex_data}); - } - - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_16_16_16_16_FLOAT: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - assert_true(instr.attributes.is_integer); - assert_true(instr.attributes.is_signed); - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, - spv::GLSLstd450::kUnpackHalf2x16, {vertex_data}); - } - - vertex = b.createConstructor( - spv::NoPrecision, {components[0], components[1]}, vec4_float_type_); - } break; - - case xenos::VertexFormat::k_32: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. There's not - // enough precision in a float to shove INT_MAX into it. 
- assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - // vertex = b.createCompositeConstruct(float_type_, {components[0]}); - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_32_32: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. There's not - // enough precision in a float to shove INT_MAX into it. - assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - vertex = b.createCompositeConstruct(vec2_float_type_, - {components[0], components[1]}); - } break; - - case xenos::VertexFormat::k_32_32_32_32: { - spv::Id components[4] = {}; - for (uint32_t i = 0; i < 4; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. There's not - // enough precision in a float to shove INT_MAX into it. 
- assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - vertex = b.createCompositeConstruct( - vec2_float_type_, - {components[0], components[1], components[2], components[3]}); - } break; - - case xenos::VertexFormat::k_32_FLOAT: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - - vertex = b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } break; - - case xenos::VertexFormat::k_32_32_FLOAT: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct(vec2_float_type_, - {components[0], components[1]}); - } break; - - case xenos::VertexFormat::k_32_32_32_FLOAT: { - spv::Id components[3] = {}; - for (uint32_t i = 0; i < 3; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, {components[0], components[1], components[2]}); - } break; - - case xenos::VertexFormat::k_32_32_32_32_FLOAT: { - spv::Id components[4] = {}; - for (uint32_t i = 0; i < 4; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, - {components[0], components[1], components[2], components[3]}); - } break; - - case xenos::VertexFormat::k_2_10_10_10: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - spv::Id components[4] = {0}; - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 10); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 10, 10); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 20, 10); - components[3] = - BitfieldExtract(comp_type, vertex_data, is_signed, 30, 02); - - auto op = is_signed ? 
spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(components); i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 10, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 10, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - components[3] = - ConvertNormVar(components[3], float_type_, 02, is_signed); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, std::vector({components[0], components[1], - components[2], components[3]})); - } break; - - case xenos::VertexFormat::k_10_11_11: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto op = - is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - assert_true(comp_type == b.getTypeId(vertex_data)); - - spv::Id components[3] = {0}; - /* - components[2] = b.createTriOp( - op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(10)); - components[1] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(10), - b.makeUintConstant(11)); - components[0] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(21), - b.makeUintConstant(11)); - */ - // Workaround until NVIDIA fixes their compiler :| - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 11); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 11, 11); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 22, 10); - - op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < 3; i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 11, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 11, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, - std::vector({components[0], components[1], components[2]})); - } break; - - case xenos::VertexFormat::k_11_11_10: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto op = - is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract; - auto comp_type = is_signed ? 
int_type_ : uint_type_; - - spv::Id components[3] = {0}; - /* - components[2] = b.createTriOp( - op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(11)); - components[1] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(11), - b.makeUintConstant(11)); - components[0] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(22), - b.makeUintConstant(10)); - */ - // Workaround until NVIDIA fixes their compiler :| - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 10); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 10, 11); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 21, 11); - - op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < 3; i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 11, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 11, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, - std::vector({components[0], components[1], components[2]})); - } break; - - case xenos::VertexFormat::kUndefined: - break; + int num_components = builder_->getNumTypeComponents(vector_type); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(size_t(num_components)); + for (int i = 0; i < num_components; ++i) { + id_vector_temp_util_.push_back(scalar); } - - assert_not_zero(vertex); - StoreToResult(vertex, instr.result); + return builder_->makeCompositeConstant(vector_type, id_vector_temp_util_, + is_spec_constant); } -void SpirvShaderTranslator::ProcessTextureFetchInstruction( - const ParsedTextureFetchInstruction& instr) { - auto& b = *builder_; +uint32_t SpirvShaderTranslator::GetPsParamGenInterpolator() const { + assert_true(is_pixel_shader()); + Modification modification = GetSpirvShaderModification(); + // param_gen_interpolator is already 4 bits, no need for an interpolator count + // safety check. + return (modification.pixel.param_gen_enable && + modification.pixel.param_gen_interpolator < register_count()) + ? modification.pixel.param_gen_interpolator + : UINT32_MAX; +} - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. - if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; +void SpirvShaderTranslator::EnsureBuildPointAvailable() { + if (!builder_->getBuildPoint()->isTerminated()) { + return; + } + spv::Block& new_block = builder_->makeNewBlock(); + new_block.setUnreachable(); + builder_->setBuildPoint(&new_block); +} + +void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { + // Create the inputs. 
+ if (IsSpirvTessEvalShader()) { + input_primitive_id_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_PrimitiveID"); + builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, + spv::BuiltInPrimitiveId); + main_interface_.push_back(input_primitive_id_); + } else { + input_vertex_index_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_VertexIndex"); + builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, + spv::BuiltInVertexIndex); + main_interface_.push_back(input_vertex_index_); } - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); + // Create the Xenia-specific outputs. + // TODO(Triang3l): Change to an interpolator array. + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + builder_->addDecoration(interpolator, spv::DecorationInvariant); + main_interface_.push_back(interpolator); } - // Operand 0 is the offset - // Operand 1 is the sampler index - Id dest = vec4_float_zero_; - Id src = LoadFromOperand(instr.operands[0]); - assert_not_zero(src); + // Create the gl_PerVertex output for used system outputs. + std::vector<spv::Id> struct_per_vertex_members; + struct_per_vertex_members.reserve(kOutputPerVertexMemberCount); + struct_per_vertex_members.push_back(type_float4_); + spv::Id type_struct_per_vertex = + builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex"); + builder_->addMemberName(type_struct_per_vertex, + kOutputPerVertexMemberPosition, "gl_Position"); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberPosition, + spv::DecorationInvariant); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberPosition, + spv::DecorationBuiltIn, spv::BuiltInPosition); + builder_->addDecoration(type_struct_per_vertex, spv::DecorationBlock); + output_per_vertex_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_struct_per_vertex, ""); + main_interface_.push_back(output_per_vertex_); +} - uint32_t dim_idx = 0; - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: - case xenos::FetchOpDimension::k2D: { - dim_idx = 0; - } break; - case xenos::FetchOpDimension::k3DOrStacked: { - dim_idx = 1; - } break; - case xenos::FetchOpDimension::kCube: { - dim_idx = 2; - } break; - default: - assert_unhandled_case(instr.dimension) +void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { + var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_point_size_edge_flag_kill_vertex"); + + // Zero the interpolators.
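For readability, the gl_PerVertex block assembled above through makeStructType/addMemberDecoration corresponds to the GLSL redeclaration below: only gl_Position is kept, marked invariant, presumably so that matching guest vertices produce bit-identical positions across passes. The GLSL form is for reference only (and a hypothetical equivalent at that); the translator emits the SPIR-V declarations directly:

  // Hypothetical GLSL equivalent of the emitted gl_PerVertex declarations.
  constexpr const char* kPerVertexGlslEquivalent = R"(
  out gl_PerVertex {
    invariant vec4 gl_Position;
  };
  )";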
+ for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + builder_->createStore(const_float4_0_, input_output_interpolators_[i]); } - switch (instr.opcode) { - case FetchOpcode::kTextureFetch: { - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - - if (instr.dimension == xenos::FetchOpDimension::k1D) { - // Upgrade 1D src coordinate into 2D - src = b.createCompositeConstruct(vec2_float_type_, - {src, b.makeFloatConstant(0.f)}); - } - - spv::Builder::TextureParameters params = {0}; - params.coords = src; - params.sampler = texture; - if (instr.attributes.use_register_lod) { - params.lod = b.createLoad(lod_); - } - if (instr.attributes.offset_x || instr.attributes.offset_y || - instr.attributes.offset_z) { - float offset_x = instr.attributes.offset_x; - float offset_y = instr.attributes.offset_y; - float offset_z = instr.attributes.offset_z; - - // Round numbers away from zero. No effect if offset is 0. - offset_x += instr.attributes.offset_x < 0 ? -0.5f : 0.5f; - offset_y += instr.attributes.offset_y < 0 ? -0.5f : 0.5f; - offset_z += instr.attributes.offset_z < 0 ? -0.5f : 0.5f; - - Id offset = 0; - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: { - // https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006.aspx - // "Because the runtime does not support 1D textures, the compiler - // will use a 2D texture with the knowledge that the y-coordinate is - // unimportant." - offset = b.makeCompositeConstant( - vec2_int_type_, - {b.makeIntConstant(int(offset_x)), b.makeIntConstant(0)}); - } break; - case xenos::FetchOpDimension::k2D: { - offset = b.makeCompositeConstant( - vec2_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y))}); - } break; - case xenos::FetchOpDimension::k3DOrStacked: { - offset = b.makeCompositeConstant( - vec3_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y)), - b.makeIntConstant(int(offset_z))}); - } break; - case xenos::FetchOpDimension::kCube: { - // FIXME(DrChat): Is this the correct dimension? 
I forget - offset = b.makeCompositeConstant( - vec3_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y)), - b.makeIntConstant(int(offset_z))}); - } break; - } - - params.offset = offset; - } - - dest = - b.createTextureCall(spv::NoPrecision, vec4_float_type_, false, false, - false, false, is_vertex_shader(), params); - } break; - - case FetchOpcode::kGetTextureGradients: { - Id src_x = b.createCompositeExtract(src, float_type_, 0); - Id src_y = b.createCompositeExtract(src, float_type_, 1); - - dest = b.createCompositeConstruct( - vec4_float_type_, - { - b.createUnaryOp(spv::OpDPdx, float_type_, src_x), - b.createUnaryOp(spv::OpDPdy, float_type_, src_x), - b.createUnaryOp(spv::OpDPdx, float_type_, src_y), - b.createUnaryOp(spv::OpDPdy, float_type_, src_y), - }); - } break; - - case FetchOpcode::kGetTextureWeights: { - // fract(src0 * textureSize); - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - auto image = - b.createUnaryOp(spv::OpImage, b.getImageType(texture), texture); - - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: - case xenos::FetchOpDimension::k2D: { - spv::Builder::TextureParameters params; - std::memset(&params, 0, sizeof(params)); - params.sampler = image; - params.lod = b.makeIntConstant(0); - auto size = b.createTextureQueryCall(spv::Op::OpImageQuerySizeLod, - params, true); - size = - b.createUnaryOp(spv::Op::OpConvertUToF, vec2_float_type_, size); - - auto weight = - b.createBinOp(spv::Op::OpFMul, vec2_float_type_, size, src); - weight = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, spv::GLSLstd450::kFract, - {weight}); - - dest = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {weight, vec4_float_zero_, 0, 1, 2, 2}); - } break; - - default: - // TODO(DrChat): The rest of these. - assert_unhandled_case(instr.dimension); - break; - } - } break; - - case FetchOpcode::kGetTextureComputedLod: { - // TODO(DrChat): Verify if this implementation is correct. - // This is only valid in pixel shaders. - assert_true(is_pixel_shader()); - - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - - if (instr.dimension == xenos::FetchOpDimension::k1D) { - // Upgrade 1D src coordinate into 2D - src = b.createCompositeConstruct(vec2_float_type_, - {src, b.makeFloatConstant(0.f)}); - } - - spv::Builder::TextureParameters params = {}; - params.sampler = texture; - params.coords = src; - auto lod = - b.createTextureQueryCall(spv::Op::OpImageQueryLod, params, false); - - dest = b.createCompositeExtract(lod, float_type_, 1); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case FetchOpcode::kSetTextureLod: { - // = src1.x (MIP level) - // ... immediately after - // tfetch UseRegisterLOD=true - b.createStore(src, lod_); - } break; - - default: - // TODO: the rest of these - assert_unhandled_case(instr.opcode); - break; - } - - if (dest) { - b.createStore(dest, pv_); - StoreToResult(dest, instr.result); + // Load the vertex index or the tessellation parameters.
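The hunk below loads gl_VertexIndex, endian-swaps it according to the vertex_index_endian system constant, adds the base vertex index, and stores the result to r0.x as a float. As a hedged CPU-side sketch of the swap EndianSwap32Uint emits (the mode numbering follows my reading of xenos::Endian - none, 8-in-16, 8-in-32, 16-in-32 - and should be checked against xenos.h):

  #include <cstdint>

  uint32_t XenosSwap32(uint32_t v, uint32_t endian) {
    if (endian == 1 || endian == 2) {
      // 8-in-16: swap the two bytes within each 16-bit half.
      v = ((v & 0x00FF00FFu) << 8) | ((v & 0xFF00FF00u) >> 8);
    }
    if (endian == 2 || endian == 3) {
      // 16-in-32: swap the 16-bit halves; combined with 8-in-16 this yields
      // the full 32-bit byte reversal for mode 2 (8-in-32).
      v = (v << 16) | (v >> 16);
    }
    return v;
  }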
+ if (register_count()) { + // TODO(Triang3l): Barycentric coordinates and patch index. + if (IsSpirvVertexShader()) { + // TODO(Triang3l): Fetch the vertex index from the shared memory when + // fullDrawIndexUint32 isn't available and the index is 32-bit and needs + // endian swap. + // TODO(Triang3l): Close line loop primitive. + // Load the unswapped index as uint for swapping. + spv::Id vertex_index = builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createLoad(input_vertex_index_, spv::NoPrecision)); + // Endian-swap the index and convert to int. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantIndexVertexIndexEndian)); + spv::Id vertex_index_endian = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + vertex_index = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + EndianSwap32Uint(vertex_index, vertex_index_endian)); + // Add the base to the index. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantIndexVertexBaseIndex)); + vertex_index = builder_->createBinOp( + spv::OpIAdd, type_int_, vertex_index, + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision)); + // Write the index to r0.x as float. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(const_int_0_); + builder_->createStore( + builder_->createUnaryOp(spv::OpConvertSToF, type_float_, + vertex_index), + builder_->createAccessChain(spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + } } -void SpirvShaderTranslator::ProcessAluInstruction( - const ParsedAluInstruction& instr) { - if (instr.IsNop()) { +void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPosition)); + spv::Id position_ptr = builder_->createAccessChain( + spv::StorageClassOutput, output_per_vertex_, id_vector_temp_); + spv::Id guest_position = builder_->createLoad(position_ptr, spv::NoPrecision); + + // Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W + // into W. + spv::Id position_w = + builder_->createCompositeExtract(guest_position, type_float_, 3); + spv::Id is_w_not_reciprocal = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant( + static_cast<uint32_t>(kSysFlag_WNotReciprocal))), + const_uint_0_); + spv::Id guest_position_w_inv = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, position_w); + builder_->addDecoration(guest_position_w_inv, spv::DecorationNoContraction); + position_w = + builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal, + position_w, guest_position_w_inv); + + // Check if the shader returns XY/W rather than XY, and if it does, revert + // that. + // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in + // affine interpolation.
+ uint_vector_temp_.clear(); + uint_vector_temp_.reserve(2); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + spv::Id position_xy = builder_->createRvalueSwizzle( + spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_); + spv::Id is_xy_divided_by_w = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant( + static_cast<uint32_t>(kSysFlag_XYDividedByW))), + const_uint_0_); + spv::Id guest_position_xy_mul_w = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float2_, position_xy, position_w); + builder_->addDecoration(guest_position_xy_mul_w, + spv::DecorationNoContraction); + position_xy = + builder_->createTriOp(spv::OpSelect, type_float2_, is_xy_divided_by_w, + guest_position_xy_mul_w, position_xy); + + // Check if the shader returns Z/W rather than Z, and if it does, revert that. + // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in + // affine interpolation. + spv::Id position_z = + builder_->createCompositeExtract(guest_position, type_float_, 2); + spv::Id is_z_divided_by_w = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant( + static_cast<uint32_t>(kSysFlag_ZDividedByW))), + const_uint_0_); + spv::Id guest_position_z_mul_w = + builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w); + builder_->addDecoration(guest_position_z_mul_w, spv::DecorationNoContraction); + position_z = + builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w, + guest_position_z_mul_w, position_z); + + // Build XYZ of the position with W format handled. + spv::Id position_xyz; + { + std::unique_ptr<spv::Instruction> composite_construct_op = + std::make_unique<spv::Instruction>( + builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(position_xy); + composite_construct_op->addIdOperand(position_z); + position_xyz = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + + // Apply the NDC scale and offset for guest to host viewport transformation. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantNdcScale)); + spv::Id ndc_scale = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + position_xyz = + builder_->createBinOp(spv::OpFMul, type_float3_, position_xyz, ndc_scale); + builder_->addDecoration(position_xyz, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantNdcOffset)); + spv::Id ndc_offset = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id ndc_offset_mul_w = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float3_, ndc_offset, position_w); + builder_->addDecoration(ndc_offset_mul_w, spv::DecorationNoContraction); + position_xyz = builder_->createBinOp(spv::OpFAdd, type_float3_, position_xyz, + ndc_offset_mul_w); + builder_->addDecoration(position_xyz, spv::DecorationNoContraction); + + // Store the position converted to the host.
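Taken together, the W, XY, and Z selects above amount to this per-vertex fix-up; a hedged plain-C++ restatement with invented names (the store that follows writes out exactly the (xyz, w) built here):

  struct Float4 {
    float x, y, z, w;
  };

  Float4 FixUpGuestPosition(Float4 p, bool w_not_reciprocal,
                            bool xy_divided_by_w, bool z_divided_by_w,
                            const float ndc_scale[3],
                            const float ndc_offset[3]) {
    // Restore W if the guest shader exported 1/W.
    float w = w_not_reciprocal ? p.w : 1.0f / p.w;
    // Undo any pre-divide by W the guest shader performed.
    float x = xy_divided_by_w ? p.x * w : p.x;
    float y = xy_divided_by_w ? p.y * w : p.y;
    float z = z_divided_by_w ? p.z * w : p.z;
    // NDC scale, plus the offset multiplied by W so that after the host's
    // 1/W perspective divide it becomes a constant NDC-space offset.
    return {x * ndc_scale[0] + ndc_offset[0] * w,
            y * ndc_scale[1] + ndc_offset[1] * w,
            z * ndc_scale[2] + ndc_offset[2] * w, w};
  }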
+ spv::Id position; + { + std::unique_ptr<spv::Instruction> composite_construct_op = + std::make_unique<spv::Instruction>( + builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(position_xyz); + composite_construct_op->addIdOperand(position_w); + position = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + builder_->createStore(position, position_ptr); +} + +void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { + // Interpolator inputs. + uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + main_interface_.push_back(interpolator); + } + + bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; + + // Fragment coordinates. + // TODO(Triang3l): More conditions - fragment shader interlock render backend, + // alpha to coverage (if RT 0 is written, and there's no early depth / + // stencil), depth writing in the fragment shader (per-sample if supported). + if (param_gen_needed) { + input_fragment_coord_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord"); + builder_->addDecoration(input_fragment_coord_, spv::DecorationBuiltIn, + spv::BuiltInFragCoord); + main_interface_.push_back(input_fragment_coord_); + } + + // Is front facing. + // TODO(Triang3l): Needed for stencil in the fragment shader interlock render + // backend. + if (param_gen_needed && !GetSpirvShaderModification().pixel.param_gen_point) { + input_front_facing_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing"); + builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn, + spv::BuiltInFrontFacing); + main_interface_.push_back(input_front_facing_); + } + + // Framebuffer attachment outputs. + std::fill(output_fragment_data_.begin(), output_fragment_data_.end(), + spv::NoResult); + static const char* const kFragmentDataNames[] = { + "xe_out_fragment_data_0", + "xe_out_fragment_data_1", + "xe_out_fragment_data_2", + "xe_out_fragment_data_3", + }; + uint32_t color_targets_remaining = current_shader().writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(UINT32_C(1) << color_target_index); + spv::Id output_fragment_data_rt = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_float4_, + kFragmentDataNames[color_target_index]); + output_fragment_data_[color_target_index] = output_fragment_data_rt; + builder_->addDecoration(output_fragment_data_rt, spv::DecorationLocation, + int(color_target_index)); + // Make invariant as pixel shaders may be used for various precise + // computations. + builder_->addDecoration(output_fragment_data_rt, spv::DecorationInvariant); + main_interface_.push_back(output_fragment_data_rt); + } +} + +void SpirvShaderTranslator::StartFragmentShaderInMain() { + uint32_t param_gen_interpolator = GetPsParamGenInterpolator(); + + // Copy the interpolators to general-purpose registers. + // TODO(Triang3l): Centroid.
+ uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + if (i == param_gen_interpolator) { + continue; + } + id_vector_temp_.clear(); + // Register array element. + id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); + builder_->createStore( + builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision), + builder_->createAccessChain(spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + + // Pixel parameters. + if (param_gen_interpolator != UINT32_MAX) { + Modification modification = GetSpirvShaderModification(); + // Rounding the position down, and taking the absolute value, so in case the + // host GPU for some reason has quads used for derivative calculation at odd + // locations, the left and top edges will have correct derivative magnitude + // and LODs. + // Assuming that if PsParamGen is needed at all, param_gen_point is always + // set for point primitives, and is always disabled for other primitive + // types. + // OpFNegate requires sign bit flipping even for 0.0 (in this case, the + // first column or row of pixels) only since SPIR-V 1.5 revision 2 (not the + // base 1.5). + // TODO(Triang3l): When SPIR-V 1.6 is used in Xenia, see if OpFNegate can be + // used there, should be cheaper because it may be implemented as a hardware + // instruction modifier, though it respects the rule for subnormal numbers - + // see the actual hardware instructions in both OpBitwiseXor and OpFNegate + // cases. + spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); + // TODO(Triang3l): Resolution scale inversion. + // X - pixel X .0 in the magnitude, is back-facing in the sign bit. + assert_true(input_fragment_coord_ != spv::NoResult); + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_int_0_); + spv::Id param_gen_x = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (!modification.pixel.param_gen_point) { + assert_true(input_front_facing_ != spv::NoResult); + param_gen_x = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::OpLogicalOr, type_bool_, + builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitivePolygonal)), + const_uint_0_), + builder_->createLoad(input_front_facing_, spv::NoPrecision)), + param_gen_x, + builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, + param_gen_x), + const_sign_bit))); + } + // Y - pixel Y .0 in the magnitude, is point in the sign bit. 
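The bitcast/XOR sequence above is a sign-bit packing trick: the magnitude carries floor(gl_FragCoord.x) and the sign bit carries the back-facing flag. XOR of the raw bits is used instead of float negation because, as the comment notes, OpFNegate is only required to flip the sign of 0.0 starting with SPIR-V 1.5 revision 2. A hedged scalar sketch; the Y lane that follows applies the same packing with the point flag:

  #include <cstdint>
  #include <cstring>

  float PackFlagIntoSign(float magnitude, bool flag) {
    uint32_t bits;
    std::memcpy(&bits, &magnitude, sizeof(bits));  // Bitcast, as OpBitcast.
    if (flag) {
      bits ^= UINT32_C(1) << 31;  // Flips the sign even for +0.0f.
    }
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
  }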
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + spv::Id param_gen_y = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (modification.pixel.param_gen_point) { + param_gen_y = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, param_gen_y), + const_sign_bit)); + } + // Z - point S in the magnitude, is line in the sign bit. + spv::Id param_gen_z; + if (modification.pixel.param_gen_point) { + // TODO(Triang3l): Point coordinates. + param_gen_z = const_float_0_; + } else { + param_gen_z = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitiveLine)), + const_uint_0_), + const_sign_bit, const_uint_0_)); + } + // W - point T in the magnitude. + // TODO(Triang3l): Point coordinates. + spv::Id param_gen_w = const_float_0_; + // Store the pixel parameters. + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(param_gen_x); + id_vector_temp_.push_back(param_gen_y); + id_vector_temp_.push_back(param_gen_z); + id_vector_temp_.push_back(param_gen_w); + spv::Id param_gen = + builder_->createCompositeConstruct(type_float4_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(int(param_gen_interpolator))); + builder_->createStore(param_gen, builder_->createAccessChain( + spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + + // Initialize the colors for safety. + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + spv::Id output_fragment_data_rt = output_fragment_data_[i]; + if (output_fragment_data_rt != spv::NoResult) { + builder_->createStore(const_float4_0_, output_fragment_data_rt); + } + } +} + +void SpirvShaderTranslator::UpdateExecConditionals( + ParsedExecInstruction::Type type, uint32_t bool_constant_index, + bool condition) { + // Check if we can merge the new exec with the previous one, or the jump with + // the previous exec. The instruction-level predicate check is also merged in + // this case. + if (type == ParsedExecInstruction::Type::kConditional) { + // Can merge conditional with conditional, as long as the bool constant and + // the expected values are the same. + if (cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == bool_constant_index && + cf_exec_condition_ == condition) { + return; + } + } else if (type == ParsedExecInstruction::Type::kPredicated) { + // Can merge predicated with predicated if the conditions are the same and + // the previous exec hasn't modified the predicate register. 
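The merge rules spelled out in these comments reduce to a small amount of tracked state; a hedged restatement with shortened names (the checks in the following hunk implement exactly this decision):

  #include <cstdint>

  struct ExecCondState {
    bool merge_open = false;         // cf_exec_conditional_merge_ != nullptr.
    uint32_t bool_or_pred = 0;       // Bool constant index, or the sentinel
                                     // kCfExecBoolConstantPredicate.
    bool condition = false;          // Condition value the open exec expects.
    bool predicate_written = false;  // A setp ran inside the open exec.
  };

  bool CanMergeExec(const ExecCondState& s, bool predicated, uint32_t key,
                    bool condition) {
    if (!s.merge_open) {
      return false;
    }
    // Predicated execs can't merge once p0 may have changed mid-exec.
    if (predicated && s.predicate_written) {
      return false;
    }
    return s.bool_or_pred == key && s.condition == condition;
  }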
+ if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate && + cf_exec_condition_ == condition) { + return; + } + } else { + // Can merge unconditional with unconditional. + assert_true(type == ParsedExecInstruction::Type::kUnconditional); + if (!cf_exec_conditional_merge_) { + return; + } + } + + CloseExecConditionals(); + + if (type == ParsedExecInstruction::Type::kUnconditional) { return; } - auto& b = *builder_; - - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. - if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - - bool close_predicated_block_vector = false; - bool store_vector = - ProcessVectorAluOperation(instr, close_predicated_block_vector); - bool close_predicated_block_scalar = false; - bool store_scalar = - ProcessScalarAluOperation(instr, close_predicated_block_scalar); - - if (store_vector) { - StoreToResult(b.createLoad(pv_), instr.vector_and_constant_result); - } - if (store_scalar) { - StoreToResult(b.createLoad(ps_), instr.scalar_result); - } - - if ((close_predicated_block_vector || close_predicated_block_scalar) && - open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } -} - -spv::Function* SpirvShaderTranslator::CreateCubeFunction() { - auto& b = *builder_; - spv::Block* function_block = nullptr; - auto function = b.makeFunctionEntry(spv::NoPrecision, vec4_float_type_, - "cube", {vec4_float_type_}, - {{spv::NoPrecision}}, &function_block); - auto src = function->getParamId(0); - auto face_id = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "face_id"); - auto sc = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "sc"); - auto tc = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "tc"); - auto ma = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "ma"); - - // Pseudocode: - /* - vec4 cube(vec4 src1) { - vec3 src = vec3(src1.y, src1.x, src1.z); - vec3 abs_src = abs(src); - int face_id; - float sc; - float tc; - float ma; - if (abs_src.x > abs_src.y && abs_src.x > abs_src.z) { - if (src.x > 0.0) { - face_id = 0; sc = -abs_src.z; tc = -abs_src.y; ma = abs_src.x; - } else { - face_id = 1; sc = abs_src.z; tc = -abs_src.y; ma = abs_src.x; - } - } else if (abs_src.y > abs_src.x && abs_src.y > abs_src.z) { - if (src.y > 0.0) { - face_id = 2; sc = abs_src.x; tc = abs_src.z; ma = abs_src.y; - } else { - face_id = 3; sc = abs_src.x; tc = -abs_src.z; ma = abs_src.y; - } + EnsureBuildPointAvailable(); + 
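For a conditional exec, the condition built next comes from one bit of the packed bool constants. In scalar terms, the access chain and mask in the following hunk perform this lookup (a hedged model; the uvec4-array packing is inferred from the indexing math itself):

  #include <cstdint>

  // Bool constants are packed one bit each into 128-bit vectors (uvec4s)
  // inside the bool/loop constants uniform buffer.
  bool GetBoolConstant(const uint32_t bool_constants[][4], uint32_t index) {
    uint32_t vector_index = index >> 7;        // Which 128-bit vector.
    uint32_t scalar_index = (index >> 5) & 3;  // Which 32-bit lane of it.
    uint32_t bit_index = index & 31;           // Which bit of that lane.
    return ((bool_constants[vector_index][scalar_index] >> bit_index) & 1u) !=
           0;
  }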
spv::Id condition_id; + if (type == ParsedExecInstruction::Type::kConditional) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Bool constants (member 0). + id_vector_temp_.push_back(const_int_0_); + // 128-bit vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(bool_constant_index >> 7))); + // 32-bit scalar of a 128-bit vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int((bool_constant_index >> 5) & 3))); + spv::Id bool_constant_scalar = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); + condition_id = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, bool_constant_scalar, + builder_->makeUintConstant(uint32_t(1) + << (bool_constant_index & 31))), + const_uint_0_); + cf_exec_bool_constant_or_predicate_ = bool_constant_index; + } else if (type == ParsedExecInstruction::Type::kPredicated) { + condition_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); + cf_exec_bool_constant_or_predicate_ = kCfExecBoolConstantPredicate; } else { - if (src.z > 0.0) { - face_id = 4; sc = abs_src.x; tc = -abs_src.y; ma = abs_src.z; - } else { - face_id = 5; sc = -abs_src.x; tc = -abs_src.y; ma = abs_src.z; + assert_unhandled_case(type); + return; + } + cf_exec_condition_ = condition; + cf_exec_conditional_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); + SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId()); + spv::Block& inner_block = builder_->makeNewBlock(); + builder_->createConditionalBranch( + condition_id, condition ? &inner_block : cf_exec_conditional_merge_, + condition ? cf_exec_conditional_merge_ : &inner_block); + builder_->setBuildPoint(&inner_block); +} + +void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, + bool condition) { + if (!predicated) { + CloseInstructionPredication(); + return; + } + + if (cf_instruction_predicate_merge_) { + if (cf_instruction_predicate_condition_ == condition) { + // Already in the needed instruction-level conditional. 
+ return; } - } - float s = (sc / ma + 1.0) / 2.0; - float t = (tc / ma + 1.0) / 2.0; - return vec4(t, s, 2.0 * ma, float(face_id)); - } - */ - - auto abs_src = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kFAbs, {src}); - auto abs_src_x = b.createCompositeExtract(abs_src, float_type_, 0); - auto abs_src_y = b.createCompositeExtract(abs_src, float_type_, 1); - auto abs_src_z = b.createCompositeExtract(abs_src, float_type_, 2); - auto neg_src_x = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_x); - auto neg_src_y = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_y); - auto neg_src_z = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_z); - - // Case 1: abs(src).x > abs(src).yz - { - auto x_gt_y = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_x, abs_src_y); - auto x_gt_z = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_x, abs_src_z); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, x_gt_y, x_gt_z); - spv::Builder::If if1(c1, 0, b); - - // sc = abs(src).y - b.createStore(abs_src_y, sc); - // ma = abs(src).x - b.createStore(abs_src_x, ma); - - auto src_x = b.createCompositeExtract(src, float_type_, 0); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_x, - b.makeFloatConstant(0)); - // src.x > 0: - // face_id = 2 - // tc = -abs(src).z - // src.x <= 0: - // face_id = 3 - // tc = abs(src).z - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(2), b.makeFloatConstant(3)); - auto tmp_tc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_z, abs_src_z); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_tc, tc); - - if1.makeEndIf(); + CloseInstructionPredication(); } - // Case 2: abs(src).y > abs(src).xz - { - auto y_gt_x = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_y, abs_src_x); - auto y_gt_z = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_y, abs_src_z); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, y_gt_x, y_gt_z); - spv::Builder::If if1(c1, 0, b); - - // tc = -abs(src).x - b.createStore(neg_src_x, tc); - // ma = abs(src).y - b.createStore(abs_src_y, ma); - - auto src_y = b.createCompositeExtract(src, float_type_, 1); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_y, - b.makeFloatConstant(0)); - // src.y > 0: - // face_id = 0 - // sc = -abs(src).z - // src.y <= 0: - // face_id = 1 - // sc = abs(src).z - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(0), b.makeFloatConstant(1)); - auto tmp_sc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_z, abs_src_z); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_sc, sc); - - if1.makeEndIf(); + // If the instruction predicate condition is the same as the exec predicate + // condition, no need to open a check. However, if there was a `setp` prior + // to this instruction, the predicate value now may be different than it was + // in the beginning of the exec. 
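Concretely, the hazard described above looks like this (a hedged, runnable model of the control flow, not real ucode):

  #include <cassert>

  int main() {
    bool p0 = true;
    // (p0) exec - the predicate is tested once when the exec is entered.
    if (p0) {
      p0 = false;  // A setp inside the exec changes the predicate...
      // ...so a later (p0) instruction in the same exec must open its own
      // check; reusing the exec-level result would run it with a stale p0.
      bool instruction_runs = p0;
      assert(!instruction_runs);
    }
    return 0;
  }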
+ if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate && + cf_exec_condition_ == condition) { + return; } - // Case 3: abs(src).z > abs(src).yx - { - auto z_gt_x = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_z, abs_src_x); - auto z_gt_y = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_z, abs_src_y); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, z_gt_x, z_gt_y); - spv::Builder::If if1(c1, 0, b); - - // tc = -abs(src).x - b.createStore(neg_src_x, tc); - // ma = abs(src).z - b.createStore(abs_src_z, ma); - - auto src_z = b.createCompositeExtract(src, float_type_, 2); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_z, - b.makeFloatConstant(0)); - // src.z > 0: - // face_id = 4 - // sc = -abs(src).y - // src.z <= 0: - // face_id = 5 - // sc = abs(src).y - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(4), b.makeFloatConstant(5)); - auto tmp_sc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_y, abs_src_y); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_sc, sc); - - if1.makeEndIf(); - } - - // s = (sc / ma + 1.0) / 2.0 - auto s = b.createBinOp(spv::Op::OpFDiv, float_type_, b.createLoad(sc), - b.createLoad(ma)); - s = b.createBinOp(spv::Op::OpFAdd, float_type_, s, b.makeFloatConstant(1.0)); - s = b.createBinOp(spv::Op::OpFDiv, float_type_, s, b.makeFloatConstant(2.0)); - - // t = (tc / ma + 1.0) / 2.0 - auto t = b.createBinOp(spv::Op::OpFDiv, float_type_, b.createLoad(tc), - b.createLoad(ma)); - t = b.createBinOp(spv::Op::OpFAdd, float_type_, t, b.makeFloatConstant(1.0)); - t = b.createBinOp(spv::Op::OpFDiv, float_type_, t, b.makeFloatConstant(2.0)); - - auto ma_times_two = b.createBinOp(spv::Op::OpFMul, float_type_, - b.createLoad(ma), b.makeFloatConstant(2.0)); - - // dest = vec4(t, s, 2.0 * ma, face_id) - auto ret = b.createCompositeConstruct( - vec4_float_type_, - std::vector({t, s, ma_times_two, b.createLoad(face_id)})); - b.makeReturn(false, ret); - - return function; + cf_instruction_predicate_condition_ = condition; + EnsureBuildPointAvailable(); + spv::Id predicate_id = + builder_->createLoad(var_main_predicate_, spv::NoPrecision); + spv::Block& predicated_block = builder_->makeNewBlock(); + cf_instruction_predicate_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); + SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId()); + builder_->createConditionalBranch( + predicate_id, + condition ? &predicated_block : cf_instruction_predicate_merge_, + condition ? cf_instruction_predicate_merge_ : &predicated_block); + builder_->setBuildPoint(&predicated_block); } -bool SpirvShaderTranslator::ProcessVectorAluOperation( - const ParsedAluInstruction& instr, bool& close_predicated_block) { - close_predicated_block = false; - - if (!instr.vector_and_constant_result.GetUsedWriteMask() && - !ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) { - return false; +void SpirvShaderTranslator::CloseInstructionPredication() { + if (!cf_instruction_predicate_merge_) { + return; } - - auto& b = *builder_; - - // TODO: If we have identical operands, reuse previous one. 
- Id sources[3] = {0}; - Id dest = vec4_float_zero_; - for (uint32_t i = 0; i < instr.vector_operand_count; i++) { - sources[i] = LoadFromOperand(instr.vector_operands[i]); + spv::Block& inner_block = *builder_->getBuildPoint(); + if (!inner_block.isTerminated()) { + builder_->createBranch(cf_instruction_predicate_merge_); } - - switch (instr.vector_opcode) { - case AluVectorOpcode::kAdd: { - dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], - sources[1]); - } break; - - case AluVectorOpcode::kCndEq: { - // dest = src0 == 0.0 ? src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCndGe: { - // dest = src0 >= 0.0 ? src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[0], vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCndGt: { - // dest = src0 > 0.0 ? src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCube: { - dest = - b.createFunctionCall(cube_function_, std::vector({sources[1]})); - } break; - - case AluVectorOpcode::kDst: { - auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); - auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1); - auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y); - - auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2); - auto src1_w = b.createCompositeExtract(sources[1], float_type_, 3); - dest = b.createCompositeConstruct( - vec4_float_type_, - std::vector({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w})); - } break; - - case AluVectorOpcode::kDp2Add: { - auto src0_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, - {sources[0], sources[0], 0, 1}); - auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, - {sources[1], sources[1], 0, 1}); - auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0); - dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy); - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dest, src2_x); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case AluVectorOpcode::kDp3: { - auto src0_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, - {sources[0], sources[0], 0, 1, 2}); - auto src1_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, - {sources[1], sources[1], 0, 1, 2}); - dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case AluVectorOpcode::kDp4: { - dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case AluVectorOpcode::kFloor: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFloor, - {sources[0]}); - } break; - - case AluVectorOpcode::kFrc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFract, - {sources[0]}); - } break; - - case AluVectorOpcode::kKillEq: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = 
&b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, - sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillGe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, - vec4_bool_type_, sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillGt: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillNe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kMad: { - dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], - sources[1]); - dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); - } break; - - case AluVectorOpcode::kMax4: { - auto src0_x = b.createCompositeExtract(sources[0], float_type_, 0); - auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); - auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2); - auto src0_w = b.createCompositeExtract(sources[0], float_type_, 3); - - auto max_xy = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {src0_x, src0_y}); - auto max_zw = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {src0_z, src0_w}); - auto max_xyzw = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {max_xy, max_zw}); - - // FIXME: Docs say this only updates pv.x? - dest = b.smearScalar(spv::NoPrecision, max_xyzw, vec4_float_type_); - } break; - - case AluVectorOpcode::kMaxA: { - // a0 = clamp(floor(src0.w + 0.5), -256, 255) - auto addr = b.createCompositeExtract(sources[0], float_type_, 3); - addr = b.createBinOp(spv::Op::OpFAdd, float_type_, addr, - b.makeFloatConstant(0.5f)); - addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? 
src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMax: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - break; - } - - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMin: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - break; - } - - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMin, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMul: { - dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], - sources[1]); - } break; - - case AluVectorOpcode::kSetpEqPush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], - vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpGePush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[1], vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpGtPush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[1], vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = 
b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpNePush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - sources[0], vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], - vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSeq: { - // foreach(el) src0 == src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSge: { - // foreach(el) src0 >= src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSgt: { - // foreach(el) src0 > src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSne: { - // foreach(el) src0 != src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kTrunc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - GLSLstd450::kTrunc, {sources[0]}); - } break; - - default: - assert_unhandled_case(instr.vector_opcode); - break; - } - - assert_not_zero(dest); - assert_true(b.getTypeId(dest) == vec4_float_type_); - if (dest) { - b.createStore(dest, pv_); - return true; - } - return false; + inner_block.getParent().addBlock(cf_instruction_predicate_merge_); + builder_->setBuildPoint(cf_instruction_predicate_merge_); + cf_instruction_predicate_merge_ = nullptr; } -bool SpirvShaderTranslator::ProcessScalarAluOperation( - const ParsedAluInstruction& instr, bool& close_predicated_block) { - close_predicated_block = false; - - if (instr.scalar_opcode == ucode::AluScalarOpcode::kRetainPrev && - !instr.scalar_result.GetUsedWriteMask()) { - return false; - } - - auto& b = *builder_; - - // TODO: If we have identical operands, reuse previous one. - Id sources[3] = {0}; - Id dest = b.makeFloatConstant(0); - for (uint32_t i = 0, x = 0; i < instr.scalar_operand_count; i++) { - auto src = LoadFromOperand(instr.scalar_operands[i]); - - // Pull components out of the vector operands and use them as sources. 
- if (instr.scalar_operands[i].component_count > 1) { - for (uint32_t j = 0; j < instr.scalar_operands[i].component_count; j++) { - sources[x++] = b.createCompositeExtract(src, float_type_, j); - } - } else { - sources[x++] = src; +void SpirvShaderTranslator::CloseExecConditionals() { + // Within the exec - instruction-level predicate check. + CloseInstructionPredication(); + // Exec level. + if (cf_exec_conditional_merge_) { + spv::Block& inner_block = *builder_->getBuildPoint(); + if (!inner_block.isTerminated()) { + builder_->createBranch(cf_exec_conditional_merge_); } + inner_block.getParent().addBlock(cf_exec_conditional_merge_); + builder_->setBuildPoint(cf_exec_conditional_merge_); + cf_exec_conditional_merge_ = nullptr; } - - switch (instr.scalar_opcode) { - case AluScalarOpcode::kAdds: - case AluScalarOpcode::kAddsc0: - case AluScalarOpcode::kAddsc1: { - // dest = src0 + src1 - dest = - b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kAddsPrev: { - // dest = src0 + ps - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kCos: { - // dest = cos(src0) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kCos, {sources[0]}); - } break; - - case AluScalarOpcode::kExp: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kExp2, {sources[0]}); - } break; - - case AluScalarOpcode::kFloors: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFloor, {sources[0]}); - } break; - - case AluScalarOpcode::kFrcs: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFract, {sources[0]}); - } break; - - case AluScalarOpcode::kKillsEq: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsGe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsGt: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsNe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = 
b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsOne: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(1.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kLogc: { - auto t = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); - - // FIXME: We don't check to see if t == -INF, we just check for INF - auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, t); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(-FLT_MAX), t); - } break; - - case AluScalarOpcode::kLog: { - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); - } break; - - case AluScalarOpcode::kMaxAsf: { - auto addr = - b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMaxAs: { - // a0 = clamp(floor(src0 + 0.5), -256, 255) - auto addr = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], - b.makeFloatConstant(0.5f)); - addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMaxs: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - } - - // dest = max(src0, src1) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMins: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - } - - // dest = min(src0, src1) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFMin, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMuls: - case AluScalarOpcode::kMulsc0: - case AluScalarOpcode::kMulsc1: { - // dest = src0 * src1 - dest = - b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kMulsPrev: { - // dest = src0 * ps - dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kMulsPrev2: { - // TODO: Uh... see GLSL translator for impl. 
- } break; - - case AluScalarOpcode::kRcpc: { - dest = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, - {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); - } break; - - case AluScalarOpcode::kRcpf: { - dest = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), dest); - } break; - - case AluScalarOpcode::kRcp: { - // dest = src0 != 0.0 ? 1.0 / src0 : 0.0; - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), d); - } break; - - case AluScalarOpcode::kRsqc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, - {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); - } break; - - case AluScalarOpcode::kRsqf: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - auto c1 = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); - auto c2 = b.createUnaryOp(spv::Op::OpIsNan, bool_type_, dest); - auto c = b.createBinOp(spv::Op::OpLogicalOr, bool_type_, c1, c2); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), dest); - } break; - - case AluScalarOpcode::kRsq: { - // dest = src0 > 0.0 ? inversesqrt(src0) : 0.0; - auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), d); - } break; - - case AluScalarOpcode::kSeqs: { - // dest = src0 == 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSges: { - // dest = src0 >= 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSgts: { - // dest = src0 > 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSnes: { - // dest = src0 != 0.0 ? 
1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSetpClr: { - b.createStore(b.makeBoolConstant(false), p0_); - close_predicated_block = true; - dest = b.makeFloatConstant(FLT_MAX); - } break; - - case AluScalarOpcode::kSetpEq: { - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpGe: { - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpGt: { - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpInv: { - // p0 = src0 == 1.0 - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(1.f)); - b.createStore(cond, p0_); - close_predicated_block = true; - - // if (!cond) dest = src0 == 0.0 ? 1.0 : src0; - auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - auto dst_false = b.createTriOp(spv::Op::OpSelect, float_type_, dst_cond, - b.makeFloatConstant(1.f), sources[0]); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), dst_false); - } break; - - case AluScalarOpcode::kSetpNe: { - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 
0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpPop: { - auto src = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], - b.makeFloatConstant(1.f)); - auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src, - b.makeFloatConstant(0.f)); - b.createStore(c, p0_); - close_predicated_block = true; - - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, GLSLstd450::kFMax, - {sources[0], b.makeFloatConstant(0.f)}); - } break; - - case AluScalarOpcode::kSetpRstr: { - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createStore(c, p0_); - close_predicated_block = true; - dest = sources[0]; - } break; - - case AluScalarOpcode::kSin: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kSin, {sources[0]}); - } break; - - case AluScalarOpcode::kSqrt: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kSqrt, {sources[0]}); - } break; - - case AluScalarOpcode::kSubs: - case AluScalarOpcode::kSubsc0: - case AluScalarOpcode::kSubsc1: { - dest = - b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kSubsPrev: { - dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kTruncs: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kTrunc, {sources[0]}); - } break; - - default: - assert_unhandled_case(instr.scalar_opcode); - break; - } - - assert_not_zero(dest); - assert_true(b.getTypeId(dest) == float_type_); - if (dest) { - b.createStore(dest, ps_); - return true; - } - return false; + // Nothing relies on the predicate value being unchanged now. + cf_exec_predicate_written_ = false; } -Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( - spv::Decoration precision, Id result_type, GLSLstd450 instruction_ordinal, - std::vector args) { - return builder_->createBuiltinCall(result_type, glsl_std_450_instruction_set_, - static_cast(instruction_ordinal), - args); -} - -Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { - auto& b = *builder_; - - Id storage_pointer = 0; - Id storage_type = vec4_float_type_; - spv::StorageClass storage_class; - Id storage_index = 0; // Storage index at lowest level - std::vector storage_offsets; // Offsets in nested arrays -> storage - - // Out of the 512 constant registers pixel shaders get the last 256. - uint32_t storage_base = 0; - if (op.storage_source == InstructionStorageSource::kConstantFloat) { - storage_base = is_pixel_shader() ? 
256 : 0; - } - - switch (op.storage_addressing_mode) { +spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant) { + const Shader::ConstantRegisterMap& constant_register_map = + current_shader().constant_register_map(); + EnsureBuildPointAvailable(); + spv::Id base_pointer = spv::NoResult; + switch (addressing_mode) { case InstructionStorageAddressingMode::kAbsolute: { - storage_index = b.makeUintConstant(storage_base + op.storage_index); - } break; - case InstructionStorageAddressingMode::kAddressRegisterRelative: { - // storage_index + a0 - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), - b.makeUintConstant(storage_base + op.storage_index)); - } break; - case InstructionStorageAddressingMode::kLoopRelative: { - // storage_index + aL.x - auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, idx, - b.makeUintConstant(storage_base + op.storage_index)); - } break; - default: - assert_always(); + uint32_t static_storage_index = storage_index; + if (is_float_constant) { + static_storage_index = + constant_register_map.GetPackedFloatConstantIndex(storage_index); + assert_true(static_storage_index != UINT32_MAX); + if (static_storage_index == UINT32_MAX) { + static_storage_index = 0; + } + } + return builder_->makeIntConstant(int(static_storage_index)); + } + case InstructionStorageAddressingMode::kAddressRegisterRelative: + base_pointer = var_main_address_register_; + break; + case InstructionStorageAddressingMode::kLoopRelative: + // Load X component. + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back(const_int_0_); + base_pointer = builder_->createAccessChain(spv::StorageClassFunction, + var_main_loop_address_, + id_vector_temp_util_); break; } + assert_true(!is_float_constant || + constant_register_map.float_dynamic_addressing); + assert_true(base_pointer != spv::NoResult); + spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision); + if (storage_index) { + index = + builder_->createBinOp(spv::OpIAdd, type_int_, index, + builder_->makeIntConstant(int(storage_index))); + } + return index; +} - switch (op.storage_source) { +spv::Id SpirvShaderTranslator::LoadOperandStorage( + const InstructionOperand& operand) { + spv::Id index = GetStorageAddressingIndex( + operand.storage_addressing_mode, operand.storage_index, + operand.storage_source == InstructionStorageSource::kConstantFloat); + EnsureBuildPointAvailable(); + spv::Id vec4_pointer = spv::NoResult; + switch (operand.storage_source) { case InstructionStorageSource::kRegister: - storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassFunction; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - assert_true(uint32_t(op.storage_index) < register_count()); + assert_true(var_main_registers_ != spv::NoResult); + id_vector_temp_util_.clear(); + // Array element. 
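+ // (The index may be a dynamically computed OpIAdd result when a0- or
+ // aL-relative addressing is used - see GetStorageAddressingIndex.)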
+ id_vector_temp_util_.push_back(index); + vec4_pointer = builder_->createAccessChain( + spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); break; case InstructionStorageSource::kConstantFloat: - storage_pointer = consts_; - storage_class = spv::StorageClass::StorageClassUniform; - storage_type = vec4_float_type_; - storage_offsets.push_back(b.makeUintConstant(0)); - storage_offsets.push_back(storage_index); - break; - case InstructionStorageSource::kVertexFetchConstant: - case InstructionStorageSource::kTextureFetchConstant: - // Should not reach this. - assert_always(); + assert_true(uniform_float_constants_ != spv::NoResult); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(2); + // The first and the only structure member. + id_vector_temp_util_.push_back(const_int_0_); + // Array element. + id_vector_temp_util_.push_back(index); + vec4_pointer = builder_->createAccessChain(spv::StorageClassUniform, + uniform_float_constants_, + id_vector_temp_util_); break; default: - assert_always(); - break; + assert_unhandled_case(operand.storage_source); } - - if (!storage_pointer) { - return b.createUndefined(vec4_float_type_); - } - - storage_pointer = - b.createAccessChain(storage_class, storage_pointer, storage_offsets); - auto storage_value = b.createLoad(storage_pointer); - assert_true(b.getTypeId(storage_value) == vec4_float_type_); - - if (op.component_count == 1) { - // Don't bother handling constant 0/1 fetches, as they're invalid in scalar - // opcodes. - uint32_t index = 0; - switch (op.components[0]) { - case SwizzleSource::kX: - index = 0; - break; - case SwizzleSource::kY: - index = 1; - break; - case SwizzleSource::kZ: - index = 2; - break; - case SwizzleSource::kW: - index = 3; - break; - case SwizzleSource::k0: - assert_always(); - break; - case SwizzleSource::k1: - assert_always(); - break; - } - - storage_value = b.createCompositeExtract(storage_value, float_type_, index); - storage_type = float_type_; - } - - if (op.is_absolute_value) { - storage_value = CreateGlslStd450InstructionCall( - spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value}); - } - if (op.is_negated) { - storage_value = - b.createUnaryOp(spv::Op::OpFNegate, storage_type, storage_value); - } - - // swizzle - if (op.component_count > 1 && !op.IsStandardSwizzle()) { - std::vector operands; - operands.push_back(storage_value); - operands.push_back(b.makeCompositeConstant( - vec2_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - - // Components start from left and are duplicated rightwards - // e.g. count = 1, xxxx / count = 2, xyyy ... 
- for (uint32_t i = 0; i < 4; i++) { - auto swiz = op.components[i]; - if (i > op.component_count - 1) { - swiz = op.components[op.component_count - 1]; - } - - switch (swiz) { - case SwizzleSource::kX: - operands.push_back(0); - break; - case SwizzleSource::kY: - operands.push_back(1); - break; - case SwizzleSource::kZ: - operands.push_back(2); - break; - case SwizzleSource::kW: - operands.push_back(3); - break; - case SwizzleSource::k0: - operands.push_back(4); - break; - case SwizzleSource::k1: - operands.push_back(5); - break; - } - } - - storage_value = - b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); - } - - return storage_value; + assert_true(vec4_pointer != spv::NoResult); + return builder_->createLoad(vec4_pointer, spv::NoPrecision); } -void SpirvShaderTranslator::StoreToResult(Id source_value_id, - const InstructionResult& result) { - auto& b = *builder_; - - if (result.storage_target == InstructionStorageTarget::kNone) { - // No-op? - return; +spv::Id SpirvShaderTranslator::ApplyOperandModifiers( + spv::Id operand_value, const InstructionOperand& original_operand, + bool invert_negate, bool force_absolute) { + spv::Id type = builder_->getTypeId(operand_value); + assert_true(type != spv::NoType); + if (type == spv::NoType) { + return operand_value; } + if (original_operand.is_absolute_value || force_absolute) { + EnsureBuildPointAvailable(); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back(operand_value); + operand_value = builder_->createBuiltinCall( + type, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_util_); + } + if (original_operand.is_negated != invert_negate) { + EnsureBuildPointAvailable(); + operand_value = + builder_->createUnaryOp(spv::OpFNegate, type, operand_value); + builder_->addDecoration(operand_value, spv::DecorationNoContraction); + } + return operand_value; +} +spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( + spv::Id operand_storage, const InstructionOperand& original_operand, + uint32_t components) { + assert_not_zero(components); + if (!components) { + return spv::NoResult; + } + assert_true(components <= 0b1111); + if (components == 0b1111 && original_operand.IsStandardSwizzle()) { + return operand_storage; + } + EnsureBuildPointAvailable(); + uint32_t component_count = xe::bit_count(components); + if (component_count == 1) { + uint32_t scalar_index; + xe::bit_scan_forward(components, &scalar_index); + return builder_->createCompositeExtract( + operand_storage, type_float_, + static_cast(original_operand.GetComponent(scalar_index)) - + static_cast(SwizzleSource::kX)); + } + uint_vector_temp_util_.clear(); + uint_vector_temp_util_.reserve(component_count); + uint32_t components_remaining = components; + uint32_t component_index; + while (xe::bit_scan_forward(components_remaining, &component_index)) { + components_remaining &= ~(uint32_t(1) << component_index); + uint_vector_temp_util_.push_back( + static_cast( + original_operand.GetComponent(component_index)) - + static_cast(SwizzleSource::kX)); + } + return builder_->createRvalueSwizzle(spv::NoPrecision, + type_float_vectors_[component_count - 1], + operand_storage, uint_vector_temp_util_); +} + +void SpirvShaderTranslator::GetOperandScalarXY( + spv::Id operand_storage, const InstructionOperand& original_operand, + spv::Id& a_out, spv::Id& b_out, bool invert_negate, bool force_absolute) { + spv::Id a = GetOperandComponents(operand_storage, original_operand, 0b0001, + invert_negate, force_absolute); + a_out = a; + b_out = 
original_operand.GetComponent(0) != original_operand.GetComponent(1) + ? GetOperandComponents(operand_storage, original_operand, 0b0010, + invert_negate, force_absolute) + : a; +} + +spv::Id SpirvShaderTranslator::GetAbsoluteOperand( + spv::Id operand_storage, const InstructionOperand& original_operand) { + if (original_operand.is_absolute_value && !original_operand.is_negated) { + return operand_storage; + } + EnsureBuildPointAvailable(); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back(operand_storage); + return builder_->createBuiltinCall(builder_->getTypeId(operand_storage), + ext_inst_glsl_std_450_, GLSLstd450FAbs, + id_vector_temp_util_); +} + +void SpirvShaderTranslator::StoreResult(const InstructionResult& result, + spv::Id value) { uint32_t used_write_mask = result.GetUsedWriteMask(); if (!used_write_mask) { return; } - Id storage_pointer = 0; - Id storage_type = vec4_float_type_; - spv::StorageClass storage_class; - Id storage_index = 0; // Storage index at lowest level - std::vector storage_offsets; // Offsets in nested arrays -> storage + EnsureBuildPointAvailable(); - switch (result.storage_addressing_mode) { - case InstructionStorageAddressingMode::kAbsolute: { - storage_index = b.makeUintConstant(result.storage_index); - } break; - case InstructionStorageAddressingMode::kAddressRegisterRelative: { - // storage_index + a0 - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), - b.makeUintConstant(result.storage_index)); - } break; - case InstructionStorageAddressingMode::kLoopRelative: { - // storage_index + aL.x - auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); - storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx, - b.makeUintConstant(result.storage_index)); - } break; - default: - assert_always(); - return; - } - - bool storage_array; + spv::Id target_pointer = spv::NoResult; switch (result.storage_target) { - case InstructionStorageTarget::kRegister: - storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassFunction; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; - assert_true(uint32_t(result.storage_index) < register_count()); + case InstructionStorageTarget::kNone: break; + case InstructionStorageTarget::kRegister: { + assert_true(var_main_registers_ != spv::NoResult); + // Must call GetStorageAddressingIndex first because of + // id_vector_temp_util_ usage in it. + spv::Id register_index = GetStorageAddressingIndex( + result.storage_addressing_mode, result.storage_index); + id_vector_temp_util_.clear(); + // Array element. 
+ id_vector_temp_util_.push_back(register_index); + target_pointer = builder_->createAccessChain( + spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); + } break; case InstructionStorageTarget::kInterpolator: assert_true(is_vertex_shader()); - storage_pointer = interpolators_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; + target_pointer = input_output_interpolators_[result.storage_index]; break; case InstructionStorageTarget::kPosition: assert_true(is_vertex_shader()); - assert_not_zero(pos_); - storage_pointer = pos_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(0); - storage_array = false; - break; - case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: - assert_true(is_vertex_shader()); - storage_pointer = point_size_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = float_type_; - storage_offsets.push_back(0); - storage_array = false; + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPosition)); + target_pointer = builder_->createAccessChain( + spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); break; case InstructionStorageTarget::kColor: assert_true(is_pixel_shader()); - assert_not_zero(frag_outputs_); - storage_pointer = frag_outputs_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; - break; - case InstructionStorageTarget::kDepth: - assert_true(is_pixel_shader()); - storage_pointer = frag_depth_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = float_type_; - storage_offsets.push_back(0); - storage_array = false; - break; - case InstructionStorageTarget::kNone: - assert_always(); + assert_not_zero(used_write_mask); + assert_true(current_shader().writes_color_target(result.storage_index)); + target_pointer = output_fragment_data_[result.storage_index]; + // May be spv::NoResult if the color output is explicitly removed due to + // an empty write mask without independent blending. + // TODO(Triang3l): Store the alpha of the first output in this case for + // alpha test and alpha to coverage. break; default: - assert_unhandled_case(result.storage_target); + // TODO(Triang3l): All storage targets. break; } - - if (!storage_pointer) { - assert_always(); + if (target_pointer == spv::NoResult) { return; } - if (storage_array) { - storage_pointer = - b.createAccessChain(storage_class, storage_pointer, storage_offsets); + uint32_t constant_values; + uint32_t constant_components = + result.GetUsedConstantComponents(constant_values); + if (value == spv::NoResult) { + // The instruction processing function decided that nothing useful needs to + // be stored for some reason, however, some components still need to be + // written on the guest side - fill them with zeros. + constant_components = used_write_mask; + } + uint32_t non_constant_components = used_write_mask & ~constant_components; + + unsigned int value_num_components = + value != spv::NoResult + ? static_cast(builder_->getNumComponents(value)) + : 0; + + if (result.is_clamped && non_constant_components) { + // Apply the saturation modifier to the result. 
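+ // NClamp rather than FClamp so that a NaN result is flushed to 0 instead
+ // of being passed through, matching the other saturation paths in the
+ // translator.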
+ id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(3); + id_vector_temp_util_.push_back(value); + id_vector_temp_util_.push_back( + const_float_vectors_0_[value_num_components - 1]); + id_vector_temp_util_.push_back( + const_float_vectors_1_[value_num_components - 1]); + value = builder_->createBuiltinCall( + type_float_vectors_[value_num_components - 1], ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_util_); } - bool source_is_scalar = b.isScalar(source_value_id); - bool storage_is_scalar = b.isScalarType(b.getDerefTypeId(storage_pointer)); - spv::Id source_type = b.getTypeId(source_value_id); - - // Only load from storage if we need it later. - Id storage_value = 0; - if ((source_is_scalar && !storage_is_scalar) || used_write_mask != 0b1111) { - storage_value = b.createLoad(storage_pointer); - } - - // Clamp the input value. - if (result.is_clamped) { - source_value_id = CreateGlslStd450InstructionCall( - spv::NoPrecision, source_type, spv::GLSLstd450::kFClamp, - {source_value_id, - b.smearScalar(spv::NoPrecision, b.makeFloatConstant(0.f), source_type), - b.smearScalar(spv::NoPrecision, b.makeFloatConstant(1.f), - source_type)}); - } - - // destination swizzle - if (!result.IsStandardSwizzle() && !source_is_scalar) { - std::vector operands; - operands.push_back(source_value_id); - operands.push_back(b.makeCompositeConstant( - vec2_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - - // Components start from left and are duplicated rightwards - // e.g. count = 1, xxxx / count = 2, xyyy ... - uint32_t source_components = b.getNumComponents(source_value_id); - for (int i = 0; i < 4; i++) { - if (!(used_write_mask & (1 << i))) { - // Undefined / don't care. - operands.push_back(0); - continue; - } - - auto swiz = result.components[i]; - switch (swiz) { - case SwizzleSource::kX: - operands.push_back(0); - break; - case SwizzleSource::kY: - operands.push_back(1); - break; - case SwizzleSource::kZ: - operands.push_back(2); - break; - case SwizzleSource::kW: - operands.push_back(3); - break; - case SwizzleSource::k0: - operands.push_back(source_components + 0); - break; - case SwizzleSource::k1: - operands.push_back(source_components + 1); - break; - } + // The value contains either result.GetUsedResultComponents() in a condensed + // way, or a scalar to be replicated. Decompress them to create a mapping from + // guest result components to the ones in the value vector. + uint32_t used_result_components = result.GetUsedResultComponents(); + unsigned int result_unswizzled_value_components[4] = {}; + if (value_num_components > 1) { + unsigned int value_component = 0; + uint32_t used_result_components_remaining = used_result_components; + uint32_t result_component; + while (xe::bit_scan_forward(used_result_components_remaining, + &result_component)) { + used_result_components_remaining &= ~(uint32_t(1) << result_component); + result_unswizzled_value_components[result_component] = + std::min(value_component++, value_num_components - 1); } - - source_value_id = - b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, operands); } - // write mask - if (used_write_mask != 0b1111 && !source_is_scalar && !storage_is_scalar) { - std::vector operands; - operands.push_back(source_value_id); - operands.push_back(storage_value); - - for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { - operands.push_back((used_write_mask & (1 << i)) - ? 
i - : b.getNumComponents(source_value_id) + i); + // Get swizzled mapping of non-constant components to the components of + // `value`. + unsigned int result_swizzled_value_components[4] = {}; + for (uint32_t i = 0; i < 4; ++i) { + if (!(non_constant_components & (1 << i))) { + continue; } + SwizzleSource swizzle = result.components[i]; + assert_true(swizzle >= SwizzleSource::kX && swizzle <= SwizzleSource::kW); + result_swizzled_value_components[i] = + result_unswizzled_value_components[uint32_t(swizzle) - + uint32_t(SwizzleSource::kX)]; + } - source_value_id = - b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); - } else if (source_is_scalar && !storage_is_scalar) { - assert_not_zero(used_write_mask); + spv::Id target_type = builder_->getDerefTypeId(target_pointer); + unsigned int target_num_components = + builder_->getNumTypeComponents(target_type); + assert_true( + target_num_components == + GetInstructionStorageTargetUsedComponentCount(result.storage_target)); + uint32_t target_component_mask = (1 << target_num_components) - 1; + assert_zero(used_write_mask & ~target_component_mask); - if (used_write_mask == 0b1111) { - source_value_id = - b.smearScalar(spv::NoPrecision, source_value_id, storage_type); - } else { - // Find first enabled component - uint32_t index = 0; - for (uint32_t i = 0; i < 4; i++) { - if (used_write_mask & (1 << i)) { - index = i; - break; + spv::Id value_to_store; + if (target_component_mask == used_write_mask) { + // All components are overwritten - no need to load the original value. + // Possible cases: + // * Non-constants only. + // * Vector target. + // * Vector source. + // * Identity swizzle - store directly. + // * Non-identity swizzle - shuffle. + // * Scalar source - smear. + // * Scalar target. + // * Vector source - extract. + // * Scalar source - store directly. + // * Constants only. + // * Vector target - make composite constant. + // * Scalar target - store directly. + // * Mixed non-constants and constants (only for vector targets - scalar + // targets fully covered by the previous cases). + // * Vector source - shuffle with {0, 1} also applying swizzle. + // * Scalar source - construct composite. + if (!constant_components) { + if (target_num_components > 1) { + if (value_num_components > 1) { + // Non-constants only - vector target, vector source. + bool is_identity_swizzle = + target_num_components == value_num_components; + for (uint32_t i = 0; is_identity_swizzle && i < target_num_components; + ++i) { + is_identity_swizzle &= result_swizzled_value_components[i] == i; + } + if (is_identity_swizzle) { + value_to_store = value; + } else { + uint_vector_temp_util_.clear(); + uint_vector_temp_util_.reserve(target_num_components); + uint_vector_temp_util_.insert( + uint_vector_temp_util_.cend(), result_swizzled_value_components, + result_swizzled_value_components + target_num_components); + value_to_store = builder_->createRvalueSwizzle( + spv::NoPrecision, target_type, value, uint_vector_temp_util_); + } + } else { + // Non-constants only - vector target, scalar source. + value_to_store = + builder_->smearScalar(spv::NoPrecision, value, target_type); + } + } else { + if (value_num_components > 1) { + // Non-constants only - scalar target, vector source. + value_to_store = builder_->createCompositeExtract( + value, type_float_, result_swizzled_value_components[0]); + } else { + // Non-constants only - scalar target, scalar source. 
+ value_to_store = value; } } - source_value_id = b.createCompositeInsert(source_value_id, storage_value, - storage_type, index); + } else if (!non_constant_components) { + if (target_num_components > 1) { + // Constants only - vector target. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(target_num_components); + for (uint32_t i = 0; i < target_num_components; ++i) { + id_vector_temp_util_.push_back( + (constant_values & (1 << i)) ? const_float_1_ : const_float_0_); + } + value_to_store = + builder_->makeCompositeConstant(target_type, id_vector_temp_util_); + } else { + // Constants only - scalar target. + value_to_store = + (constant_values & 0b0001) ? const_float_1_ : const_float_0_; + } + } else { + assert_true(target_num_components > 1); + if (value_num_components > 1) { + // Mixed non-constants and constants - vector source. + std::unique_ptr shuffle_op = + std::make_unique( + builder_->getUniqueId(), target_type, spv::OpVectorShuffle); + shuffle_op->addIdOperand(value); + shuffle_op->addIdOperand(const_float2_0_1_); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand( + (constant_components & (1 << i)) + ? value_num_components + ((constant_values >> i) & 1) + : result_swizzled_value_components[i]); + } + value_to_store = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } else { + // Mixed non-constants and constants - scalar source. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(target_num_components); + for (uint32_t i = 0; i < target_num_components; ++i) { + if (constant_components & (1 << i)) { + id_vector_temp_util_.push_back( + (constant_values & (1 << i)) ? const_float_1_ : const_float_0_); + } else { + id_vector_temp_util_.push_back(value); + } + } + value_to_store = builder_->createCompositeConstruct( + target_type, id_vector_temp_util_); + } } - } else if (!source_is_scalar && storage_is_scalar) { - // Num writes /needs/ to be 1, and let's assume it's the first element. - assert_true(xe::bit_count(used_write_mask) == 1); - - for (uint32_t i = 0; i < 4; i++) { - if (used_write_mask & (1 << i)) { - source_value_id = - b.createCompositeExtract(source_value_id, storage_type, 0); - break; + } else { + // Only certain components are overwritten. + // Scalar targets are always overwritten fully, can't reach this case for + // them. + assert_true(target_num_components > 1); + value_to_store = builder_->createLoad(target_pointer, spv::NoPrecision); + // Two steps: + // 1) Insert constants by shuffling (first so dependency chain of step 2 is + // simpler if constants are written first). + // 2) Insert value components - via shuffling for vector source, via + // composite inserts for scalar value. + if (constant_components) { + std::unique_ptr shuffle_op = + std::make_unique(builder_->getUniqueId(), + target_type, spv::OpVectorShuffle); + shuffle_op->addIdOperand(value_to_store); + shuffle_op->addIdOperand(const_float2_0_1_); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand((constant_components & (1 << i)) + ? 
target_num_components + + ((constant_values >> i) & 1) + : i); + } + value_to_store = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } + if (non_constant_components) { + if (value_num_components > 1) { + std::unique_ptr shuffle_op = + std::make_unique( + builder_->getUniqueId(), target_type, spv::OpVectorShuffle); + shuffle_op->addIdOperand(value_to_store); + shuffle_op->addIdOperand(value); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand( + (non_constant_components & (1 << i)) + ? target_num_components + result_swizzled_value_components[i] + : i); + } + value_to_store = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } else { + for (uint32_t i = 0; i < target_num_components; ++i) { + if (non_constant_components & (1 << i)) { + value_to_store = builder_->createCompositeInsert( + value, value_to_store, target_type, i); + } + } } } } + builder_->createStore(value_to_store, target_pointer); +} - // Perform store into the pointer. - assert_true(b.getNumComponents(source_value_id) == - b.getNumTypeComponents(storage_type)); +spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { + spv::Id type = builder_->getTypeId(value); + spv::Id const_uint_8_scalar = builder_->makeUintConstant(8); + spv::Id const_uint_00ff00ff_scalar = builder_->makeUintConstant(0x00FF00FF); + spv::Id const_uint_16_scalar = builder_->makeUintConstant(16); + spv::Id const_uint_8_typed, const_uint_00ff00ff_typed, const_uint_16_typed; + int num_components = builder_->getNumTypeComponents(type); + if (num_components > 1) { + id_vector_temp_.reserve(num_components); + id_vector_temp_.clear(); + id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_8_scalar); + const_uint_8_typed = builder_->makeCompositeConstant(type, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_00ff00ff_scalar); + const_uint_00ff00ff_typed = + builder_->makeCompositeConstant(type, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_16_scalar); + const_uint_16_typed = + builder_->makeCompositeConstant(type, id_vector_temp_); + } else { + const_uint_8_typed = const_uint_8_scalar; + const_uint_00ff00ff_typed = const_uint_00ff00ff_scalar; + const_uint_16_typed = const_uint_16_scalar; + } - assert_true(b.getTypeId(source_value_id) == - b.getDerefTypeId(storage_pointer)); - b.createStore(source_value_id, storage_pointer); + // 8-in-16 or one half of 8-in-32 (doing 8-in-16 swap). 
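+ // For example, 0xAABBCCDD becomes 0xBBAADDCC after the 8-in-16 swap;
+ // applying the 16-in-32 swap below to that yields 0xDDCCBBAA, together
+ // forming the full 8-in-32 byte reversal.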
+ spv::Id is_8in16 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k8in16))); + spv::Id is_8in32 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k8in32))); + spv::Id is_8in16_or_8in32 = + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); + spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); + assert_false(block_pre_8in16.isTerminated()); + spv::Block& block_8in16 = builder_->makeNewBlock(); + spv::Block& block_8in16_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_8in16_merge.getId()); + builder_->createConditionalBranch(is_8in16_or_8in32, &block_8in16, + &block_8in16_merge); + builder_->setBuildPoint(&block_8in16); + spv::Id swapped_8in16 = builder_->createBinOp( + spv::OpBitwiseOr, type, + builder_->createBinOp( + spv::OpBitwiseAnd, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_8_typed), + const_uint_00ff00ff_typed), + builder_->createBinOp( + spv::OpShiftLeftLogical, type, + builder_->createBinOp(spv::OpBitwiseAnd, type, value, + const_uint_00ff00ff_typed), + const_uint_8_typed)); + builder_->createBranch(&block_8in16_merge); + builder_->setBuildPoint(&block_8in16_merge); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), type, + spv::OpPhi); + phi_op->addIdOperand(swapped_8in16); + phi_op->addIdOperand(block_8in16.getId()); + phi_op->addIdOperand(value); + phi_op->addIdOperand(block_pre_8in16.getId()); + value = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + + // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). + spv::Id is_16in32 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k16in32))); + spv::Id is_8in32_or_16in32 = + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); + spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); + spv::Block& block_16in32 = builder_->makeNewBlock(); + spv::Block& block_16in32_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_16in32_merge.getId()); + builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32, + &block_16in32_merge); + builder_->setBuildPoint(&block_16in32); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(builder_->createBinOp( + spv::OpShiftRightLogical, type, value, const_uint_16_typed)); + id_vector_temp_.push_back(value); + id_vector_temp_.insert(id_vector_temp_.cend(), 2, + builder_->makeIntConstant(16)); + spv::Id swapped_16in32 = + builder_->createOp(spv::OpBitFieldInsert, type, id_vector_temp_); + builder_->createBranch(&block_16in32_merge); + builder_->setBuildPoint(&block_16in32_merge); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), type, + spv::OpPhi); + phi_op->addIdOperand(swapped_16in32); + phi_op->addIdOperand(block_16in32.getId()); + phi_op->addIdOperand(value); + phi_op->addIdOperand(block_pre_16in32.getId()); + value = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + + return value; +} + +spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( + spv::Id address_dwords_int) { + spv::Block& head_block = *builder_->getBuildPoint(); + assert_false(head_block.isTerminated()); + + spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 + ? 
spv::StorageClassStorageBuffer
+ : spv::StorageClassUniform;
+ uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2();
+ if (!buffer_count_log2) {
+ // Single binding - load directly.
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ // The only SSBO struct member.
+ id_vector_temp_.push_back(const_int_0_);
+ id_vector_temp_.push_back(address_dwords_int);
+ return builder_->createLoad(
+ builder_->createAccessChain(storage_class, buffers_shared_memory_,
+ id_vector_temp_),
+ spv::NoPrecision);
+ }
+
+ // The memory is split into multiple bindings - check which binding to load
+ // from. 29 is log2(512 MB), minus 2 because addressing is done in dwords
+ // (4 B each). The binding array is not indexed with the variable itself
+ // because that would require VK_EXT_descriptor_indexing.
+ uint32_t binding_address_bits = (29 - 2) - buffer_count_log2;
+ spv::Id binding_index = builder_->createBinOp(
+ spv::OpShiftRightLogical, type_uint_,
+ builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
+ builder_->makeUintConstant(binding_address_bits));
+ spv::Id binding_address = builder_->createBinOp(
+ spv::OpBitwiseAnd, type_int_, address_dwords_int,
+ builder_->makeIntConstant(
+ int((uint32_t(1) << binding_address_bits) - 1)));
+ uint32_t buffer_count = 1 << buffer_count_log2;
+ spv::Block* switch_case_blocks[512 / 128];
+ for (uint32_t i = 0; i < buffer_count; ++i) {
+ switch_case_blocks[i] = &builder_->makeNewBlock();
+ }
+ spv::Block& switch_merge_block = builder_->makeNewBlock();
+ spv::Id value_phi_result = builder_->getUniqueId();
+ std::unique_ptr<spv::Instruction> value_phi_op =
+ std::make_unique<spv::Instruction>(value_phi_result, type_uint_,
+ spv::OpPhi);
+ SpirvCreateSelectionMerge(switch_merge_block.getId(),
+ spv::SelectionControlDontFlattenMask);
+ {
+ std::unique_ptr<spv::Instruction> switch_op =
+ std::make_unique<spv::Instruction>(spv::OpSwitch);
+ switch_op->addIdOperand(binding_index);
+ // Highest binding index is the default case.
+ switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId());
+ switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block);
+ for (uint32_t i = 0; i < buffer_count - 1; ++i) {
+ switch_op->addImmediateOperand(int(i));
+ switch_op->addIdOperand(switch_case_blocks[i]->getId());
+ switch_case_blocks[i]->addPredecessor(&head_block);
+ }
+ builder_->getBuildPoint()->addInstruction(std::move(switch_op));
+ }
+ for (uint32_t i = 0; i < buffer_count; ++i) {
+ builder_->setBuildPoint(switch_case_blocks[i]);
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
+ // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + value_phi_op->addIdOperand(builder_->createLoad( + builder_->createAccessChain(storage_class, buffers_shared_memory_, + id_vector_temp_), + spv::NoPrecision)); + value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); + builder_->createBranch(&switch_merge_block); + } + builder_->setBuildPoint(&switch_merge_block); + builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); + return value_phi_result; +} + +spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, + bool gamma_pre_saturated) { + spv::Id value_type = builder_->getTypeId(gamma); + assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); + bool is_vector = builder_->isVectorType(value_type); + assert_true(is_vector || builder_->isFloatType(value_type)); + int num_components = builder_->getNumTypeComponents(value_type); + assert_true(num_components < 4); + spv::Id bool_type = type_bool_vectors_[num_components - 1]; + + spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; + spv::Id const_vector_1 = SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f), value_type); + + if (!gamma_pre_saturated) { + // Saturate, flushing NaN to 0. + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(gamma); + id_vector_temp_.push_back(const_vector_0); + id_vector_temp_.push_back(const_vector_1); + gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_); + } + + spv::Id is_piece_at_least_3 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(192.0f / 255.0f), value_type)); + spv::Id scale_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(8.0f / 1024.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(4.0f / 1024.0f), value_type)); + spv::Id offset_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-1024.0f), + value_type), + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-256.0f), + value_type)); + + spv::Id is_piece_at_least_1 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 255.0f), value_type)); + spv::Id scale_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(2.0f / 1024.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f / 1024.0f), value_type)); + spv::Id offset_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-64.0f), + value_type), + const_vector_0); + + spv::Id is_piece_at_least_2 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(96.0f / 255.0f), value_type)); + spv::Id scale = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + scale_3_or_2, scale_1_or_0); + spv::Id offset = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + offset_3_or_2, offset_1_or_0); + + spv::Op value_times_scalar_opcode = + is_vector ? 
spv::OpVectorTimesScalar : spv::OpFMul; + // linear = gamma * (255.0f * 1024.0f) * scale + offset + spv::Id linear = + builder_->createBinOp(value_times_scalar_opcode, value_type, gamma, + builder_->makeFloatConstant(255.0f * 1024.0f)); + builder_->addDecoration(linear, spv::DecorationNoContraction); + linear = builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(linear, spv::DecorationNoContraction); + linear = builder_->createBinOp(spv::OpFAdd, value_type, linear, offset); + builder_->addDecoration(linear, spv::DecorationNoContraction); + // linear += trunc(linear * scale) + spv::Id linear_integer_term = + builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(linear_integer_term, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back(linear_integer_term); + linear_integer_term = builder_->createBuiltinCall( + value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc, id_vector_temp_); + linear = builder_->createBinOp(spv::OpFAdd, value_type, linear, + linear_integer_term); + builder_->addDecoration(linear, spv::DecorationNoContraction); + // linear *= 1.0f / 1023.0f + linear = builder_->createBinOp(value_times_scalar_opcode, value_type, linear, + builder_->makeFloatConstant(1.0f / 1023.0f)); + builder_->addDecoration(linear, spv::DecorationNoContraction); + return linear; +} + +spv::Id SpirvShaderTranslator::LinearToPWLGamma(spv::Id linear, + bool linear_pre_saturated) { + spv::Id value_type = builder_->getTypeId(linear); + assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); + bool is_vector = builder_->isVectorType(value_type); + assert_true(is_vector || builder_->isFloatType(value_type)); + int num_components = builder_->getNumTypeComponents(value_type); + assert_true(num_components < 4); + spv::Id bool_type = type_bool_vectors_[num_components - 1]; + + spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; + spv::Id const_vector_1 = SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f), value_type); + + if (!linear_pre_saturated) { + // Saturate, flushing NaN to 0. 
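+ // The piecewise-linear curve below is only defined on [0, 1], so the
+ // input is clamped here unless the caller guarantees it's pre-saturated.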
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(linear); + id_vector_temp_.push_back(const_vector_0); + id_vector_temp_.push_back(const_vector_1); + linear = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_); + } + + spv::Id is_piece_at_least_3 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(512.0f / 1023.0f), value_type)); + spv::Id scale_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 8.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 4.0f), value_type)); + spv::Id offset_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(128.0f / 255.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 255.0f), value_type)); + + spv::Id is_piece_at_least_1 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 1023.0f), value_type)); + spv::Id scale_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 2.0f), value_type), + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(1023.0f), + value_type)); + spv::Id offset_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(32.0f / 255.0f), value_type), + const_vector_0); + + spv::Id is_piece_at_least_2 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(128.0f / 1023.0f), value_type)); + spv::Id scale = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + scale_3_or_2, scale_1_or_0); + spv::Id offset = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + offset_3_or_2, offset_1_or_0); + + // gamma = trunc(linear * scale) * (1.0f / 255.0f) + offset + spv::Id gamma = builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back(gamma); + gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450Trunc, id_vector_temp_); + gamma = builder_->createBinOp( + is_vector ? spv::OpVectorTimesScalar : spv::OpFMul, value_type, gamma, + builder_->makeFloatConstant(1.0f / 255.0f)); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + gamma = builder_->createBinOp(spv::OpFAdd, value_type, gamma, offset); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + return gamma; } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 6c3c86526..aca23efe5 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. 
All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
@@ -10,91 +10,274 @@
 #ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
 #define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
 
+#include <array>
+#include <cstdint>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
-#include "third_party/glslang-spirv/SpvBuilder.h"
-#include "third_party/spirv/GLSL.std.450.hpp11"
+#include "third_party/glslang/SPIRV/SpvBuilder.h"
 #include "xenia/gpu/shader_translator.h"
-#include "xenia/ui/spirv/spirv_disassembler.h"
-#include "xenia/ui/spirv/spirv_validator.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
 
 namespace xe {
 namespace gpu {
 
-// Push constants embedded within the command buffer.
-// The total size of this struct must be <= 128b (as that's the commonly
-// supported size).
-struct SpirvPushConstants {
-  // Accessible to vertex shader only:
-  float window_scale[4];  // scale x/y, offset x/y (pixels)
-  float vtx_fmt[4];
-
-  // Accessible to geometry shader only:
-  float point_size[4];  // psx, psy, unused, unused
-
-  // Accessible to fragment shader only:
-  float alpha_test[4];  // alpha test enable, func, ref
-  float color_exp_bias[4];
-  uint32_t ps_param_gen;
-};
-static_assert(sizeof(SpirvPushConstants) <= 128,
-              "Push constants must fit <= 128b");
-constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0;
-constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2;
-constexpr uint32_t kSpirvPushConstantGeometryRangeOffset =
-    kSpirvPushConstantVertexRangeOffset + kSpirvPushConstantVertexRangeSize;
-constexpr uint32_t kSpirvPushConstantGeometryRangeSize = (sizeof(float) * 4);
-constexpr uint32_t kSpirvPushConstantFragmentRangeOffset =
-    kSpirvPushConstantGeometryRangeOffset + kSpirvPushConstantGeometryRangeSize;
-constexpr uint32_t kSpirvPushConstantFragmentRangeSize =
-    (sizeof(float) * 4) + sizeof(uint32_t);
-constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants);
-
 class SpirvShaderTranslator : public ShaderTranslator {
  public:
-  SpirvShaderTranslator();
-  ~SpirvShaderTranslator() override;
+  union Modification {
+    // If anything in this structure is changed in a way not compatible with
+    // the previous layout, invalidate the pipeline storages by increasing this
+    // version number (0xYYYYMMDD)!
+    // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
+    // prototyping stage (easier to do small granular updates with an
+    // incremental counter).
+    static constexpr uint32_t kVersion = 4;
+
+    enum class DepthStencilMode : uint32_t {
+      kNoModifiers,
+      // Early fragment tests - enable if alpha test and alpha to coverage are
+      // disabled; ignored if anything in the shader blocks early Z writing.
+      kEarlyHint,
+      // TODO(Triang3l): Unorm24 (rounding) and float24 (truncating and
+      // rounding) output modes.
+    };
+
+    struct {
+      // Dynamically indexable register count from SQ_PROGRAM_CNTL.
+      uint32_t dynamic_addressable_register_count : 8;
+      // Pipeline stage and input configuration.
+      Shader::HostVertexShaderType host_vertex_shader_type
+          : Shader::kHostVertexShaderTypeBitCount;
+    } vertex;
+    struct PixelShaderModification {
+      // Dynamically indexable register count from SQ_PROGRAM_CNTL.
+      uint32_t dynamic_addressable_register_count : 8;
+      uint32_t param_gen_enable : 1;
+      uint32_t param_gen_interpolator : 4;
+      // If param_gen_enable is set, this must be set for point primitives, and
+      // must not be set for other primitive types - enables the point sprite
+      // coordinates input, and also affects the flag bits in PsParamGen.
+      uint32_t param_gen_point : 1;
+      // For host render targets - depth / stencil output mode.
+      DepthStencilMode depth_stencil_mode : 3;
+    } pixel;
+    uint64_t value = 0;
+
+    Modification(uint64_t modification_value = 0) : value(modification_value) {}
+  };
+
+  enum : uint32_t {
+    kSysFlag_XYDividedByW_Shift,
+    kSysFlag_ZDividedByW_Shift,
+    kSysFlag_WNotReciprocal_Shift,
+    kSysFlag_PrimitivePolygonal_Shift,
+    kSysFlag_PrimitiveLine_Shift,
+    kSysFlag_AlphaPassIfLess_Shift,
+    kSysFlag_AlphaPassIfEqual_Shift,
+    kSysFlag_AlphaPassIfGreater_Shift,
+    kSysFlag_ConvertColor0ToGamma_Shift,
+    kSysFlag_ConvertColor1ToGamma_Shift,
+    kSysFlag_ConvertColor2ToGamma_Shift,
+    kSysFlag_ConvertColor3ToGamma_Shift,
+
+    kSysFlag_Count,
+
+    kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
+    kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
+    kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
+    kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
+    kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
+    kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
+    kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
+    kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
+    kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift,
+    kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
+    kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
+    kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
+  };
+  static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
+
+  // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
+  // - SystemConstantIndex enum.
+  // - Structure members in BeginTranslation.
+  struct SystemConstants {
+    uint32_t flags;
+    xenos::Endian vertex_index_endian;
+    int32_t vertex_base_index;
+    uint32_t padding_vertex_base_index;
+
+    float ndc_scale[3];
+    uint32_t padding_ndc_scale;
+
+    float ndc_offset[3];
+    uint32_t padding_ndc_offset;
+
+    // Each byte contains post-swizzle TextureSign values for each of the
+    // needed components of each of the 32 used texture fetch constants.
+    uint32_t texture_swizzled_signs[8];
+
+    // If the imageViewFormatSwizzle portability subset is not supported, the
+    // component swizzle (taking both guest and host swizzles into account) to
+    // apply to the result directly in the shader code. In each uint32_t,
+    // swizzles for 2 texture fetch constants (in bits 0:11 and 12:23).
+    uint32_t texture_swizzles[16];
+
+    float alpha_test_reference;
+    float padding_alpha_test_reference[3];
+
+    float color_exp_bias[4];
+  };
+
+  // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
+  // maxStorageBufferRange it's 128 MB. These are the values of those limits on
+  // Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be
+  // bound, therefore SSBOs must only be used for shared memory - all other
+  // storage resources must be images or texel buffers.
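+  // For example, with a 128 MB maxStorageBufferRange, the 512 MB shared memory
+  // space is bound as 1 << 2 = 4 storage buffers of 128 MB each - see
+  // GetSharedMemoryStorageBufferCountLog2.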
+ enum DescriptorSet : uint32_t { + // According to the "Pipeline Layout Compatibility" section of the Vulkan + // specification: + // "Two pipeline layouts are defined to be "compatible for set N" if they + // were created with identically defined descriptor set layouts for sets + // zero through N, and if they were created with identical push constant + // ranges." + // "Place the least frequently changing descriptor sets near the start of + // the pipeline layout, and place the descriptor sets representing the most + // frequently changing resources near the end. When pipelines are switched, + // only the descriptor set bindings that have been invalidated will need to + // be updated and the remainder of the descriptor set bindings will remain + // in place." + // This is partially the reverse of the Direct3D 12's rule of placing the + // most frequently changed descriptor sets in the beginning. Here all + // descriptor sets with an immutable layout are placed first, in reverse + // frequency of changing, and sets that may be different for different + // pipeline states last. + + // Always the same descriptor set layouts for all pipeline layouts: + + // Never changed. + kDescriptorSetSharedMemoryAndEdram, + // Pretty rarely used and rarely changed - flow control constants. + kDescriptorSetBoolLoopConstants, + // May stay the same across many draws. + kDescriptorSetSystemConstants, + // Less frequently changed (per-material). + kDescriptorSetFloatConstantsPixel, + // Quite frequently changed (for one object drawn multiple times, for + // instance - may contain projection matrices). + kDescriptorSetFloatConstantsVertex, + // Very frequently changed, especially for UI draws, and for models drawn in + // multiple parts - contains vertex and texture fetch constants. + kDescriptorSetFetchConstants, + + // Mutable part of the pipeline layout: + kDescriptorSetMutableLayoutsStart, + + // Rarely used at all, but may be changed at an unpredictable rate when + // vertex textures are used. + kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart, + kDescriptorSetTexturesVertex, + // Per-material textures. + kDescriptorSetSamplersPixel, + kDescriptorSetTexturesPixel, + kDescriptorSetCount, + }; + + // "Xenia Emulator Microcode Translator". + // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 + static constexpr uint32_t kSpirvMagicToolId = 26; + + struct Features { + explicit Features(const ui::vulkan::VulkanProvider& provider); + explicit Features(bool all = false); + unsigned int spirv_version; + uint32_t max_storage_buffer_range; + bool clip_distance; + bool cull_distance; + bool image_view_format_swizzle; + bool signed_zero_inf_nan_preserve_float32; + bool denorm_flush_to_zero_float32; + }; + SpirvShaderTranslator(const Features& features); - // Not storing anything else in modifications (as this shader translator is - // being replaced anyway). 
   uint64_t GetDefaultVertexShaderModification(
       uint32_t dynamic_addressable_register_count,
       Shader::HostVertexShaderType host_vertex_shader_type =
-          Shader::HostVertexShaderType::kVertex) const override {
-    return dynamic_addressable_register_count;
-  }
+          Shader::HostVertexShaderType::kVertex) const override;
   uint64_t GetDefaultPixelShaderModification(
-      uint32_t dynamic_addressable_register_count) const override {
-    return dynamic_addressable_register_count;
+      uint32_t dynamic_addressable_register_count) const override;
+
+  static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2(
+      uint32_t max_storage_buffer_range) {
+    if (max_storage_buffer_range >= 512 * 1024 * 1024) {
+      return 0;
+    }
+    if (max_storage_buffer_range >= 256 * 1024 * 1024) {
+      return 1;
+    }
+    return 2;
   }
+  uint32_t GetSharedMemoryStorageBufferCountLog2() const {
+    return GetSharedMemoryStorageBufferCountLog2(
+        features_.max_storage_buffer_range);
+  }
+
+  // Common functions useful not only for the translator, but also for EDRAM
+  // emulation via conventional render targets.
+
+  // Converts the color value externally clamped to [0, 31.875] to 7e3 floating
+  // point, with zeros in bits 10:31, rounding to the nearest even.
+  static spv::Id PreClampedFloat32To7e3(spv::Builder& builder,
+                                        spv::Id f32_scalar,
+                                        spv::Id ext_inst_glsl_std_450);
+  // Same as PreClampedFloat32To7e3, but clamps the input to [0, 31.875].
+  static spv::Id UnclampedFloat32To7e3(spv::Builder& builder,
+                                       spv::Id f32_scalar,
+                                       spv::Id ext_inst_glsl_std_450);
+  // Converts the 7e3 number in bits [f10_shift, f10_shift + 10) to a 32-bit
+  // float.
+  static spv::Id Float7e3To32(spv::Builder& builder, spv::Id f10_uint_scalar,
+                              uint32_t f10_shift, bool result_as_uint,
+                              spv::Id ext_inst_glsl_std_450);
+  // Converts the depth value externally clamped to the representable [0, 2)
+  // range to 20e4 floating point, with zeros in bits 24:31, rounding to the
+  // nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
+  // that 0...1 is pre-remapped to 0...0.5 in the input.
+  static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
+                                       spv::Id f32_scalar,
+                                       bool round_to_nearest_even,
+                                       bool remap_from_0_to_0_5,
+                                       spv::Id ext_inst_glsl_std_450);
+  // Converts the 20e4 number in bits [f24_shift, f24_shift + 24) to a 32-bit
+  // float.
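+  // (In the 24-bit value, the 20-bit mantissa occupies the low bits and the
+  // 4-bit exponent bits 20:23, mirroring the IEEE 754 layout.)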
+  static spv::Id Depth20e4To32(spv::Builder& builder, spv::Id f24_uint_scalar,
+                               uint32_t f24_shift, bool remap_to_0_to_0_5,
+                               bool result_as_uint,
+                               spv::Id ext_inst_glsl_std_450);
 
  protected:
-  uint32_t GetModificationRegisterCount() const override {
-    return uint32_t(current_translation().modification());
-  }
+  void Reset() override;
+
+  uint32_t GetModificationRegisterCount() const override;
+
+  void StartTranslation() override;
+  std::vector<uint8_t> CompleteTranslation() override;
+  void PostTranslation() override;
 
-  void PreProcessControlFlowInstructions(
-      std::vector<ucode::ControlFlowInstruction> instrs) override;
   void ProcessLabel(uint32_t cf_index) override;
-  void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
-  void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
-  void ProcessControlFlowNopInstruction(uint32_t cf_index) override;
+
   void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
   void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
   void ProcessLoopStartInstruction(
       const ParsedLoopStartInstruction& instr) override;
   void ProcessLoopEndInstruction(
       const ParsedLoopEndInstruction& instr) override;
-  void ProcessCallInstruction(const ParsedCallInstruction& instr) override;
-  void ProcessReturnInstruction(const ParsedReturnInstruction& instr) override;
   void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
-  void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override;
+
   void ProcessVertexFetchInstruction(
       const ParsedVertexFetchInstruction& instr) override;
   void ProcessTextureFetchInstruction(
@@ -102,99 +285,374 @@ class SpirvShaderTranslator : public ShaderTranslator {
   void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
 
  private:
-  spv::Function* CreateCubeFunction();
+  struct TextureBinding {
+    uint32_t fetch_constant;
+    // Stacked and 3D are separate TextureBindings.
+    xenos::FetchOpDimension dimension;
+    bool is_signed;
 
-  bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
-                                 bool& close_predicate_block);
-  bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
-                                 bool& close_predicate_block);
-
-  spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
-                          uint32_t offset, uint32_t count);
-  spv::Id ConvertNormVar(spv::Id var, spv::Id result_type, uint32_t bits,
-                         bool is_signed);
-
-  // Creates a call to the given GLSL intrinsic.
-  spv::Id CreateGlslStd450InstructionCall(spv::Decoration precision,
-                                          spv::Id result_type,
-                                          spv::GLSLstd450 instruction_ordinal,
-                                          std::vector<spv::Id> args);
-
-  // Loads an operand into a value.
-  // The value returned will be in the form described in the operand (number of
-  // components, etc).
-  spv::Id LoadFromOperand(const InstructionOperand& op);
-  // Stores a value based on the specified result information.
-  // The value will be transformed into the appropriate form for the result and
-  // the proper components will be selected.
-  void StoreToResult(spv::Id source_value_id, const InstructionResult& result);
-
-  xe::ui::spirv::SpirvDisassembler disassembler_;
-  xe::ui::spirv::SpirvValidator validator_;
-
-  // True if there's an open predicated block
-  bool open_predicated_block_ = false;
-  bool predicated_block_cond_ = false;
-  spv::Block* predicated_block_end_ = nullptr;
-
-  // Exec block conditional?
-  bool exec_cond_ = false;
-  spv::Block* exec_skip_block_ = nullptr;
-
-  // TODO(benvanik): replace with something better, make reusable, etc.
-  std::unique_ptr<spv::Builder> builder_;
-  spv::Id glsl_std_450_instruction_set_ = 0;
-
-  // Generated function
-  spv::Function* translated_main_ = nullptr;
-  spv::Function* cube_function_ = nullptr;
-
-  // Types.
-  spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
-  spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0, vec3_int_type_ = 0;
-  spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
-  spv::Id vec4_int_type_ = 0, vec4_uint_type_ = 0;
-  spv::Id vec2_bool_type_ = 0, vec3_bool_type_ = 0, vec4_bool_type_ = 0;
-  spv::Id image_2d_type_ = 0, image_3d_type_ = 0, image_cube_type_ = 0;
-
-  // Constants.
-  spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0;
-
-  // Array of AMD registers.
-  // These values are all pointers.
-  spv::Id registers_ptr_ = 0, registers_type_ = 0;
-  spv::Id consts_ = 0, a0_ = 0, p0_ = 0;
-  spv::Id aL_ = 0;           // Loop index stack - .x is active loop
-  spv::Id loop_count_ = 0;   // Loop counter stack
-  spv::Id ps_ = 0, pv_ = 0;  // IDs of previous results
-  spv::Id pc_ = 0;           // Program counter
-  spv::Id lod_ = 0;          // LOD register
-  spv::Id pos_ = 0;
-  spv::Id push_consts_ = 0;
-  spv::Id interpolators_ = 0;
-  spv::Id point_size_ = 0;
-  spv::Id point_coord_ = 0;
-  spv::Id vertex_idx_ = 0;
-  spv::Id frag_outputs_ = 0, frag_depth_ = 0;
-  spv::Id samplers_ = 0;
-  spv::Id tex_[3] = {0};  // Images {2D, 3D, Cube}
-  std::unordered_map<uint32_t, uint32_t> tex_binding_map_;
-  spv::Id vtx_ = 0;  // Vertex buffer array (32 runtime arrays)
-  std::unordered_map<uint32_t, uint32_t> vtx_binding_map_;
-
-  // SPIR-V IDs that are part of the in/out interface.
-  std::vector<spv::Id> interface_ids_;
-
-  struct CFBlock {
-    spv::Block* block = nullptr;
-    bool labelled = false;
+    spv::Id variable;
   };
-  std::vector<CFBlock> cf_blocks_;
-  spv::Block* switch_break_block_ = nullptr;
-  spv::Block* loop_head_block_ = nullptr;
-  spv::Block* loop_body_block_ = nullptr;
-  spv::Block* loop_cont_block_ = nullptr;
-  spv::Block* loop_exit_block_ = nullptr;
+
+  struct SamplerBinding {
+    uint32_t fetch_constant;
+    xenos::TextureFilter mag_filter;
+    xenos::TextureFilter min_filter;
+    xenos::TextureFilter mip_filter;
+    xenos::AnisoFilter aniso_filter;
+
+    spv::Id variable;
+  };
+
+  // Builder helpers.
+  spv::Id SpirvSmearScalarResultOrConstant(spv::Id scalar, spv::Id vector_type);
+  void SpirvCreateSelectionMerge(
+      spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask =
+                                  spv::SelectionControlMaskNone) {
+    std::unique_ptr<spv::Instruction> selection_merge_op =
+        std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
+    selection_merge_op->addIdOperand(merge_block_id);
+    selection_merge_op->addImmediateOperand(selection_control_mask);
+    builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op));
+  }
+
+  Modification GetSpirvShaderModification() const {
+    return Modification(current_translation().modification());
+  }
+
+  bool IsSpirvVertexShader() const {
+    return is_vertex_shader() &&
+           !Shader::IsHostVertexShaderTypeDomain(
+               GetSpirvShaderModification().vertex.host_vertex_shader_type);
+  }
+  bool IsSpirvTessEvalShader() const {
+    return is_vertex_shader() &&
+           Shader::IsHostVertexShaderTypeDomain(
+               GetSpirvShaderModification().vertex.host_vertex_shader_type);
+  }
+
+  bool IsExecutionModeEarlyFragmentTests() const {
+    // TODO(Triang3l): Not applicable to fragment shader interlock.
+    return is_pixel_shader() &&
+           GetSpirvShaderModification().pixel.depth_stencil_mode ==
+               Modification::DepthStencilMode::kEarlyHint &&
+           current_shader().implicit_early_z_write_allowed();
+  }
+
+  // Returns UINT32_MAX if PsParamGen doesn't need to be written.
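+  // (That is, when param_gen_enable is not set in the pixel shader
+  // modification, or the interpolator register it would be written to doesn't
+  // exist.)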
+ uint32_t GetPsParamGenInterpolator() const; + + // Must be called before emitting any SPIR-V operations that must be in a + // block in translator callbacks to ensure that if the last instruction added + // was something like OpBranch - in this case, an unreachable block is + // created. + void EnsureBuildPointAvailable(); + + void StartVertexOrTessEvalShaderBeforeMain(); + void StartVertexOrTessEvalShaderInMain(); + void CompleteVertexOrTessEvalShaderInMain(); + + void StartFragmentShaderBeforeMain(); + void StartFragmentShaderInMain(); + void CompleteFragmentShaderInMain(); + + // Updates the current flow control condition (to be called in the beginning + // of exec and in jumps), closing the previous conditionals if needed. + // However, if the condition is not different, the instruction-level predicate + // conditional also won't be closed - this must be checked separately if + // needed (for example, in jumps). + void UpdateExecConditionals(ParsedExecInstruction::Type type, + uint32_t bool_constant_index, bool condition); + // Opens or reopens the predicate check conditional for the instruction. + // Should be called before processing a non-control-flow instruction. + void UpdateInstructionPredication(bool predicated, bool condition); + // Closes the instruction-level predicate conditional if it's open, useful if + // a control flow instruction needs to do some code which needs to respect the + // current exec conditional, but can't itself be predicated. + void CloseInstructionPredication(); + // Closes conditionals opened by exec and instructions within them (but not by + // labels) and updates the state accordingly. + void CloseExecConditionals(); + + spv::Id GetStorageAddressingIndex( + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant = false); + // Loads unswizzled operand without sign modifiers as float4. + spv::Id LoadOperandStorage(const InstructionOperand& operand); + spv::Id ApplyOperandModifiers(spv::Id operand_value, + const InstructionOperand& original_operand, + bool invert_negate = false, + bool force_absolute = false); + // Returns the requested components, with the operand's swizzle applied, in a + // condensed form, but without negation / absolute value modifiers. The + // storage is float4, no matter what the component count of original_operand + // is (the storage will be either r# or c#, but the instruction may be + // scalar). + spv::Id GetUnmodifiedOperandComponents( + spv::Id operand_storage, const InstructionOperand& original_operand, + uint32_t components); + spv::Id GetOperandComponents(spv::Id operand_storage, + const InstructionOperand& original_operand, + uint32_t components, bool invert_negate = false, + bool force_absolute = false) { + return ApplyOperandModifiers( + GetUnmodifiedOperandComponents(operand_storage, original_operand, + components), + original_operand, invert_negate, force_absolute); + } + // If components are identical, the same Id will be written to both outputs. + void GetOperandScalarXY(spv::Id operand_storage, + const InstructionOperand& original_operand, + spv::Id& a_out, spv::Id& b_out, + bool invert_negate = false, + bool force_absolute = false); + // Gets the absolute value of the loaded operand if it's not absolute already. 
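+  // (No extra FAbs is emitted if the operand already has the absolute value
+  // modifier and no negation.)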
+  spv::Id GetAbsoluteOperand(spv::Id operand_storage,
+                             const InstructionOperand& original_operand);
+  // The type of the value must be a float vector consisting of
+  // xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate
+  // a scalar into all used components) float, or the value can be spv::NoResult
+  // if there's no result to store (like constants only).
+  void StoreResult(const InstructionResult& result, spv::Id value);
+
+  // For Shader Model 3 multiplication (+-0 or denormal * anything = +0),
+  // replaces the value with +0 if the minimum of the two operands is 0. This
+  // must be called with absolute values of operands - use GetAbsoluteOperand!
+  spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
+                                 spv::Id operand_1_abs);
+  // Return type is a xe::bit_count(result.GetUsedResultComponents())-component
+  // float vector or a single float, depending on whether it's a reduction
+  // instruction (check getTypeId of the result), or returns spv::NoResult if
+  // there's nothing to store.
+  spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
+                                    bool& predicate_written);
+  // Returns a float value to write to the previous scalar register and to the
+  // destination. If the return value is ps itself (in the retain_prev case),
+  // returns spv::NoResult (handled as a special case - if it's retain_prev but
+  // the result doesn't need to be written anywhere, no OpLoad(ps) will be
+  // done).
+  spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
+                                    bool& predicate_written);
+
+  // Perform endian swap of a uint scalar or vector.
+  spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
+
+  spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
+
+  // The source may be a floating-point scalar or a vector.
+  spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
+  spv::Id LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated);
+
+  size_t FindOrAddTextureBinding(uint32_t fetch_constant,
+                                 xenos::FetchOpDimension dimension,
+                                 bool is_signed);
+  size_t FindOrAddSamplerBinding(uint32_t fetch_constant,
+                                 xenos::TextureFilter mag_filter,
+                                 xenos::TextureFilter min_filter,
+                                 xenos::TextureFilter mip_filter,
+                                 xenos::AnisoFilter aniso_filter);
+  // `texture_parameters` need to be set up except for `sampler`, which will be
+  // set internally, optionally doing linear interpolation between an existing
+  // value and the new one (the result location may be the same as for the
+  // first lerp endpoint, but not across signedness).
+  void SampleTexture(spv::Builder::TextureParameters& texture_parameters,
+                     spv::ImageOperandsMask image_operands_mask,
+                     spv::Id image_unsigned, spv::Id image_signed,
+                     spv::Id sampler, spv::Id is_all_signed,
+                     spv::Id is_any_signed, spv::Id& result_unsigned_out,
+                     spv::Id& result_signed_out,
+                     spv::Id lerp_factor = spv::NoResult,
+                     spv::Id lerp_first_unsigned = spv::NoResult,
+                     spv::Id lerp_first_signed = spv::NoResult);
+  // `texture_parameters` need to be set up except for `sampler`, which will be
+  // set internally.
+  spv::Id QueryTextureLod(spv::Builder::TextureParameters& texture_parameters,
+                          spv::Id image_unsigned, spv::Id image_signed,
+                          spv::Id sampler, spv::Id is_all_signed);
+
+  Features features_;
+
+  std::unique_ptr<spv::Builder> builder_;
+
+  std::vector<spv::Id> id_vector_temp_;
+  // For helper functions like operand loading, so they don't conflict with
+  // id_vector_temp_ usage in bigger callbacks.
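+  // (For example, ZeroIfAnyOperandIsZero uses the _util_ vector because its
+  // callers may still be building an argument list in id_vector_temp_.)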
+  std::vector<spv::Id> id_vector_temp_util_;
+  std::vector<unsigned int> uint_vector_temp_;
+  std::vector<unsigned int> uint_vector_temp_util_;
+
+  spv::Id ext_inst_glsl_std_450_;
+
+  spv::Id type_void_;
+
+  union {
+    struct {
+      spv::Id type_bool_;
+      spv::Id type_bool2_;
+      spv::Id type_bool3_;
+      spv::Id type_bool4_;
+    };
+    // Index = component count - 1.
+    spv::Id type_bool_vectors_[4];
+  };
+  union {
+    struct {
+      spv::Id type_int_;
+      spv::Id type_int2_;
+      spv::Id type_int3_;
+      spv::Id type_int4_;
+    };
+    spv::Id type_int_vectors_[4];
+  };
+  union {
+    struct {
+      spv::Id type_uint_;
+      spv::Id type_uint2_;
+      spv::Id type_uint3_;
+      spv::Id type_uint4_;
+    };
+    spv::Id type_uint_vectors_[4];
+  };
+  union {
+    struct {
+      spv::Id type_float_;
+      spv::Id type_float2_;
+      spv::Id type_float3_;
+      spv::Id type_float4_;
+    };
+    spv::Id type_float_vectors_[4];
+  };
+
+  spv::Id const_int_0_;
+  spv::Id const_int4_0_;
+  spv::Id const_uint_0_;
+  spv::Id const_uint4_0_;
+  union {
+    struct {
+      spv::Id const_float_0_;
+      spv::Id const_float2_0_;
+      spv::Id const_float3_0_;
+      spv::Id const_float4_0_;
+    };
+    spv::Id const_float_vectors_0_[4];
+  };
+  union {
+    struct {
+      spv::Id const_float_1_;
+      spv::Id const_float2_1_;
+      spv::Id const_float3_1_;
+      spv::Id const_float4_1_;
+    };
+    spv::Id const_float_vectors_1_[4];
+  };
+  // vec2(0.0, 1.0), to arbitrarily VectorShuffle non-constant and constant
+  // components.
+  spv::Id const_float2_0_1_;
+
+  enum SystemConstantIndex : unsigned int {
+    kSystemConstantFlags,
+    kSystemConstantIndexVertexIndexEndian,
+    kSystemConstantIndexVertexBaseIndex,
+    kSystemConstantNdcScale,
+    kSystemConstantNdcOffset,
+    kSystemConstantTextureSwizzledSigns,
+    kSystemConstantTextureSwizzles,
+    kSystemConstantAlphaTestReference,
+    kSystemConstantColorExpBias,
+  };
+  spv::Id uniform_system_constants_;
+  spv::Id uniform_float_constants_;
+  spv::Id uniform_bool_loop_constants_;
+  spv::Id uniform_fetch_constants_;
+
+  spv::Id buffers_shared_memory_;
+
+  // Not using combined images and samplers because
+  // maxPerStageDescriptorSamplers is often lower than
+  // maxPerStageDescriptorSampledImages, and for every fetch constant, there
+  // are, for regular fetches, two bindings (unsigned and signed).
+  std::vector<TextureBinding> texture_bindings_;
+  std::vector<SamplerBinding> sampler_bindings_;
+
+  // VS as VS only - int.
+  spv::Id input_vertex_index_;
+  // VS as TES only - int.
+  spv::Id input_primitive_id_;
+  // PS, only when needed - float4.
+  spv::Id input_fragment_coord_;
+  // PS, only when needed - bool.
+  spv::Id input_front_facing_;
+
+  // In vertex or tessellation evaluation shaders - outputs, always
+  // xenos::kMaxInterpolators.
+  // In pixel shaders - inputs, min(xenos::kMaxInterpolators,
+  // register_count()).
+  spv::Id input_output_interpolators_[xenos::kMaxInterpolators];
+  static const std::string kInterpolatorNamePrefix;
+
+  enum OutputPerVertexMember : unsigned int {
+    kOutputPerVertexMemberPosition,
+    kOutputPerVertexMemberCount,
+  };
+  spv::Id output_per_vertex_;
+
+  std::array<spv::Id, xenos::kMaxColorRenderTargets> output_fragment_data_;
+
+  std::vector<spv::Id> main_interface_;
+  spv::Function* function_main_;
+  spv::Id main_system_constant_flags_;
+  // bool.
+  spv::Id var_main_predicate_;
+  // uint4.
+  spv::Id var_main_loop_count_;
+  // int4.
+  spv::Id var_main_loop_address_;
+  // int.
+  spv::Id var_main_address_register_;
+  // float.
+  spv::Id var_main_previous_scalar_;
+  // `base + index * stride` in dwords from the last vfetch_full as it may be
+  // needed by vfetch_mini - int.
+  spv::Id var_main_vfetch_address_;
+  // float.
+  spv::Id var_main_tfetch_lod_;
+  // float3.
+  spv::Id var_main_tfetch_gradients_h_;
+  spv::Id var_main_tfetch_gradients_v_;
+  // float4[register_count()].
+  spv::Id var_main_registers_;
+  // VS only - float3 (special exports).
+  spv::Id var_main_point_size_edge_flag_kill_vertex_;
+
+  spv::Block* main_loop_header_;
+  spv::Block* main_loop_continue_;
+  spv::Block* main_loop_merge_;
+  spv::Id main_loop_pc_next_;
+  spv::Block* main_switch_header_;
+  std::unique_ptr<spv::Instruction> main_switch_op_;
+  spv::Block* main_switch_merge_;
+  std::vector<spv::Id> main_switch_next_pc_phi_operands_;
+
+  // If the exec bool constant / predicate conditional is open, block after it
+  // (not added to the function yet).
+  spv::Block* cf_exec_conditional_merge_;
+  // If the instruction-level predicate conditional is open, block after it
+  // (not added to the function yet).
+  spv::Block* cf_instruction_predicate_merge_;
+  // When cf_exec_conditional_merge_ is not null:
+  // If the current exec conditional is based on a bool constant: the number of
+  // the bool constant.
+  // If it's based on the predicate value: kCfExecBoolConstantPredicate.
+  uint32_t cf_exec_bool_constant_or_predicate_;
+  static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX;
+  // When cf_exec_conditional_merge_ is not null, the expected bool constant or
+  // predicate value for the current exec conditional.
+  bool cf_exec_condition_;
+  // When cf_instruction_predicate_merge_ is not null, the expected predicate
+  // value for the current or the last instruction.
+  bool cf_instruction_predicate_condition_;
+  // Whether there was a `setp` in the current exec before the current
+  // instruction, thus the instruction-level predicate value can be different
+  // than the exec-level predicate value, and two execs with the same predicate
+  // condition can't be merged anymore.
+  bool cf_exec_predicate_written_;
 };
 
 }  // namespace gpu
diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc
new file mode 100644
index 000000000..9dfbccb09
--- /dev/null
+++ b/src/xenia/gpu/spirv_shader_translator_alu.cc
@@ -0,0 +1,1448 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv_shader_translator.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "third_party/glslang/SPIRV/GLSL.std.450.h"
+#include "xenia/base/assert.h"
+#include "xenia/base/math.h"
+
+namespace xe {
+namespace gpu {
+
+spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value,
+                                                      spv::Id operand_0_abs,
+                                                      spv::Id operand_1_abs) {
+  EnsureBuildPointAvailable();
+  int num_components = builder_->getNumComponents(value);
+  assert_true(builder_->getNumComponents(operand_0_abs) == num_components);
+  assert_true(builder_->getNumComponents(operand_1_abs) == num_components);
+  id_vector_temp_util_.clear();
+  id_vector_temp_util_.reserve(2);
+  id_vector_temp_util_.push_back(operand_0_abs);
+  id_vector_temp_util_.push_back(operand_1_abs);
+  return builder_->createTriOp(
+      spv::OpSelect, type_float_,
+      builder_->createBinOp(
+          spv::OpFOrdEqual, type_bool_vectors_[num_components - 1],
+          builder_->createBuiltinCall(type_float_vectors_[num_components - 1],
+                                      ext_inst_glsl_std_450_, GLSLstd450NMin,
+                                      id_vector_temp_util_),
+          const_float_vectors_0_[num_components - 1]),
+      const_float_vectors_0_[num_components - 1], value);
+}
+
+void SpirvShaderTranslator::ProcessAluInstruction(
+    const ParsedAluInstruction& instr) {
+  if (instr.IsNop()) {
+    // Don't even disassemble or update predication.
+    return;
+  }
+
+  UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
+
+  // Floating-point arithmetic operations (addition, subtraction, negation,
+  // multiplication, division, modulo - see isArithmeticOperation in
+  // propagateNoContraction of glslang; though for some reason it's not applied
+  // to SPIR-V OpDot, at least in the February 16, 2020 version installed on
+  // http://shader-playground.timjones.io/) must have the NoContraction
+  // decoration to prevent reordering to make sure floating-point calculations
+  // are optimized predictably and exactly the same in different shaders to
+  // allow for multipass rendering (in addition to the Invariant decoration on
+  // outputs).
+
+  // Whether the instruction has changed the predicate, and it needs to be
+  // checked again later.
+  bool predicate_written_vector = false;
+  spv::Id vector_result =
+      ProcessVectorAluOperation(instr, predicate_written_vector);
+
+  bool predicate_written_scalar = false;
+  spv::Id scalar_result =
+      ProcessScalarAluOperation(instr, predicate_written_scalar);
+  if (scalar_result != spv::NoResult) {
+    EnsureBuildPointAvailable();
+    builder_->createStore(scalar_result, var_main_previous_scalar_);
+  } else {
+    // Special retain_prev case - load ps only if needed and don't store the
+    // same value back to ps.
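+    // (retain_prev keeps the previous scalar result in ps, so all that may be
+    // needed is an OpLoad of ps when the result is actually written anywhere.)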
+    if (instr.scalar_result.GetUsedWriteMask()) {
+      EnsureBuildPointAvailable();
+      scalar_result =
+          builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision);
+    }
+  }
+
+  StoreResult(instr.vector_and_constant_result, vector_result);
+  StoreResult(instr.scalar_result, scalar_result);
+
+  if (predicate_written_vector || predicate_written_scalar) {
+    cf_exec_predicate_written_ = true;
+    CloseInstructionPredication();
+  }
+}
+
+spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
+    const ParsedAluInstruction& instr, bool& predicate_written) {
+  predicate_written = false;
+
+  uint32_t used_result_components =
+      instr.vector_and_constant_result.GetUsedResultComponents();
+  if (!used_result_components &&
+      !ucode::GetAluVectorOpcodeInfo(instr.vector_opcode).changed_state) {
+    return spv::NoResult;
+  }
+  uint32_t used_result_component_count = xe::bit_count(used_result_components);
+
+  // Load operand storage without swizzle and sign modifiers.
+  // A small shortcut, operands of cube are the same, but swizzled.
+  uint32_t operand_count;
+  if (instr.vector_opcode == ucode::AluVectorOpcode::kCube) {
+    operand_count = 1;
+  } else {
+    operand_count = instr.vector_operand_count;
+  }
+  spv::Id operand_storage[3] = {};
+  for (uint32_t i = 0; i < operand_count; ++i) {
+    operand_storage[i] = LoadOperandStorage(instr.vector_operands[i]);
+  }
+  spv::Id result_type =
+      used_result_component_count
+          ? type_float_vectors_[used_result_component_count - 1]
+          : spv::NoType;
+
+  // In case the paired scalar instruction (if processed first) terminates the
+  // block (like via OpKill).
+  EnsureBuildPointAvailable();
+
+  // Lookup table for variants of instructions with similar structure.
+  static const unsigned int kOps[] = {
+      static_cast<unsigned int>(spv::OpNop),                   // kAdd
+      static_cast<unsigned int>(spv::OpNop),                   // kMul
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kMax
+      static_cast<unsigned int>(spv::OpFOrdLessThan),          // kMin
+      static_cast<unsigned int>(spv::OpFOrdEqual),             // kSeq
+      static_cast<unsigned int>(spv::OpFOrdGreaterThan),       // kSgt
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kSge
+      static_cast<unsigned int>(spv::OpFUnordNotEqual),        // kSne
+      static_cast<unsigned int>(GLSLstd450Fract),              // kFrc
+      static_cast<unsigned int>(GLSLstd450Trunc),              // kTrunc
+      static_cast<unsigned int>(GLSLstd450Floor),              // kFloor
+      static_cast<unsigned int>(spv::OpNop),                   // kMad
+      static_cast<unsigned int>(spv::OpFOrdEqual),             // kCndEq
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kCndGe
+      static_cast<unsigned int>(spv::OpFOrdGreaterThan),       // kCndGt
+      static_cast<unsigned int>(spv::OpNop),                   // kDp4
+      static_cast<unsigned int>(spv::OpNop),                   // kDp3
+      static_cast<unsigned int>(spv::OpNop),                   // kDp2Add
+      static_cast<unsigned int>(spv::OpNop),                   // kCube
+      static_cast<unsigned int>(spv::OpNop),                   // kMax4
+      static_cast<unsigned int>(spv::OpFOrdEqual),             // kSetpEqPush
+      static_cast<unsigned int>(spv::OpFUnordNotEqual),        // kSetpNePush
+      static_cast<unsigned int>(spv::OpFOrdGreaterThan),       // kSetpGtPush
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kSetpGePush
+      static_cast<unsigned int>(spv::OpFOrdEqual),             // kKillEq
+      static_cast<unsigned int>(spv::OpFOrdGreaterThan),       // kKillGt
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kKillGe
+      static_cast<unsigned int>(spv::OpFUnordNotEqual),        // kKillNe
+      static_cast<unsigned int>(spv::OpNop),                   // kDst
+      static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual),  // kMaxA
+  };
+
+  switch (instr.vector_opcode) {
+    case ucode::AluVectorOpcode::kAdd: {
+      spv::Id result = builder_->createBinOp(
+          spv::OpFAdd, result_type,
+          GetOperandComponents(operand_storage[0], instr.vector_operands[0],
+                               used_result_components),
+          GetOperandComponents(operand_storage[1], instr.vector_operands[1],
+                               used_result_components));
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      return result;
+    }
+    case ucode::AluVectorOpcode::kMul:
+    case ucode::AluVectorOpcode::kMad: {
+      spv::Id multiplicands[2];
+      for (uint32_t i = 0; i < 2; ++i) {
+        multiplicands[i] =
+            GetOperandComponents(operand_storage[i], instr.vector_operands[i],
+                                 used_result_components);
+      }
+      spv::Id result = builder_->createBinOp(
+          spv::OpFMul, result_type, multiplicands[0], multiplicands[1]);
+      builder_->addDecoration(result, spv::DecorationNoContraction);
+      uint32_t multiplicands_different =
+          used_result_components &
+          ~instr.vector_operands[0].GetIdenticalComponents(
+              instr.vector_operands[1]);
+      if (multiplicands_different) {
+        // Shader Model 3: +-0 or denormal * anything = +0.
+        spv::Id different_operands[2] = {multiplicands[0], multiplicands[1]};
+        spv::Id different_result = result;
+        uint32_t different_count = xe::bit_count(multiplicands_different);
+        spv::Id different_type = type_float_vectors_[different_count - 1];
+        // Extract the different components, if not all are different.
+        if (multiplicands_different != used_result_components) {
+          uint_vector_temp_.clear();
+          uint_vector_temp_.reserve(different_count);
+          uint32_t components_remaining = used_result_components;
+          for (uint32_t i = 0; i < used_result_component_count; ++i) {
+            uint32_t component;
+            xe::bit_scan_forward(components_remaining, &component);
+            components_remaining &= ~(uint32_t(1) << component);
+            if (multiplicands_different & (1 << component)) {
+              uint_vector_temp_.push_back(i);
+            }
+          }
+          assert_true(uint_vector_temp_.size() == different_count);
+          if (different_count > 1) {
+            for (uint32_t i = 0; i < 2; ++i) {
+              different_operands[i] = builder_->createRvalueSwizzle(
+                  spv::NoPrecision, different_type, different_operands[i],
+                  uint_vector_temp_);
+            }
+            different_result = builder_->createRvalueSwizzle(
+                spv::NoPrecision, different_type, different_result,
+                uint_vector_temp_);
+          } else {
+            for (uint32_t i = 0; i < 2; ++i) {
+              different_operands[i] = builder_->createCompositeExtract(
+                  different_operands[i], different_type, uint_vector_temp_[0]);
+            }
+            different_result = builder_->createCompositeExtract(
+                different_result, different_type, uint_vector_temp_[0]);
+          }
+        }
+        // Check if the different components in any of the operands are zero,
+        // even if the other is NaN - if min(|a|, |b|) is 0.
+        for (uint32_t i = 0; i < 2; ++i) {
+          different_operands[i] = GetAbsoluteOperand(different_operands[i],
+                                                     instr.vector_operands[i]);
+        }
+        id_vector_temp_.clear();
+        id_vector_temp_.reserve(2);
+        id_vector_temp_.push_back(different_operands[0]);
+        id_vector_temp_.push_back(different_operands[1]);
+        spv::Id different_abs_min =
+            builder_->createBuiltinCall(different_type, ext_inst_glsl_std_450_,
+                                        GLSLstd450NMin, id_vector_temp_);
+        spv::Id different_zero = builder_->createBinOp(
+            spv::OpFOrdEqual, type_bool_vectors_[different_count - 1],
+            different_abs_min, const_float_vectors_0_[different_count - 1]);
+        // Replace with +0.
+        different_result = builder_->createTriOp(
+            spv::OpSelect, different_type, different_zero,
+            const_float_vectors_0_[different_count - 1], different_result);
+        // Insert the different components back to the result.
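+        // (OpVectorShuffle merges them back when more than one component
+        // differs, OpCompositeInsert when only one does.)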
+        if (multiplicands_different != used_result_components) {
+          if (different_count > 1) {
+            std::unique_ptr<spv::Instruction> shuffle_op =
+                std::make_unique<spv::Instruction>(
+                    builder_->getUniqueId(), result_type, spv::OpVectorShuffle);
+            shuffle_op->addIdOperand(result);
+            shuffle_op->addIdOperand(different_result);
+            uint32_t components_remaining = used_result_components;
+            unsigned int different_shuffle_index = used_result_component_count;
+            for (uint32_t i = 0; i < used_result_component_count; ++i) {
+              uint32_t component;
+              xe::bit_scan_forward(components_remaining, &component);
+              components_remaining &= ~(uint32_t(1) << component);
+              shuffle_op->addImmediateOperand(
+                  (multiplicands_different & (1 << component))
+                      ? different_shuffle_index++
+                      : i);
+            }
+            result = shuffle_op->getResultId();
+            builder_->getBuildPoint()->addInstruction(std::move(shuffle_op));
+          } else {
+            result = builder_->createCompositeInsert(
+                different_result, result, result_type,
+                xe::bit_count(used_result_components &
+                              (multiplicands_different - 1)));
+          }
+        } else {
+          result = different_result;
+        }
+      }
+      if (instr.vector_opcode == ucode::AluVectorOpcode::kMad) {
+        // Not replacing true `0 + term` with conditional selection of the term
+        // because +0 + -0 should result in +0, not -0.
+        result = builder_->createBinOp(
+            spv::OpFAdd, result_type, result,
+            GetOperandComponents(operand_storage[2], instr.vector_operands[2],
+                                 used_result_components));
+        builder_->addDecoration(result, spv::DecorationNoContraction);
+      }
+      return result;
+    }
+
+    case ucode::AluVectorOpcode::kMax:
+    case ucode::AluVectorOpcode::kMin:
+    case ucode::AluVectorOpcode::kMaxA: {
+      bool is_maxa = instr.vector_opcode == ucode::AluVectorOpcode::kMaxA;
+      spv::Id operand_0 = GetOperandComponents(
+          operand_storage[0], instr.vector_operands[0],
+          used_result_components | (is_maxa ? 0b1000 : 0b0000));
+      spv::Id maxa_operand_0_w = spv::NoResult;
+      if (is_maxa) {
+        // a0 = (int)clamp(floor(src0.w + 0.5), -256.0, 255.0)
+        int operand_0_num_components = builder_->getNumComponents(operand_0);
+        if (operand_0_num_components > 1) {
+          maxa_operand_0_w = builder_->createCompositeExtract(
+              operand_0, type_float_,
+              static_cast<unsigned int>(operand_0_num_components - 1));
+        } else {
+          maxa_operand_0_w = operand_0;
+        }
+        spv::Id maxa_address =
+            builder_->createBinOp(spv::OpFAdd, type_float_, maxa_operand_0_w,
+                                  builder_->makeFloatConstant(0.5f));
+        builder_->addDecoration(maxa_address, spv::DecorationNoContraction);
+        id_vector_temp_.clear();
+        id_vector_temp_.push_back(maxa_address);
+        maxa_address =
+            builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                        GLSLstd450Floor, id_vector_temp_);
+        id_vector_temp_.clear();
+        id_vector_temp_.reserve(3);
+        id_vector_temp_.push_back(maxa_address);
+        id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f));
+        id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f));
+        builder_->createStore(
+            builder_->createUnaryOp(
+                spv::OpConvertFToS, type_int_,
+                builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                            GLSLstd450NClamp, id_vector_temp_)),
+            var_main_address_register_);
+      }
+      if (!used_result_components) {
+        // maxa returning nothing - can't load src1.
+        return spv::NoResult;
+      }
+      // max is commonly used as mov.
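+      // (The Xenos ucode has no dedicated mov ALU operation - it's typically
+      // encoded as max with both operands the same, so when all used
+      // components of the two operands are identical, src0 is passed through.)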
+      uint32_t identical = instr.vector_operands[0].GetIdenticalComponents(
+                               instr.vector_operands[1]) &
+                           used_result_components;
+      spv::Id operand_0_per_component;
+      if (is_maxa && !(used_result_components & 0b1000) &&
+          (identical == used_result_components || !identical)) {
+        // operand_0 and operand_1 have different lengths if src0.w is forced
+        // without W being in the write mask for maxa purposes -
+        // shuffle/extract the needed part if src0.w is only needed for setting
+        // a0.
+        // This is only needed for cases without mixed identical and different
+        // components - the mixed case uses CompositeExtract, which works fine.
+        if (used_result_component_count > 1) {
+          // Need all but the last (W) element of operand_0 as a vector.
+          uint_vector_temp_.clear();
+          uint_vector_temp_.reserve(used_result_component_count);
+          for (unsigned int i = 0; i < used_result_component_count; ++i) {
+            uint_vector_temp_.push_back(i);
+          }
+          operand_0_per_component = builder_->createRvalueSwizzle(
+              spv::NoPrecision,
+              type_float_vectors_[used_result_component_count - 1], operand_0,
+              uint_vector_temp_);
+        } else {
+          // Need the non-W component as scalar.
+          operand_0_per_component =
+              builder_->createCompositeExtract(operand_0, type_float_, 0);
+        }
+      } else {
+        operand_0_per_component = operand_0;
+      }
+      if (identical == used_result_components) {
+        // All components are identical - mov (with the correct length in case
+        // of maxa). Don't access operand_1 at all in this case (operand_0 is
+        // already accessed for W in case of maxa).
+        assert_true(builder_->getNumComponents(operand_0_per_component) ==
+                    used_result_component_count);
+        return operand_0_per_component;
+      }
+      spv::Id operand_1 = GetOperandComponents(
+          operand_storage[1], instr.vector_operands[1], used_result_components);
+      // Shader Model 3 NaN behavior (a op b ? a : b, not SPIR-V FMax/FMin,
+      // which are undefined for NaN, or NMax/NMin, which return the non-NaN
+      // operand).
+      spv::Op op = spv::Op(kOps[size_t(instr.vector_opcode)]);
+      if (!identical) {
+        // All components are different - max/min of the scalars or the entire
+        // vectors (with the correct length in case of maxa).
+        assert_true(builder_->getNumComponents(operand_0_per_component) ==
+                    used_result_component_count);
+        return builder_->createTriOp(
+            spv::OpSelect, result_type,
+            builder_->createBinOp(
+                op, type_bool_vectors_[used_result_component_count - 1],
+                operand_0_per_component, operand_1),
+            operand_0_per_component, operand_1);
+      }
+      // Mixed identical and different components.
+      assert_true(used_result_component_count > 1);
+      id_vector_temp_.clear();
+      id_vector_temp_.reserve(used_result_component_count);
+      uint32_t components_remaining = used_result_components;
+      for (uint32_t i = 0; i < used_result_component_count; ++i) {
+        // Composite extraction of operand_0[i] works fine even if it's maxa
+        // with src0.w forced without W being in the write mask - src0.w would
+        // be the last, so all indices before it are still valid. Don't extract
+        // twice if already extracted though.
+        spv::Id result_component =
+            ((used_result_components & 0b1000) &&
+             i + 1 >= used_result_component_count &&
+             maxa_operand_0_w != spv::NoResult)
+                ?
maxa_operand_0_w + : builder_->createCompositeExtract(operand_0, type_float_, i); + uint32_t component_index; + xe::bit_scan_forward(components_remaining, &component_index); + components_remaining &= ~(uint32_t(1) << component_index); + if (!(identical & (1 << component_index))) { + spv::Id operand_1_component = + builder_->createCompositeExtract(operand_1, type_float_, i); + result_component = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(op, type_bool_, result_component, + operand_1_component), + result_component, operand_1_component); + } + id_vector_temp_.push_back(result_component); + } + return builder_->createCompositeConstruct(result_type, id_vector_temp_); + } + + case ucode::AluVectorOpcode::kSeq: + case ucode::AluVectorOpcode::kSgt: + case ucode::AluVectorOpcode::kSge: + case ucode::AluVectorOpcode::kSne: + return builder_->createTriOp( + spv::OpSelect, result_type, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), + type_bool_vectors_[used_result_component_count - 1], + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components)), + const_float_vectors_1_[used_result_component_count - 1], + const_float_vectors_0_[used_result_component_count - 1]); + + case ucode::AluVectorOpcode::kFrc: + case ucode::AluVectorOpcode::kTrunc: + case ucode::AluVectorOpcode::kFloor: + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents(operand_storage[0], + instr.vector_operands[0], + used_result_components)); + return builder_->createBuiltinCall( + result_type, ext_inst_glsl_std_450_, + GLSLstd450(kOps[size_t(instr.vector_opcode)]), id_vector_temp_); + + case ucode::AluVectorOpcode::kCndEq: + case ucode::AluVectorOpcode::kCndGe: + case ucode::AluVectorOpcode::kCndGt: + return builder_->createTriOp( + spv::OpSelect, result_type, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), + type_bool_vectors_[used_result_component_count - 1], + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components), + const_float_vectors_0_[used_result_component_count - 1]), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components), + GetOperandComponents(operand_storage[2], instr.vector_operands[2], + used_result_components)); + + case ucode::AluVectorOpcode::kDp4: + case ucode::AluVectorOpcode::kDp3: + case ucode::AluVectorOpcode::kDp2Add: { + // Not using OpDot for predictable optimization (especially addition + // order) and NoContraction (which, for some reason, isn't placed on dot + // in glslang as of the February 16, 2020 version). 
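+      // (The products are instead multiplied and added component by component,
+      // each FMul/FAdd decorated with NoContraction, with the Shader Model 3
+      // zero-times-anything rule applied only where the operands may differ.)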
+      uint32_t component_count;
+      if (instr.vector_opcode == ucode::AluVectorOpcode::kDp2Add) {
+        component_count = 2;
+      } else if (instr.vector_opcode == ucode::AluVectorOpcode::kDp3) {
+        component_count = 3;
+      } else {
+        component_count = 4;
+      }
+      uint32_t component_mask = (1 << component_count) - 1;
+      spv::Id operands[2];
+      for (uint32_t i = 0; i < 2; ++i) {
+        operands[i] = GetOperandComponents(
+            operand_storage[i], instr.vector_operands[i], component_mask);
+      }
+      uint32_t different =
+          component_mask & ~instr.vector_operands[0].GetIdenticalComponents(
+                               instr.vector_operands[1]);
+      spv::Id result = spv::NoResult;
+      for (uint32_t i = 0; i < component_count; ++i) {
+        spv::Id operand_components[2];
+        for (unsigned int j = 0; j < 2; ++j) {
+          operand_components[j] =
+              builder_->createCompositeExtract(operands[j], type_float_, i);
+        }
+        spv::Id product =
+            builder_->createBinOp(spv::OpFMul, type_float_,
+                                  operand_components[0], operand_components[1]);
+        builder_->addDecoration(product, spv::DecorationNoContraction);
+        if (different & (1 << i)) {
+          // Shader Model 3: +-0 or denormal * anything = +0.
+          product = ZeroIfAnyOperandIsZero(
+              product,
+              GetAbsoluteOperand(operand_components[0],
+                                 instr.vector_operands[0]),
+              GetAbsoluteOperand(operand_components[1],
+                                 instr.vector_operands[1]));
+        }
+        if (!i) {
+          result = product;
+          continue;
+        }
+        result =
+            builder_->createBinOp(spv::OpFAdd, type_float_, result, product);
+        builder_->addDecoration(result, spv::DecorationNoContraction);
+      }
+      if (instr.vector_opcode == ucode::AluVectorOpcode::kDp2Add) {
+        result = builder_->createBinOp(
+            spv::OpFAdd, type_float_, result,
+            GetOperandComponents(operand_storage[2], instr.vector_operands[2],
+                                 0b0001));
+        builder_->addDecoration(result, spv::DecorationNoContraction);
+      }
+      return result;
+    }
+
+    case ucode::AluVectorOpcode::kCube: {
+      // operands[0] is .z_xy.
+      // Result is T coordinate, S coordinate, 2 * major axis, face ID.
+      // Skipping the second component of the operand, so 120, not 230.
+      spv::Id operand_vector = GetOperandComponents(
+          operand_storage[0], instr.vector_operands[0], 0b1101);
+      // Remapped from ZXY (Z_XY without the skipped component) to XYZ.
+      spv::Id operand[3];
+      for (unsigned int i = 0; i < 3; ++i) {
+        operand[i] = builder_->createCompositeExtract(operand_vector,
+                                                      type_float_, (i + 1) % 3);
+      }
+      spv::Id operand_abs[3];
+      if (!instr.vector_operands[0].is_absolute_value ||
+          instr.vector_operands[0].is_negated) {
+        for (unsigned int i = 0; i < 3; ++i) {
+          id_vector_temp_.clear();
+          id_vector_temp_.push_back(operand[i]);
+          operand_abs[i] =
+              builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                          GLSLstd450FAbs, id_vector_temp_);
+        }
+      } else {
+        for (unsigned int i = 0; i < 3; ++i) {
+          operand_abs[i] = operand[i];
+        }
+      }
+      spv::Id operand_neg[3] = {};
+      if (used_result_components & 0b0001) {
+        operand_neg[1] =
+            builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[1]);
+        builder_->addDecoration(operand_neg[1], spv::DecorationNoContraction);
+      }
+      if (used_result_components & 0b0010) {
+        operand_neg[0] =
+            builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[0]);
+        builder_->addDecoration(operand_neg[0], spv::DecorationNoContraction);
+        operand_neg[2] =
+            builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[2]);
+        builder_->addDecoration(operand_neg[2], spv::DecorationNoContraction);
+      }
+
+      // Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)).
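+      // (Face selection priority is Z, then Y, then X, matching the face IDs
+      // written to the result W component: 0/1 for +X/-X, 2/3 for +Y/-Y, 4/5
+      // for +Z/-Z.)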
+ // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id ma_z_condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[2], operand_abs[0]), + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[2], operand_abs[1])); + spv::Function& function = builder_->getBuildPoint()->getParent(); + spv::Block& ma_z_block = builder_->makeNewBlock(); + spv::Block& ma_yx_block = builder_->makeNewBlock(); + spv::Block* ma_merge_block = + new spv::Block(builder_->getUniqueId(), function); + SpirvCreateSelectionMerge(ma_merge_block->getId()); + builder_->createConditionalBranch(ma_z_condition, &ma_z_block, + &ma_yx_block); + + builder_->setBuildPoint(&ma_z_block); + // The major axis is Z. + spv::Id ma_z_result[4] = {}; + // tc = -y + ma_z_result[0] = operand_neg[1]; + // ma/2 = z + ma_z_result[2] = operand[2]; + if (used_result_components & 0b1010) { + spv::Id z_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[2], const_float_0_); + if (used_result_components & 0b0010) { + // sc = z < 0.0 ? -x : x + ma_z_result[1] = builder_->createTriOp( + spv::OpSelect, type_float_, z_is_neg, operand_neg[0], operand[0]); + } + if (used_result_components & 0b1000) { + // id = z < 0.0 ? 5.0 : 4.0 + ma_z_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, z_is_neg, + builder_->makeFloatConstant(5.0f), + builder_->makeFloatConstant(4.0f)); + } + } + builder_->createBranch(ma_merge_block); + + builder_->setBuildPoint(&ma_yx_block); + // The major axis is not Z - create an inner conditional to check if the + // major axis is Y (abs(y) >= abs(x)). + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id ma_y_condition = + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[1], operand_abs[0]); + spv::Block& ma_y_block = builder_->makeNewBlock(); + spv::Block& ma_x_block = builder_->makeNewBlock(); + spv::Block& ma_yx_merge_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(ma_yx_merge_block.getId()); + builder_->createConditionalBranch(ma_y_condition, &ma_y_block, + &ma_x_block); + + builder_->setBuildPoint(&ma_y_block); + // The major axis is Y. + spv::Id ma_y_result[4] = {}; + // sc = x + ma_y_result[1] = operand[0]; + // ma/2 = y + ma_y_result[2] = operand[1]; + if (used_result_components & 0b1001) { + spv::Id y_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[1], const_float_0_); + if (used_result_components & 0b0001) { + // tc = y < 0.0 ? -z : z + ma_y_result[0] = builder_->createTriOp( + spv::OpSelect, type_float_, y_is_neg, operand_neg[2], operand[2]); + } + if (used_result_components & 0b1000) { + // id = y < 0.0 ? 3.0 : 2.0 + ma_y_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, y_is_neg, + builder_->makeFloatConstant(3.0f), + builder_->makeFloatConstant(2.0f)); + } + } + builder_->createBranch(&ma_yx_merge_block); + + builder_->setBuildPoint(&ma_x_block); + // The major axis is X. + spv::Id ma_x_result[4] = {}; + // tc = -y + ma_x_result[0] = operand_neg[1]; + // ma/2 = x + ma_x_result[2] = operand[0]; + if (used_result_components & 0b1010) { + spv::Id x_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[0], const_float_0_); + if (used_result_components & 0b0010) { + // sc = x < 0.0 ?
z : -z + ma_x_result[1] = builder_->createTriOp( + spv::OpSelect, type_float_, x_is_neg, operand[2], operand_neg[2]); + } + if (used_result_components & 0b1000) { + // id = x < 0.0 ? 1.0 : 0.0 + ma_x_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, x_is_neg, + const_float_1_, const_float_0_); + } + } + builder_->createBranch(&ma_yx_merge_block); + + builder_->setBuildPoint(&ma_yx_merge_block); + // The major axis is Y or X - choose the options of the result from Y and + // X. + spv::Id ma_yx_result[4] = {}; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_result_components & (1 << i))) { + continue; + } + std::unique_ptr<spv::Instruction> phi_op = + std::make_unique<spv::Instruction>(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(ma_y_result[i]); + phi_op->addIdOperand(ma_y_block.getId()); + phi_op->addIdOperand(ma_x_result[i]); + phi_op->addIdOperand(ma_x_block.getId()); + ma_yx_result[i] = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + builder_->createBranch(ma_merge_block); + + function.addBlock(ma_merge_block); + builder_->setBuildPoint(ma_merge_block); + // Choose the result options from Z and YX cases. + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_result_components & (1 << i))) { + continue; + } + std::unique_ptr<spv::Instruction> phi_op = + std::make_unique<spv::Instruction>(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(ma_z_result[i]); + phi_op->addIdOperand(ma_z_block.getId()); + phi_op->addIdOperand(ma_yx_result[i]); + phi_op->addIdOperand(ma_yx_merge_block.getId()); + id_vector_temp_.push_back(phi_op->getResultId()); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + assert_true(id_vector_temp_.size() == used_result_component_count); + if (used_result_components & 0b0100) { + // Multiply the major axis by 2. + spv::Id& ma2 = id_vector_temp_[xe::bit_count(used_result_components & + ((1 << 2) - 1))]; + ma2 = builder_->createBinOp(spv::OpFMul, type_float_, + builder_->makeFloatConstant(2.0f), ma2); + builder_->addDecoration(ma2, spv::DecorationNoContraction); + } + if (used_result_component_count == 1) { + // Only one component - not composite. + return id_vector_temp_[0]; + } + return builder_->createCompositeConstruct( + type_float_vectors_[used_result_component_count - 1], + id_vector_temp_); + } + + case ucode::AluVectorOpcode::kMax4: { + // Find max of all different components of the first operand. + // FIXME(Triang3l): Not caring about NaN because there's no info about + // the correct order, just using NMax here, which replaces them with the + // non-NaN component (one nice thing about it, though, is that it may be + // compiled into max3 + max on GCN).
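The max4 reduction that follows can be summarized on the host like this (a sketch with illustrative names): the swizzle is turned into a bitmask so duplicated source components are folded only once, and the fold uses a max that prefers the non-NaN operand, like SPIR-V NMax.

```cpp
#include <algorithm>
#include <cstdint>

// Fold the distinct source components selected by the swizzle (each swizzle
// entry is 0..3); duplicates are visited once thanks to the bitmask.
static float Max4(const float src[4], const uint32_t swizzle[4]) {
  uint32_t remaining = 0;
  for (int i = 0; i < 4; ++i) {
    remaining |= uint32_t(1) << swizzle[i];
  }
  float result = 0.0f;
  bool first = true;
  for (uint32_t c = 0; c < 4; ++c) {
    if (!(remaining & (uint32_t(1) << c))) continue;
    float v = src[c];
    if (first) {
      result = v;
      first = false;
      continue;
    }
    // NMax semantics: if one operand is NaN, return the other one.
    result = (v != v) ? result
                      : (result != result ? v : std::max(result, v));
  }
  return result;
}
```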
+ uint32_t components_remaining = 0b0000; + for (uint32_t i = 0; i < 4; ++i) { + SwizzleSource swizzle_source = instr.vector_operands[0].GetComponent(i); + assert_true(swizzle_source >= SwizzleSource::kX && + swizzle_source <= SwizzleSource::kW); + components_remaining |= + 1 << (uint32_t(swizzle_source) - uint32_t(SwizzleSource::kX)); + } + assert_not_zero(components_remaining); + spv::Id operand = + ApplyOperandModifiers(operand_storage[0], instr.vector_operands[0]); + uint32_t component; + xe::bit_scan_forward(components_remaining, &component); + components_remaining &= ~(uint32_t(1) << component); + spv::Id result = builder_->createCompositeExtract( + operand, type_float_, static_cast<unsigned int>(component)); + while (xe::bit_scan_forward(components_remaining, &component)) { + components_remaining &= ~(uint32_t(1) << component); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(result); + id_vector_temp_.push_back(builder_->createCompositeExtract( + operand, type_float_, static_cast<unsigned int>(component))); + result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMax, id_vector_temp_); + } + return result; + } + + case ucode::AluVectorOpcode::kSetpEqPush: + case ucode::AluVectorOpcode::kSetpNePush: + case ucode::AluVectorOpcode::kSetpGtPush: + case ucode::AluVectorOpcode::kSetpGePush: { + // X is only needed for the result, W is needed for the predicate. + spv::Id operands[2]; + spv::Id operands_w[2]; + for (uint32_t i = 0; i < 2; ++i) { + operands[i] = + GetOperandComponents(operand_storage[i], instr.vector_operands[i], + used_result_components ? 0b1001 : 0b1000); + if (used_result_components) { + operands_w[i] = + builder_->createCompositeExtract(operands[i], type_float_, 1); + } else { + operands_w[i] = operands[i]; + } + } + spv::Op op = spv::Op(kOps[size_t(instr.vector_opcode)]); + // p0 = src0.w == 0.0 && src1.w op 0.0 + builder_->createStore( + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, operands_w[0], + const_float_0_), + builder_->createBinOp(op, type_bool_, operands_w[1], + const_float_0_)), + var_main_predicate_); + predicate_written = true; + if (!used_result_components) { + return spv::NoResult; + } + // result = (src0.x == 0.0 && src1.x op 0.0) ? 0.0 : src0.x + 1.0 + // Or: + // result = ((src0.x == 0.0 && src1.x op 0.0) ? -1.0 : src0.x) + 1.0 + spv::Id operands_x[2]; + for (uint32_t i = 0; i < 2; ++i) { + operands_x[i] = + builder_->createCompositeExtract(operands[i], type_float_, 0); + } + spv::Id condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, operands_x[0], + const_float_0_), + builder_->createBinOp(op, type_bool_, operands_x[1], const_float_0_)); + spv::Id result = builder_->createBinOp( + spv::OpFAdd, type_float_, + builder_->createTriOp(spv::OpSelect, type_float_, condition, + builder_->makeFloatConstant(-1.0f), + operands_x[0]), + const_float_1_); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + + case ucode::AluVectorOpcode::kKillEq: + case ucode::AluVectorOpcode::kKillGt: + case ucode::AluVectorOpcode::kKillGe: + case ucode::AluVectorOpcode::kKillNe: { + // Selection merge must be the penultimate instruction in the block, check + // the condition before it.
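The vector kill family handled next discards the pixel when the comparison holds for any of the four components, and the instruction itself returns 0. A host-side sketch of that contract (illustrative names, not translator API):

```cpp
// Model of kKillEq/kKillGt/kKillGe/kKillNe: OpAny over the per-component
// comparison vector decides the kill; the destination always receives 0.0f.
template <typename Cmp>
static float KillVector(const float a[4], const float b[4], Cmp cmp,
                        bool& killed) {
  killed = false;
  for (int i = 0; i < 4; ++i) {
    killed |= cmp(a[i], b[i]);  // Corresponds to OpAny in the SPIR-V output.
  }
  // The translator emits an OpKill branch here; demoting to a helper
  // invocation instead is still a TODO in the source.
  return 0.0f;
}
```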
+ spv::Id condition = builder_->createUnaryOp( + spv::OpAny, type_bool_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + 0b1111), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + 0b1111))); + spv::Block& kill_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(merge_block.getId()); + builder_->createConditionalBranch(condition, &kill_block, &merge_block); + builder_->setBuildPoint(&kill_block); + // TODO(Triang3l): Demote to helper invocation to keep derivatives if + // needed (and return 1 if killed in this case). + builder_->createNoResultOp(spv::OpKill); + builder_->setBuildPoint(&merge_block); + return const_float_0_; + } + + case ucode::AluVectorOpcode::kDst: { + spv::Id operands[2] = {}; + if (used_result_components & 0b0110) { + // result.yz is needed: [0] = y, [1] = z. + // only result.y is needed: scalar = y. + // only result.z is needed: scalar = z. + operands[0] = + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components & 0b0110); + } + if (used_result_components & 0b1010) { + // result.yw is needed: [0] = y, [1] = w. + // only result.y is needed: scalar = y. + // only result.w is needed: scalar = w. + operands[1] = + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components & 0b1010); + } + // y = src0.y * src1.y + spv::Id result_y = spv::NoResult; + if (used_result_components & 0b0010) { + spv::Id operands_y[2]; + operands_y[0] = + (used_result_components & 0b0100) + ? builder_->createCompositeExtract(operands[0], type_float_, 0) + : operands[0]; + operands_y[1] = + (used_result_components & 0b1000) + ? builder_->createCompositeExtract(operands[1], type_float_, 0) + : operands[1]; + result_y = builder_->createBinOp(spv::OpFMul, type_float_, + operands_y[0], operands_y[1]); + builder_->addDecoration(result_y, spv::DecorationNoContraction); + if (!(instr.vector_operands[0].GetIdenticalComponents( + instr.vector_operands[1]) & + 0b0010)) { + // Shader Model 3: +0 or denormal * anything = +-0. + result_y = ZeroIfAnyOperandIsZero( + result_y, + GetAbsoluteOperand(operands_y[0], instr.vector_operands[0]), + GetAbsoluteOperand(operands_y[1], instr.vector_operands[1])); + } + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + if (used_result_components & 0b0001) { + // x = 1.0 + id_vector_temp_.push_back(const_float_1_); + } + if (used_result_components & 0b0010) { + // y = src0.y * src1.y + id_vector_temp_.push_back(result_y); + } + if (used_result_components & 0b0100) { + // z = src0.z + id_vector_temp_.push_back( + (used_result_components & 0b0010) + ? builder_->createCompositeExtract(operands[0], type_float_, 1) + : operands[0]); + } + if (used_result_components & 0b1000) { + // w = src1.w + id_vector_temp_.push_back( + (used_result_components & 0b0010) + ? builder_->createCompositeExtract(operands[1], type_float_, 1) + : operands[1]); + } + assert_true(id_vector_temp_.size() == used_result_component_count); + if (used_result_component_count == 1) { + // Only one component - not composite.
+ return id_vector_temp_[0]; + } + return builder_->createCompositeConstruct( + type_float_vectors_[used_result_component_count - 1], + id_vector_temp_); + } + } + + assert_unhandled_case(instr.vector_opcode); + EmitTranslationError("Unknown ALU vector operation"); + return spv::NoResult; +} + +spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( + const ParsedAluInstruction& instr, bool& predicate_written) { + predicate_written = false; + + spv::Id operand_storage[2] = {}; + for (uint32_t i = 0; i < instr.scalar_operand_count; ++i) { + operand_storage[i] = LoadOperandStorage(instr.scalar_operands[i]); + } + + // In case the paired vector instruction (if processed first) terminates the + // block (like via OpKill). + EnsureBuildPointAvailable(); + + // Lookup table for variants of instructions with similar structure. + static const unsigned int kOps[] = { + static_cast<unsigned int>(spv::OpFAdd), // kAdds + static_cast<unsigned int>(spv::OpFAdd), // kAddsPrev + static_cast<unsigned int>(spv::OpNop), // kMuls + static_cast<unsigned int>(spv::OpNop), // kMulsPrev + static_cast<unsigned int>(spv::OpNop), // kMulsPrev2 + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kMaxs + static_cast<unsigned int>(spv::OpFOrdLessThan), // kMins + static_cast<unsigned int>(spv::OpFOrdEqual), // kSeqs + static_cast<unsigned int>(spv::OpFOrdGreaterThan), // kSgts + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kSges + static_cast<unsigned int>(spv::OpFUnordNotEqual), // kSnes + static_cast<unsigned int>(GLSLstd450Fract), // kFrcs + static_cast<unsigned int>(GLSLstd450Trunc), // kTruncs + static_cast<unsigned int>(GLSLstd450Floor), // kFloors + static_cast<unsigned int>(GLSLstd450Exp2), // kExp + static_cast<unsigned int>(spv::OpNop), // kLogc + static_cast<unsigned int>(GLSLstd450Log2), // kLog + static_cast<unsigned int>(spv::OpNop), // kRcpc + static_cast<unsigned int>(spv::OpNop), // kRcpf + static_cast<unsigned int>(spv::OpNop), // kRcp + static_cast<unsigned int>(spv::OpNop), // kRsqc + static_cast<unsigned int>(spv::OpNop), // kRsqf + static_cast<unsigned int>(GLSLstd450InverseSqrt), // kRsq + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kMaxAs + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kMaxAsf + static_cast<unsigned int>(spv::OpFSub), // kSubs + static_cast<unsigned int>(spv::OpFSub), // kSubsPrev + static_cast<unsigned int>(spv::OpFOrdEqual), // kSetpEq + static_cast<unsigned int>(spv::OpFUnordNotEqual), // kSetpNe + static_cast<unsigned int>(spv::OpFOrdGreaterThan), // kSetpGt + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kSetpGe + static_cast<unsigned int>(spv::OpNop), // kSetpInv + static_cast<unsigned int>(spv::OpNop), // kSetpPop + static_cast<unsigned int>(spv::OpNop), // kSetpClr + static_cast<unsigned int>(spv::OpNop), // kSetpRstr + static_cast<unsigned int>(spv::OpFOrdEqual), // kKillsEq + static_cast<unsigned int>(spv::OpFOrdGreaterThan), // kKillsGt + static_cast<unsigned int>(spv::OpFOrdGreaterThanEqual), // kKillsGe + static_cast<unsigned int>(spv::OpFUnordNotEqual), // kKillsNe + static_cast<unsigned int>(spv::OpFOrdEqual), // kKillsOne + static_cast<unsigned int>(GLSLstd450Sqrt), // kSqrt + static_cast<unsigned int>(spv::OpNop), // Invalid + static_cast<unsigned int>(spv::OpNop), // kMulsc0 + static_cast<unsigned int>(spv::OpNop), // kMulsc1 + static_cast<unsigned int>(spv::OpFAdd), // kAddsc0 + static_cast<unsigned int>(spv::OpFAdd), // kAddsc1 + static_cast<unsigned int>(spv::OpFSub), // kSubsc0 + static_cast<unsigned int>(spv::OpFSub), // kSubsc1 + static_cast<unsigned int>(GLSLstd450Sin), // kSin + static_cast<unsigned int>(GLSLstd450Cos), // kCos + static_cast<unsigned int>(spv::OpNop), // kRetainPrev + }; + + switch (instr.scalar_opcode) { + case ucode::AluScalarOpcode::kAdds: + case ucode::AluScalarOpcode::kSubs: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + spv::Id result = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, a, b); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kAddsPrev: + case ucode::AluScalarOpcode::kSubsPrev: { + spv::Id result =
builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kMuls: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, b); + builder_->addDecoration(result, spv::DecorationNoContraction); + if (a != b) { + // Shader Model 3: +0 or denormal * anything = +-0. + result = ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(a, instr.scalar_operands[0]), + GetAbsoluteOperand(b, instr.scalar_operands[0])); + } + return result; + } + case ucode::AluScalarOpcode::kMulsPrev: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id ps = + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); + spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, ps); + builder_->addDecoration(result, spv::DecorationNoContraction); + // Shader Model 3: +0 or denormal * anything = +-0. + id_vector_temp_.clear(); + id_vector_temp_.push_back(ps); + return ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(a, instr.scalar_operands[0]), + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_)); + } + case ucode::AluScalarOpcode::kMulsPrev2: { + // Check if need to select the src0.a * ps case. + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id ps = + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); + // ps != -FLT_MAX. + spv::Id const_float_max_neg = builder_->makeFloatConstant(-FLT_MAX); + spv::Id condition = builder_->createBinOp( + spv::OpFUnordNotEqual, type_bool_, ps, const_float_max_neg); + // isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX + // is already loaded to an SGPR, this is also false if it's NaN. + id_vector_temp_.clear(); + id_vector_temp_.push_back(ps); + spv::Id ps_abs = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + spv::Id ps_abs_neg = + builder_->createUnaryOp(spv::OpFNegate, type_float_, ps_abs); + builder_->addDecoration(ps_abs_neg, spv::DecorationNoContraction); + condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + ps_abs_neg, const_float_max_neg)); + // isfinite(src0.b), or -|src0.b| >= -FLT_MAX for the same reason. 
+ spv::Id b = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0010); + spv::Id b_abs_neg = b; + if (!instr.scalar_operands[0].is_absolute_value) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(b_abs_neg); + b_abs_neg = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_); + } + if (!instr.scalar_operands[0].is_absolute_value || + !instr.scalar_operands[0].is_negated) { + b_abs_neg = + builder_->createUnaryOp(spv::OpFNegate, type_float_, b_abs_neg); + builder_->addDecoration(b_abs_neg, spv::DecorationNoContraction); + } + condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + b_abs_neg, const_float_max_neg)); + // src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked + // for NaN). + condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b, + const_float_0_)); + spv::Block& multiply_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(merge_block.getId()); + { + std::unique_ptr<spv::Instruction> branch_conditional_op = + std::make_unique<spv::Instruction>(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(condition); + branch_conditional_op->addIdOperand(multiply_block.getId()); + branch_conditional_op->addIdOperand(merge_block.getId()); + // More likely to multiply than to return -FLT_MAX. + branch_conditional_op->addImmediateOperand(2); + branch_conditional_op->addImmediateOperand(1); + builder_->getBuildPoint()->addInstruction( + std::move(branch_conditional_op)); + } + spv::Block& head_block = *builder_->getBuildPoint(); + multiply_block.addPredecessor(&head_block); + merge_block.addPredecessor(&head_block); + // Multiplication case. + builder_->setBuildPoint(&multiply_block); + spv::Id a = instr.scalar_operands[0].GetComponent(0) != + instr.scalar_operands[0].GetComponent(1) + ? GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001) + : b; + spv::Id product = builder_->createBinOp(spv::OpFMul, type_float_, a, ps); + builder_->addDecoration(product, spv::DecorationNoContraction); + // Shader Model 3: +0 or denormal * anything = +-0. + product = ZeroIfAnyOperandIsZero( + product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs); + builder_->createBranch(&merge_block); + // Merge case - choose between the product and -FLT_MAX.
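Condensed, the condition assembled above implements the following semantics for mulsPrev2 (a host-side sketch mirroring the interpretation used here, with illustrative names):

```cpp
#include <cfloat>
#include <cmath>

// mulsPrev2 model: multiply src0.a by the previous scalar result unless ps is
// -FLT_MAX or non-finite, or src0.b is non-finite or not greater than zero -
// in all of those cases the result is -FLT_MAX.
static float MulsPrev2(float a, float b, float ps) {
  if (ps == -FLT_MAX || !std::isfinite(ps) || !std::isfinite(b) ||
      !(b > 0.0f)) {
    return -FLT_MAX;
  }
  float product = a * ps;
  // Shader Model 3: +0 or denormal * anything = +-0.
  if (a == 0.0f || ps == 0.0f) {
    product = 0.0f;
  }
  return product;
}
```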
+ builder_->setBuildPoint(&merge_block); + { + std::unique_ptr<spv::Instruction> phi_op = + std::make_unique<spv::Instruction>(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(product); + phi_op->addIdOperand(multiply_block.getId()); + phi_op->addIdOperand(const_float_max_neg); + phi_op->addIdOperand(head_block.getId()); + spv::Id phi_result = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + return phi_result; + } + } + + case ucode::AluScalarOpcode::kMaxs: + case ucode::AluScalarOpcode::kMins: + case ucode::AluScalarOpcode::kMaxAs: + case ucode::AluScalarOpcode::kMaxAsf: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + if (instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAs || + instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAsf) { + // maxas: a0 = (int)clamp(floor(src0.a + 0.5), -256.0, 255.0) + // maxasf: a0 = (int)clamp(floor(src0.a), -256.0, 255.0) + spv::Id maxa_address; + if (instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAs) { + maxa_address = builder_->createBinOp( + spv::OpFAdd, type_float_, a, builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(maxa_address, spv::DecorationNoContraction); + } else { + maxa_address = a; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(maxa_address); + maxa_address = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(maxa_address); + id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f)); + id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f)); + builder_->createStore( + builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_)), + var_main_address_register_); + } + if (a == b) { + // max is commonly used as mov. + return a; + } + // Shader Model 3 NaN behavior (a op b ? a : b, not SPIR-V FMax/FMin which + // are undefined for NaN or NMax/NMin which return the non-NaN operand).
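The comparison-plus-select sequence emitted below boils down to the following (a two-line host-side sketch): if either operand is NaN, the ordered comparison is false and the second operand is returned, which is exactly the Shader Model 3 behavior and differs from both SPIR-V FMax/FMin (undefined for NaN) and NMax/NMin (return the non-NaN operand).

```cpp
// Shader Model 3 max/min as comparison + select.
static float Sm3Max(float a, float b) { return a >= b ? a : b; }
static float Sm3Min(float a, float b) { return a < b ? a : b; }
```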
+ return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::Op(kOps[size_t(instr.scalar_opcode)]), + type_bool_, a, b), + a, b); + } + + case ucode::AluScalarOpcode::kSeqs: + case ucode::AluScalarOpcode::kSgts: + case ucode::AluScalarOpcode::kSges: + case ucode::AluScalarOpcode::kSnes: + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_0_), + const_float_1_, const_float_0_); + + case ucode::AluScalarOpcode::kFrcs: + case ucode::AluScalarOpcode::kTruncs: + case ucode::AluScalarOpcode::kFloors: + case ucode::AluScalarOpcode::kExp: + case ucode::AluScalarOpcode::kLog: + case ucode::AluScalarOpcode::kRsq: + case ucode::AluScalarOpcode::kSqrt: + case ucode::AluScalarOpcode::kSin: + case ucode::AluScalarOpcode::kCos: + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + return builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, + GLSLstd450(kOps[size_t(instr.scalar_opcode)]), id_vector_temp_); + case ucode::AluScalarOpcode::kLogc: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Log2, id_vector_temp_); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRcpc: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + builder_->makeFloatConstant(FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRcpf: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + const_float_0_, result); + // Can't create -0.0f with makeFloatConstant due to float comparison + // internally, cast to bit pattern. 
+ result = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeUintConstant(uint32_t(INT32_MIN)), + builder_->createUnaryOp(spv::OpBitcast, type_uint_, result)); + return builder_->createUnaryOp(spv::OpBitcast, type_float_, result); + } + case ucode::AluScalarOpcode::kRcp: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kRsqc: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450InverseSqrt, id_vector_temp_); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + builder_->makeFloatConstant(FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRsqf: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450InverseSqrt, id_vector_temp_); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + const_float_0_, result); + // Can't create -0.0f with makeFloatConstant due to float comparison + // internally, cast to bit pattern. 
+ result = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeUintConstant(uint32_t(INT32_MIN)), + builder_->createUnaryOp(spv::OpBitcast, type_uint_, result)); + return builder_->createUnaryOp(spv::OpBitcast, type_float_, result); + } + + case ucode::AluScalarOpcode::kSetpEq: + case ucode::AluScalarOpcode::kSetpNe: + case ucode::AluScalarOpcode::kSetpGt: + case ucode::AluScalarOpcode::kSetpGe: { + spv::Id predicate = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, const_float_1_); + } + case ucode::AluScalarOpcode::kSetpInv: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id predicate = builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_1_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp( + spv::OpSelect, type_float_, predicate, const_float_0_, + builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_0_), + const_float_1_, a)); + } + case ucode::AluScalarOpcode::kSetpPop: { + spv::Id a_minus_1 = builder_->createBinOp( + spv::OpFSub, type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_1_); + builder_->addDecoration(a_minus_1, spv::DecorationNoContraction); + spv::Id predicate = builder_->createBinOp( + spv::OpFOrdLessThanEqual, type_bool_, a_minus_1, const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, a_minus_1); + } + case ucode::AluScalarOpcode::kSetpClr: + builder_->createStore(builder_->makeBoolConstant(false), + var_main_predicate_); + return builder_->makeFloatConstant(FLT_MAX); + case ucode::AluScalarOpcode::kSetpRstr: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id predicate = builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, a); + } + + case ucode::AluScalarOpcode::kKillsEq: + case ucode::AluScalarOpcode::kKillsGt: + case ucode::AluScalarOpcode::kKillsGe: + case ucode::AluScalarOpcode::kKillsNe: + case ucode::AluScalarOpcode::kKillsOne: { + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id condition = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne + ? 
const_float_1_ + : const_float_0_); + spv::Block& kill_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(merge_block.getId()); + builder_->createConditionalBranch(condition, &kill_block, &merge_block); + builder_->setBuildPoint(&kill_block); + // TODO(Triang3l): Demote to helper invocation to keep derivatives if + // needed (and return 1 if killed in this case). + builder_->createNoResultOp(spv::OpKill); + builder_->setBuildPoint(&merge_block); + return const_float_0_; + } + + case ucode::AluScalarOpcode::kMulsc0: + case ucode::AluScalarOpcode::kMulsc1: { + spv::Id operand_0 = GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001); + spv::Id operand_1 = GetOperandComponents( + operand_storage[1], instr.scalar_operands[1], 0b0001); + spv::Id result = + builder_->createBinOp(spv::OpFMul, type_float_, operand_0, operand_1); + builder_->addDecoration(result, spv::DecorationNoContraction); + if (!(instr.scalar_operands[0].GetIdenticalComponents( + instr.scalar_operands[1]) & + 0b0001)) { + // Shader Model 3: +0 or denormal * anything = +-0. + result = ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(operand_0, instr.scalar_operands[0]), + GetAbsoluteOperand(operand_1, instr.scalar_operands[1])); + } + return result; + } + case ucode::AluScalarOpcode::kAddsc0: + case ucode::AluScalarOpcode::kAddsc1: + case ucode::AluScalarOpcode::kSubsc0: + case ucode::AluScalarOpcode::kSubsc1: { + spv::Id result = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + GetOperandComponents(operand_storage[1], instr.scalar_operands[1], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + + case ucode::AluScalarOpcode::kRetainPrev: + // Special case in ProcessAluInstruction - loading ps only if writing to + // anywhere. + return spv::NoResult; + } + + assert_unhandled_case(instr.scalar_opcode); + EmitTranslationError("Unknown ALU scalar operation"); + return spv::NoResult; +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc new file mode 100644 index 000000000..f9bf7c564 --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -0,0 +1,2696 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include <algorithm> +#include <climits> +#include <cmath> +#include <cstdint> +#include <memory> + +#include "third_party/fmt/include/fmt/format.h" +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" +#include "xenia/base/math.h" + +namespace xe { +namespace gpu { + +void SpirvShaderTranslator::ProcessVertexFetchInstruction( + const ParsedVertexFetchInstruction& instr) { + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); + + uint32_t used_result_components = instr.result.GetUsedResultComponents(); + uint32_t needed_words = xenos::GetVertexFormatNeededWords( + instr.attributes.data_format, used_result_components); + // If this is vfetch_full, the address may still be needed for vfetch_mini - + // don't exit before calculating the address. + if (!needed_words && instr.is_mini_fetch) { + // Nothing to load - just constant 0/1 writes, or the swizzle includes only + // components that don't exist in the format (writing zero instead of them). + // Unpacking assumes at least some word is needed. + StoreResult(instr.result, spv::NoResult); + return; + } + + EnsureBuildPointAvailable(); + + uint32_t fetch_constant_word_0_index = instr.operands[1].storage_index << 1; + + spv::Id address; + if (instr.is_mini_fetch) { + // `base + index * stride` loaded by vfetch_full. + address = builder_->createLoad(var_main_vfetch_address_, spv::NoPrecision); + } else { + // Get the base address in dwords from the bits 2:31 of the first fetch + // constant word. + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // The only element of the fetch constant buffer. + id_vector_temp_.push_back(const_int_0_); + // Vector index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2))); + // Component index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index & 3))); + spv::Id fetch_constant_word_0 = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + // TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch, + // not a texture fetch) here instead of dropping draws with invalid vertex + // fetch constants on the CPU when proper bound checks are added - vfetch + // may be conditional, so fetch constants may also be used conditionally. + address = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + fetch_constant_word_0, + builder_->makeUintConstant(2))); + if (instr.attributes.stride) { + // Convert the index to an integer by flooring or by rounding to the + // nearest (as floor(index + 0.5) because rounding to the nearest even + // makes no sense for addressing, both 1.5 and 2.5 would be 2).
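Put together, the dword address computation described above amounts to the following (a sketch with illustrative names; the base comes from bits 2:31 of fetch constant word 0, and the floating-point index is floored, or rounded via floor(x + 0.5)):

```cpp
#include <cmath>
#include <cstdint>

// Dword address for vfetch_full: base from the fetch constant, plus the
// integerized index scaled by the stride in dwords (skipped when the stride
// is zero, as in the translator).
static int32_t VfetchAddressDwords(uint32_t fetch_word_0, float index,
                                   bool index_rounded, uint32_t stride) {
  int32_t address = int32_t(fetch_word_0 >> 2);
  if (stride) {
    float i = index_rounded ? index + 0.5f : index;
    address += int32_t(std::floor(i)) * int32_t(stride);
  }
  return address;
}
```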
+ spv::Id index = GetOperandComponents( + LoadOperandStorage(instr.operands[0]), instr.operands[0], 0b0001); + if (instr.attributes.is_index_rounded) { + index = builder_->createBinOp(spv::OpFAdd, type_float_, index, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(index, spv::DecorationNoContraction); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(index); + index = builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_)); + if (instr.attributes.stride > 1) { + index = builder_->createBinOp( + spv::OpIMul, type_int_, index, + builder_->makeIntConstant(int(instr.attributes.stride))); + } + address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index); + } + // Store the address for the subsequent vfetch_mini. + builder_->createStore(address, var_main_vfetch_address_); + } + + if (!needed_words) { + // The vfetch_full address has been loaded for the subsequent vfetch_mini, + // but there's no data to load. + StoreResult(instr.result, spv::NoResult); + return; + } + + // Load the needed words. + unsigned int word_composite_indices[4] = {}; + spv::Id word_composite_constituents[4]; + uint32_t word_count = 0; + uint32_t words_remaining = needed_words; + uint32_t word_index; + while (xe::bit_scan_forward(words_remaining, &word_index)) { + words_remaining &= ~(1 << word_index); + spv::Id word_address = address; + // Add the word offset from the instruction (signed), plus the offset of the + // word within the element. + int32_t word_offset = instr.attributes.offset + word_index; + if (word_offset) { + word_address = + builder_->createBinOp(spv::OpIAdd, type_int_, word_address, + builder_->makeIntConstant(int(word_offset))); + } + word_composite_indices[word_index] = word_count; + // FIXME(Triang3l): Bound checking is not done here, but haven't encountered + // any games relying on out-of-bounds access. On Adreno 200 on Android (LG + // P705), however, words (not full elements) out of glBufferData bounds + // contain 0. + word_composite_constituents[word_count++] = + LoadUint32FromSharedMemory(word_address); + } + spv::Id words; + if (word_count > 1) { + // Copying from the array to id_vector_temp_ now, not in the loop above, + // because of the LoadUint32FromSharedMemory call (potentially using + // id_vector_temp_ internally). + id_vector_temp_.clear(); + id_vector_temp_.reserve(word_count); + id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_constituents, + word_composite_constituents + word_count); + words = builder_->createCompositeConstruct( + type_uint_vectors_[word_count - 1], id_vector_temp_); + } else { + words = word_composite_constituents[0]; + } + + // Endian swap the words, getting the endianness from bits 0:1 of the second + // fetch constant word. + uint32_t fetch_constant_word_1_index = fetch_constant_word_0_index + 1; + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // The only element of the fetch constant buffer. + id_vector_temp_.push_back(const_int_0_); + // Vector index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_1_index >> 2))); + // Component index. 
+ id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_1_index & 3))); + spv::Id fetch_constant_word_1 = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + words = EndianSwap32Uint( + words, builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_1, + builder_->makeUintConstant(0b11))); + + spv::Id result = spv::NoResult; + + // Convert the format. + uint32_t used_format_components = + used_result_components & ((1 << xenos::GetVertexFormatComponentCount( + instr.attributes.data_format)) - + 1); + // If needed_words is not zero (checked in the beginning), this must not be + // zero too. For simplicity, it's assumed that something will be unpacked + // here. + assert_not_zero(used_format_components); + uint32_t used_format_component_count = xe::bit_count(used_format_components); + spv::Id result_type = type_float_vectors_[used_format_component_count - 1]; + bool format_is_packed = false; + int packed_widths[4] = {}, packed_offsets[4] = {}; + uint32_t packed_words[4] = {}; + switch (instr.attributes.data_format) { + case xenos::VertexFormat::k_8_8_8_8: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = + packed_widths[3] = 8; + packed_offsets[1] = 8; + packed_offsets[2] = 16; + packed_offsets[3] = 24; + break; + case xenos::VertexFormat::k_2_10_10_10: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = 10; + packed_widths[3] = 2; + packed_offsets[1] = 10; + packed_offsets[2] = 20; + packed_offsets[3] = 30; + break; + case xenos::VertexFormat::k_10_11_11: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = 11; + packed_widths[2] = 10; + packed_offsets[1] = 11; + packed_offsets[2] = 22; + break; + case xenos::VertexFormat::k_11_11_10: + format_is_packed = true; + packed_widths[0] = 10; + packed_widths[1] = packed_widths[2] = 11; + packed_offsets[1] = 10; + packed_offsets[2] = 21; + break; + case xenos::VertexFormat::k_16_16: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = 16; + packed_offsets[1] = 16; + break; + case xenos::VertexFormat::k_16_16_16_16: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = + packed_widths[3] = 16; + packed_offsets[1] = packed_offsets[3] = 16; + packed_words[2] = packed_words[3] = 1; + break; + + case xenos::VertexFormat::k_16_16_FLOAT: + case xenos::VertexFormat::k_16_16_16_16_FLOAT: { + // FIXME(Triang3l): This converts from GLSL float16 with NaNs instead of + // Xbox 360 float16 with extended range. However, haven't encountered + // games relying on that yet. + spv::Id word_needed_component_values[2] = {}; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t word_needed_components = + (used_format_components >> (i * 2)) & 0b11; + if (!word_needed_components) { + continue; + } + spv::Id word; + if (word_count > 1) { + word = builder_->createCompositeExtract(words, type_uint_, + word_composite_indices[i]); + } else { + word = words; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(word); + word = builder_->createBuiltinCall(type_float2_, ext_inst_glsl_std_450_, + GLSLstd450UnpackHalf2x16, + id_vector_temp_); + if (word_needed_components != 0b11) { + // If only one of two components is needed, extract it. + word = builder_->createCompositeExtract( + word, type_float_, (word_needed_components & 0b01) ? 
0 : 1); + } + word_needed_component_values[i] = word; + } + if (word_needed_component_values[1] == spv::NoResult) { + result = word_needed_component_values[0]; + } else if (word_needed_component_values[0] == spv::NoResult) { + result = word_needed_component_values[1]; + } else { + // Bypassing the assertion in spv::Builder::createCompositeConstruct as + // of November 5, 2020 - can construct vectors by concatenating vectors, + // not just from individual scalars. + std::unique_ptr<spv::Instruction> composite_construct_op = + std::make_unique<spv::Instruction>(builder_->getUniqueId(), + result_type, + spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(word_needed_component_values[0]); + composite_construct_op->addIdOperand(word_needed_component_values[1]); + result = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + } break; + + case xenos::VertexFormat::k_32: + case xenos::VertexFormat::k_32_32: + case xenos::VertexFormat::k_32_32_32_32: + assert_true(used_format_components == needed_words); + if (instr.attributes.is_signed) { + result = builder_->createUnaryOp( + spv::OpBitcast, type_int_vectors_[used_format_component_count - 1], + words); + result = + builder_->createUnaryOp(spv::OpConvertSToF, result_type, result); + } else { + result = + builder_->createUnaryOp(spv::OpConvertUToF, result_type, words); + } + if (!instr.attributes.is_integer) { + if (instr.attributes.is_signed) { + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 2147483647.0f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to + // 1/(2^31) as float32. + break; + case xenos::SignedRepeatingFractionMode::kNoZero: { + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 2147483647.5f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + spv::Id const_no_zero = + builder_->makeFloatConstant(0.5f / 2147483647.5f); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert(id_vector_temp_.cend(), + used_format_component_count, + const_no_zero); + const_no_zero = builder_->makeCompositeConstant( + result_type, id_vector_temp_); + } + result = builder_->createBinOp(spv::OpFAdd, result_type, result, + const_no_zero); + builder_->addDecoration(result, spv::DecorationNoContraction); + } break; + default: + assert_unhandled_case(instr.attributes.signed_rf_mode); + } + } else { + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 4294967295.0f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + } + break; + + case xenos::VertexFormat::k_32_FLOAT: + case xenos::VertexFormat::k_32_32_FLOAT: + case xenos::VertexFormat::k_32_32_32_32_FLOAT: + case xenos::VertexFormat::k_32_32_32_FLOAT: + assert_true(used_format_components == needed_words); + result = builder_->createUnaryOp( + spv::OpBitcast, type_float_vectors_[word_count - 1], words); + break; + + default: + assert_unhandled_case(instr.attributes.data_format); + } + + if (format_is_packed) { + assert_true(result == spv::NoResult); + // Extract the components from the words as individual ints or uints.
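The bitfield extraction used for the packed path corresponds to the following host-side helpers (a sketch of what OpBitFieldSExtract and OpBitFieldUExtract do for one component; valid for the field widths that occur here, which are at most 16 bits):

```cpp
#include <cstdint>

// Take `width` bits at `offset` and sign-extend them (OpBitFieldSExtract).
static int32_t ExtractSigned(uint32_t word, int offset, int width) {
  uint32_t field = (word >> offset) & ((uint32_t(1) << width) - 1);
  uint32_t sign_bit = uint32_t(1) << (width - 1);
  return int32_t((field ^ sign_bit) - sign_bit);  // Sign extension trick.
}

// Take `width` bits at `offset`, zero-extended (OpBitFieldUExtract).
static uint32_t ExtractUnsigned(uint32_t word, int offset, int width) {
  return (word >> offset) & ((uint32_t(1) << width) - 1);
}
```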
+ if (instr.attributes.is_signed) { + // Sign-extending extraction - in GLSL the sign-extending overload accepts + // int. + words = builder_->createUnaryOp(spv::OpBitcast, + type_int_vectors_[word_count - 1], words); + } + int extracted_widths[4] = {}; + spv::Id extracted_components[4] = {}; + uint32_t extracted_component_count = 0; + unsigned int extraction_word_current_index = UINT_MAX; + // Default is `words` itself if 1 word loaded. + spv::Id extraction_word_current = words; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_format_components & (1 << i))) { + continue; + } + if (word_count > 1) { + unsigned int extraction_word_new_index = + word_composite_indices[packed_words[i]]; + if (extraction_word_current_index != extraction_word_new_index) { + extraction_word_current_index = extraction_word_new_index; + extraction_word_current = builder_->createCompositeExtract( + words, instr.attributes.is_signed ? type_int_ : type_uint_, + extraction_word_new_index); + } + } + int extraction_width = packed_widths[i]; + assert_not_zero(extraction_width); + extracted_widths[extracted_component_count] = extraction_width; + extracted_components[extracted_component_count] = builder_->createTriOp( + instr.attributes.is_signed ? spv::OpBitFieldSExtract + : spv::OpBitFieldUExtract, + instr.attributes.is_signed ? type_int_ : type_uint_, + extraction_word_current, builder_->makeIntConstant(packed_offsets[i]), + builder_->makeIntConstant(extraction_width)); + ++extracted_component_count; + } + // Combine extracted components into a vector. + assert_true(extracted_component_count == used_format_component_count); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert( + id_vector_temp_.cend(), extracted_components, + extracted_components + used_format_component_count); + result = builder_->createCompositeConstruct( + instr.attributes.is_signed + ? type_int_vectors_[used_format_component_count - 1] + : type_uint_vectors_[used_format_component_count - 1], + id_vector_temp_); + } else { + result = extracted_components[0]; + } + // Convert to floating-point. + result = builder_->createUnaryOp( + instr.attributes.is_signed ? spv::OpConvertSToF : spv::OpConvertUToF, + result_type, result); + // Normalize. + if (!instr.attributes.is_integer) { + float packed_scales[4]; + bool packed_scales_same = true; + for (uint32_t i = 0; i < used_format_component_count; ++i) { + int extracted_width = extracted_widths[i]; + // The signed case would result in 1.0 / 0.0 for 1-bit components, but + // there are no Xenos formats with them. 
+ assert_true(extracted_width >= 2); + packed_scales_same &= extracted_width == extracted_widths[0]; + float packed_scale_inv; + if (instr.attributes.is_signed) { + packed_scale_inv = float((uint32_t(1) << (extracted_width - 1)) - 1); + if (instr.attributes.signed_rf_mode == + xenos::SignedRepeatingFractionMode::kNoZero) { + packed_scale_inv += 0.5f; + } + } else { + packed_scale_inv = float((uint32_t(1) << extracted_width) - 1); + } + packed_scales[i] = 1.0f / packed_scale_inv; + } + spv::Id const_packed_scale = + builder_->makeFloatConstant(packed_scales[0]); + spv::Op packed_scale_mul_op; + if (used_format_component_count > 1) { + if (packed_scales_same) { + packed_scale_mul_op = spv::OpVectorTimesScalar; + } else { + packed_scale_mul_op = spv::OpFMul; + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.push_back(const_packed_scale); + for (uint32_t i = 1; i < used_format_component_count; ++i) { + id_vector_temp_.push_back( + builder_->makeFloatConstant(packed_scales[i])); + } + const_packed_scale = + builder_->makeCompositeConstant(result_type, id_vector_temp_); + } + } else { + packed_scale_mul_op = spv::OpFMul; + } + result = builder_->createBinOp(packed_scale_mul_op, result_type, result, + const_packed_scale); + builder_->addDecoration(result, spv::DecorationNoContraction); + if (instr.attributes.is_signed) { + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: { + // Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1. Using regular FMax, + // not NMax, because the number is known not to be NaN. + spv::Id const_minus_1 = builder_->makeFloatConstant(-1.0f); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert(id_vector_temp_.cend(), + used_format_component_count, + const_minus_1); + const_minus_1 = + builder_->makeCompositeConstant(result_type, id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(result); + id_vector_temp_.push_back(const_minus_1); + result = + builder_->createBuiltinCall(result_type, ext_inst_glsl_std_450_, + GLSLstd450FMax, id_vector_temp_); + } break; + case xenos::SignedRepeatingFractionMode::kNoZero: + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + for (uint32_t i = 0; i < used_format_component_count; ++i) { + id_vector_temp_.push_back( + builder_->makeFloatConstant(0.5f * packed_scales[i])); + } + result = + builder_->createBinOp(spv::OpFAdd, result_type, result, + used_format_component_count > 1 + ? builder_->makeCompositeConstant( + result_type, id_vector_temp_) + : id_vector_temp_[0]); + builder_->addDecoration(result, spv::DecorationNoContraction); + break; + default: + assert_unhandled_case(instr.attributes.signed_rf_mode); + } + } + } + } + + if (result != spv::NoResult) { + // Apply the exponent bias. + if (instr.attributes.exp_adjust) { + result = builder_->createBinOp(spv::OpVectorTimesScalar, + builder_->getTypeId(result), result, + builder_->makeFloatConstant(std::ldexp( + 1.0f, instr.attributes.exp_adjust))); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + + // If any components not present in the format were requested, pad the + // resulting vector with zeros.
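Summarizing the signed normalization just emitted, the two repeating fraction modes for an n-bit signed field behave as follows (a host-side sketch; names are illustrative):

```cpp
#include <algorithm>
#include <cstdint>

// kZeroClampMinusOne: x / (2^(n-1) - 1), with the most negative code
// clamped to -1 (both -(2^(n-1)) and -(2^(n-1)-1) map to -1).
static float NormalizeZeroClampMinusOne(int32_t x, int n) {
  float scale = 1.0f / float((uint32_t(1) << (n - 1)) - 1);
  return std::max(float(x) * scale, -1.0f);
}

// kNoZero: (x + 0.5) / (2^(n-1) - 0.5), so no code maps to exactly zero;
// emitted above as a multiply by the scale plus 0.5 * scale.
static float NormalizeNoZero(int32_t x, int n) {
  float scale_inv = float((uint32_t(1) << (n - 1)) - 1) + 0.5f;
  return (float(x) + 0.5f) / scale_inv;
}
```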
+ uint32_t used_missing_components = + used_result_components & ~used_format_components; + if (used_missing_components) { + // Bypassing the assertion in spv::Builder::createCompositeConstruct as of + // November 5, 2020 - can construct vectors by concatenating vectors, not + // just from individual scalars. + std::unique_ptr<spv::Instruction> composite_construct_op = + std::make_unique<spv::Instruction>( + builder_->getUniqueId(), + type_float_vectors_[xe::bit_count(used_result_components) - 1], + spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(result); + composite_construct_op->addIdOperand( + const_float_vectors_0_[xe::bit_count(used_missing_components) - 1]); + result = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + } + StoreResult(instr.result, result); +} + +void SpirvShaderTranslator::ProcessTextureFetchInstruction( + const ParsedTextureFetchInstruction& instr) { + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); + + EnsureBuildPointAvailable(); + + // Handle the instructions for setting the register LOD. + switch (instr.opcode) { + case ucode::FetchOpcode::kSetTextureLod: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0001), + var_main_tfetch_lod_); + return; + case ucode::FetchOpcode::kSetTextureGradientsHorz: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0111), + var_main_tfetch_gradients_h_); + return; + case ucode::FetchOpcode::kSetTextureGradientsVert: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0111), + var_main_tfetch_gradients_v_); + return; + default: + break; + } + + // Handle instructions that store something. + uint32_t used_result_components = instr.result.GetUsedResultComponents(); + uint32_t used_result_nonzero_components = instr.GetNonZeroResultComponents(); + switch (instr.opcode) { + case ucode::FetchOpcode::kTextureFetch: + break; + case ucode::FetchOpcode::kGetTextureBorderColorFrac: + // TODO(Triang3l): Bind a black texture with a white border to calculate + // the border color fraction (in the X component of the result). + assert_always(); + EmitTranslationError("getBCF is unimplemented", false); + used_result_nonzero_components = 0; + break; + case ucode::FetchOpcode::kGetTextureComputedLod: + break; + case ucode::FetchOpcode::kGetTextureGradients: + break; + case ucode::FetchOpcode::kGetTextureWeights: + // FIXME(Triang3l): Currently disregarding the LOD completely in + // getWeights because the needed code would be very complicated, while + // getWeights is mostly used for things like PCF of shadow maps, that + // don't have mips. The LOD would be needed for the mip lerp factor in W + // of the return value and to choose the LOD where interpolation would + // take place for XYZ. That would require either implementing the LOD + // calculation algorithm using the ALU (since the `lod` instruction is + // limited to pixel shaders and can't be used when there's control flow + // divergence, unlike explicit gradients), or sampling a texture filled + // with LOD numbers (easier and more consistent - unclamped LOD doesn't + // make sense for getWeights anyway). The same applies to offsets.
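With the mip lerp factor in W zeroed out as described, the XYZ weights returned by getWeights reduce to the fractional position between texel centers. A sketch of that per-axis weight (illustrative; point sampling and offsets are ignored here):

```cpp
#include <cmath>

// Bilinear lerp factor along one axis: fractional part of the coordinate in
// texel space, measured from the texel center.
static float BilinearWeight(float coord_normalized, float size) {
  float texel_space = coord_normalized * size - 0.5f;
  return texel_space - std::floor(texel_space);
}
```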
+ used_result_nonzero_components &= ~uint32_t(0b1000); + break; + default: + assert_unhandled_case(instr.opcode); + EmitTranslationError("Unknown texture fetch operation"); + used_result_nonzero_components = 0; + } + uint32_t used_result_component_count = xe::bit_count(used_result_components); + if (!used_result_nonzero_components) { + // Nothing to fetch, only constant 0/1 writes - simplify the rest of the + // function so it doesn't have to handle this case. + if (used_result_components) { + StoreResult(instr.result, + const_float_vectors_0_[used_result_component_count - 1]); + } + return; + } + + spv::Id result[] = {const_float_0_, const_float_0_, const_float_0_, + const_float_0_}; + + if (instr.opcode == ucode::FetchOpcode::kGetTextureGradients) { + // Doesn't need the texture, handle separately. + spv::Id operand_0_storage = LoadOperandStorage(instr.operands[0]); + bool derivative_function_x_used = + (used_result_nonzero_components & 0b0011) != 0; + bool derivative_function_y_used = + (used_result_nonzero_components & 0b1100) != 0; + spv::Id derivative_function_x = spv::NoResult; + spv::Id derivative_function_y = spv::NoResult; + if (derivative_function_x_used && derivative_function_y_used) { + spv::Id derivative_function = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0011); + derivative_function_x = + builder_->createCompositeExtract(derivative_function, type_float_, 0); + derivative_function_y = + builder_->createCompositeExtract(derivative_function, type_float_, 1); + } else { + if (derivative_function_x_used) { + derivative_function_x = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0001); + } + if (derivative_function_y_used) { + derivative_function_y = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0010); + } + } + builder_->addCapability(spv::CapabilityDerivativeControl); + uint32_t derivative_components_remaining = used_result_nonzero_components; + uint32_t derivative_component_index; + while (xe::bit_scan_forward(derivative_components_remaining, + &derivative_component_index)) { + derivative_components_remaining &= + ~(UINT32_C(1) << derivative_component_index); + result[derivative_component_index] = builder_->createUnaryOp( + (derivative_component_index & 0b01) ? spv::OpDPdyCoarse + : spv::OpDPdxCoarse, + type_float_, + (derivative_component_index & 0b10) ? derivative_function_y + : derivative_function_x); + } + } else { + // kTextureFetch, kGetTextureComputedLod or kGetTextureWeights. + + // Whether to use gradients (implicit or explicit) for LOD calculation. 
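+ // Implicit gradients only exist where 2x2 fragment quads are executed, so
+ // they can be used in pixel shaders only; vertex shaders can still get
+ // gradient-based LOD if the guest has provided the gradients explicitly
+ // via kSetTextureGradientsHorz/kSetTextureGradientsVert.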
+ bool use_computed_lod =
+ instr.attributes.use_computed_lod &&
+ (is_pixel_shader() || instr.attributes.use_register_gradients);
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod &&
+ (!use_computed_lod || instr.attributes.use_register_gradients)) {
+ assert_always();
+ EmitTranslationError(
+ "getCompTexLOD used with explicit LOD or gradients - contradicts "
+ "MSDN",
+ false);
+ StoreResult(instr.result,
+ const_float_vectors_0_[used_result_component_count - 1]);
+ return;
+ }
+
+ uint32_t fetch_constant_index = instr.operands[1].storage_index;
+ uint32_t fetch_constant_word_0_index = 6 * fetch_constant_index;
+
+ spv::Id sampler = spv::NoResult;
+ spv::Id image_2d_array_or_cube_unsigned = spv::NoResult;
+ spv::Id image_2d_array_or_cube_signed = spv::NoResult;
+ spv::Id image_3d_unsigned = spv::NoResult;
+ spv::Id image_3d_signed = spv::NoResult;
+ if (instr.opcode != ucode::FetchOpcode::kGetTextureWeights) {
+ bool bindings_set_up = true;
+ // While GL_ARB_texture_query_lod specifies the value for
+ // GL_NEAREST_MIPMAP_NEAREST and GL_LINEAR_MIPMAP_NEAREST minifying
+ // functions as rounded (unlike the `lod` instruction in Direct3D 10.1+,
+ // which is not defined for point sampling), the XNA assembler doesn't
+ // accept MipFilter overrides for getCompTexLOD - probably should be
+ // linear only, though not known exactly.
+ //
+ // 4D5307F2 uses vertex displacement map textures for tessellated models
+ // like the beehive tree with explicit LOD with point sampling (they store
+ // values packed in two components), yet the fetch constant has
+ // anisotropic filtering enabled. However, Direct3D 12 doesn't allow
+ // mixing anisotropic and point filtering. Possibly anisotropic filtering
+ // should be disabled when explicit LOD is used - do this here.
+ size_t sampler_index = FindOrAddSamplerBinding(
+ fetch_constant_index, instr.attributes.mag_filter,
+ instr.attributes.min_filter,
+ instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod
+ ? xenos::TextureFilter::kLinear
+ : instr.attributes.mip_filter,
+ use_computed_lod ? instr.attributes.aniso_filter
+ : xenos::AnisoFilter::kDisabled);
+ xenos::FetchOpDimension dimension_2d_array_or_cube =
+ instr.dimension == xenos::FetchOpDimension::k3DOrStacked
+ ? xenos::FetchOpDimension::k2D
+ : instr.dimension;
+ size_t image_2d_array_or_cube_unsigned_index = FindOrAddTextureBinding(
+ fetch_constant_index, dimension_2d_array_or_cube, false);
+ size_t image_2d_array_or_cube_signed_index = FindOrAddTextureBinding(
+ fetch_constant_index, dimension_2d_array_or_cube, true);
+ if (sampler_index == SIZE_MAX ||
+ image_2d_array_or_cube_unsigned_index == SIZE_MAX ||
+ image_2d_array_or_cube_signed_index == SIZE_MAX) {
+ bindings_set_up = false;
+ }
+ size_t image_3d_unsigned_index = SIZE_MAX;
+ size_t image_3d_signed_index = SIZE_MAX;
+ if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) {
+ image_3d_unsigned_index = FindOrAddTextureBinding(
+ fetch_constant_index, xenos::FetchOpDimension::k3DOrStacked, false);
+ image_3d_signed_index = FindOrAddTextureBinding(
+ fetch_constant_index, xenos::FetchOpDimension::k3DOrStacked, true);
+ if (image_3d_unsigned_index == SIZE_MAX ||
+ image_3d_signed_index == SIZE_MAX) {
+ bindings_set_up = false;
+ }
+ }
+ if (!bindings_set_up) {
+ // Too many image or sampler bindings used.
+ StoreResult(instr.result,
+ const_float_vectors_0_[used_result_component_count - 1]);
+ return;
+ }
+ sampler = builder_->createLoad(sampler_bindings_[sampler_index].variable,
+ spv::NoPrecision);
+ const TextureBinding& image_2d_array_or_cube_unsigned_binding =
+ texture_bindings_[image_2d_array_or_cube_unsigned_index];
+ image_2d_array_or_cube_unsigned = builder_->createLoad(
+ image_2d_array_or_cube_unsigned_binding.variable, spv::NoPrecision);
+ const TextureBinding& image_2d_array_or_cube_signed_binding =
+ texture_bindings_[image_2d_array_or_cube_signed_index];
+ image_2d_array_or_cube_signed = builder_->createLoad(
+ image_2d_array_or_cube_signed_binding.variable, spv::NoPrecision);
+ if (image_3d_unsigned_index != SIZE_MAX) {
+ const TextureBinding& image_3d_unsigned_binding =
+ texture_bindings_[image_3d_unsigned_index];
+ image_3d_unsigned = builder_->createLoad(
+ image_3d_unsigned_binding.variable, spv::NoPrecision);
+ }
+ if (image_3d_signed_index != SIZE_MAX) {
+ const TextureBinding& image_3d_signed_binding =
+ texture_bindings_[image_3d_signed_index];
+ image_3d_signed = builder_->createLoad(image_3d_signed_binding.variable,
+ spv::NoPrecision);
+ }
+ }
+
+ // Get offsets applied to the coordinates before sampling.
+ // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched, not
+ // at LOD 0. However, since offsets have granularity of 0.5, not 1, on the
+ // Xenos, they can't be passed directly as ConstOffset to the image sample
+ // instruction (plus-minus 0.5 offsets are very common in games). But
+ // offsetting at mip levels is a rare usage case; mostly, offsets are used
+ // for things like shadow maps and blur, where there are no mips.
+ float offset_values[3] = {};
+ // MSDN doesn't list offsets as getCompTexLOD parameters.
+ if (instr.opcode != ucode::FetchOpcode::kGetTextureComputedLod) {
+ // Add a small epsilon to the offset (1.5/4 of the fixed-point texture
+ // coordinate ULP with 8-bit subtexel precision - shouldn't significantly
+ // affect the fixed-point conversion; 1/4 is also not enough, which is
+ // very noticeable with 3x resolution scaling on the weapon in 4D5307E6,
+ // at least on the Direct3D 12 backend) to resolve ambiguity when fetching
+ // point-sampled textures between texels. This applies to both normalized
+ // (58410954 Xbox Live Arcade logo, coordinates interpolated between
+ // vertices with half-pixel offset) and unnormalized (4D5307E6 lighting
+ // G-buffer reading, ps_param_gen pixels) coordinates. On Nvidia Pascal,
+ // without this adjustment, blockiness is visible in both cases. Possibly
+ // there is a better way; however, an attempt was made to error-correct
+ // division by adding the difference between original and re-denormalized
+ // coordinates, but on Nvidia, `mul` (on Direct3D 12) and internal
+ // multiplication in texture sampling apparently round differently, so
+ // `mul` gives a value that would be floored as expected, but the
+ // left/upper pixel is still sampled instead.
+ const float kRoundingOffset = 1.5f / 1024.0f;
+ switch (instr.dimension) {
+ case xenos::FetchOpDimension::k1D:
+ offset_values[0] = instr.attributes.offset_x + kRoundingOffset;
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
+ // For coordinate lerp factors. This needs to be done separately for
+ // point mag/min filters, but they're currently not handled here
+ // anyway.
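+ // For example (ignoring the rounding epsilon), with unnormalized
+ // x = 10.3, the surrounding texel centers are at 9.5 and 10.5, and the
+ // expected lerp weight is fract(10.3 - 0.5) = 0.8 - folding the -0.5
+ // into the offset here lets the later fract() produce exactly that.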
+ offset_values[0] -= 0.5f;
+ }
+ break;
+ case xenos::FetchOpDimension::k2D:
+ offset_values[0] = instr.attributes.offset_x + kRoundingOffset;
+ offset_values[1] = instr.attributes.offset_y + kRoundingOffset;
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
+ offset_values[0] -= 0.5f;
+ offset_values[1] -= 0.5f;
+ }
+ break;
+ case xenos::FetchOpDimension::k3DOrStacked:
+ offset_values[0] = instr.attributes.offset_x + kRoundingOffset;
+ offset_values[1] = instr.attributes.offset_y + kRoundingOffset;
+ offset_values[2] = instr.attributes.offset_z + kRoundingOffset;
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
+ offset_values[0] -= 0.5f;
+ offset_values[1] -= 0.5f;
+ offset_values[2] -= 0.5f;
+ }
+ break;
+ case xenos::FetchOpDimension::kCube:
+ // Applying the rounding epsilon to cube maps too for potential game
+ // passes processing cube map faces themselves.
+ offset_values[0] = instr.attributes.offset_x + kRoundingOffset;
+ offset_values[1] = instr.attributes.offset_y + kRoundingOffset;
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
+ offset_values[0] -= 0.5f;
+ offset_values[1] -= 0.5f;
+ // The logic for ST weights is the same for all faces.
+ // FIXME(Triang3l): If LOD calculation is added to getWeights, face
+ // offset probably will need to be handled too (if the hardware
+ // supports it at all, though MSDN lists OffsetZ in tfetchCube).
+ } else {
+ offset_values[2] = instr.attributes.offset_z;
+ }
+ break;
+ }
+ }
+ uint32_t offsets_not_zero = 0b000;
+ for (uint32_t i = 0; i < 3; ++i) {
+ if (offset_values[i]) {
+ offsets_not_zero |= 1 << i;
+ }
+ }
+
+ // Fetch constant word usage:
+ // - 2: Size (needed only once).
+ // - 3: Exponent adjustment (needed only once).
+ // - 4: Conditionally for 3D kTextureFetch: stacked texture filtering modes.
+ // Unconditionally for kTextureFetch: LOD bias and gradient exponent
+ // bias, result exponent bias.
+ // - 5: Dimensionality (3D or 2D stacked - needed only once).
+
+ // Load the texture size and whether it's 3D or stacked if needed.
+ // 1D: X - width.
+ // 2D, cube: X - width, Y - height (cube maps probably can only be square,
+ // but the height is loaded anyway for simplicity).
+ // 3D: X - width, Y - height, Z - depth.
+ uint32_t size_needed_components = 0b000;
+ bool data_is_3d_needed = false;
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
+ // Size needed for denormalization for the coordinate lerp factors.
+ // FIXME(Triang3l): Currently disregarding the LOD completely in
+ // getWeights. However, if the LOD lerp factor and the LOD where filtering
+ // would happen are ever calculated, all components of the size may be
+ // needed for ALU LOD calculation with normalized coordinates (or, if a
+ // texture filled with LOD indices is used, coordinates will need to be
+ // normalized as usual).
+ if (!instr.attributes.unnormalized_coordinates) {
+ switch (instr.dimension) {
+ case xenos::FetchOpDimension::k1D:
+ size_needed_components |= used_result_nonzero_components & 0b0001;
+ break;
+ case xenos::FetchOpDimension::k2D:
+ case xenos::FetchOpDimension::kCube:
+ size_needed_components |= used_result_nonzero_components & 0b0011;
+ break;
+ case xenos::FetchOpDimension::k3DOrStacked:
+ size_needed_components |= used_result_nonzero_components & 0b0111;
+ break;
+ }
+ }
+ } else {
+ // Size needed for normalization (or, for stacked texture layers,
+ // denormalization) and for offsets.
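+ // Offsets are in texels, so with normalized coordinates they must be
+ // rescaled before being applied - roughly u' = u + offset / width; for
+ // example, offset_x = 0.5 on a 256-texel-wide texture becomes 0.5 / 256
+ // in normalized space. Hence the size is loaded whenever the
+ // corresponding offset is nonzero.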
+ size_needed_components |= offsets_not_zero;
+ switch (instr.dimension) {
+ case xenos::FetchOpDimension::k1D:
+ if (instr.attributes.unnormalized_coordinates) {
+ size_needed_components |= 0b0001;
+ }
+ break;
+ case xenos::FetchOpDimension::k2D:
+ if (instr.attributes.unnormalized_coordinates) {
+ size_needed_components |= 0b0011;
+ }
+ break;
+ case xenos::FetchOpDimension::k3DOrStacked:
+ // Stacked and 3D textures are fetched from different bindings - the
+ // check is always needed.
+ data_is_3d_needed = true;
+ if (instr.attributes.unnormalized_coordinates) {
+ // Need to normalize all (if 3D).
+ size_needed_components |= 0b0111;
+ } else {
+ // Need to denormalize Z (if stacked).
+ size_needed_components |= 0b0100;
+ }
+ break;
+ case xenos::FetchOpDimension::kCube:
+ if (instr.attributes.unnormalized_coordinates) {
+ size_needed_components |= 0b0011;
+ }
+ // The size is not needed for face ID offset.
+ size_needed_components &= 0b0011;
+ break;
+ }
+ }
+ if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked &&
+ size_needed_components) {
+ // Stacked and 3D textures have different size packing - need to get
+ // whether the texture is 3D unconditionally.
+ data_is_3d_needed = true;
+ }
+ spv::Id data_is_3d = spv::NoResult;
+ if (data_is_3d_needed) {
+ // Get the data dimensionality from bits 9:10 of the fetch constant
+ // word 5.
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ id_vector_temp_.push_back(const_int_0_);
+ id_vector_temp_.push_back(builder_->makeIntConstant(
+ int((fetch_constant_word_0_index + 5) >> 2)));
+ id_vector_temp_.push_back(builder_->makeIntConstant(
+ int((fetch_constant_word_0_index + 5) & 3)));
+ spv::Id fetch_constant_word_5 =
+ builder_->createLoad(builder_->createAccessChain(
+ spv::StorageClassUniform,
+ uniform_fetch_constants_, id_vector_temp_),
+ spv::NoPrecision);
+ spv::Id data_dimension = builder_->createTriOp(
+ spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_5,
+ builder_->makeUintConstant(9), builder_->makeUintConstant(2));
+ data_is_3d = builder_->createBinOp(
+ spv::OpIEqual, type_bool_, data_dimension,
+ builder_->makeUintConstant(
+ static_cast<uint32_t>(xenos::DataDimension::k3D)));
+ }
+ spv::Id size[3] = {};
+ if (size_needed_components) {
+ // Get the size from the fetch constant word 2.
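+ // Word 2 is a packed bit field, with the field widths given by the
+ // kTexture*Log2 constants used below - e.g. for 2D and cube, the width is
+ // in the low kTexture2DCubeMaxWidthHeightLog2 bits and the height in the
+ // same number of bits directly above it, both stored minus 1 (the +1 is
+ // applied after extraction).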
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 2) >> 2))); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 2) & 3))); + spv::Id fetch_constant_word_2 = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: { + if (size_needed_components & 0b1) { + size[0] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant(xenos::kTexture1DMaxWidthLog2)); + } + assert_zero(size_needed_components & 0b110); + } break; + case xenos::FetchOpDimension::k2D: + case xenos::FetchOpDimension::kCube: { + if (size_needed_components & 0b1) { + size[0] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2)); + } + if (size_needed_components & 0b10) { + spv::Id width_height_bit_count = builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2); + size[1] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count, width_height_bit_count); + } + assert_zero(size_needed_components & 0b100); + } break; + case xenos::FetchOpDimension::k3DOrStacked: { + if (size_needed_components & 0b1) { + spv::Id size_3d = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + fetch_constant_word_2, const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture3DMaxWidthHeightLog2)); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2)); + assert_true(data_is_3d != spv::NoResult); + size[0] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + if (size_needed_components & 0b10) { + spv::Id width_height_bit_count_3d = + builder_->makeUintConstant(xenos::kTexture3DMaxWidthHeightLog2); + spv::Id size_3d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count_3d, width_height_bit_count_3d); + spv::Id width_height_bit_count_2d = builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count_2d, width_height_bit_count_2d); + assert_true(data_is_3d != spv::NoResult); + size[1] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + if (size_needed_components & 0b100) { + spv::Id size_3d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + builder_->makeUintConstant(xenos::kTexture3DMaxWidthHeightLog2 * + 2), + builder_->makeUintConstant(xenos::kTexture3DMaxDepthLog2)); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2 * 2), + builder_->makeUintConstant(xenos::kTexture2DMaxStackDepthLog2)); + assert_true(data_is_3d != spv::NoResult); + size[2] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + } break; + } + { + uint32_t 
size_remaining_components = size_needed_components; + uint32_t size_component_index; + while (xe::bit_scan_forward(size_remaining_components, + &size_component_index)) { + size_remaining_components &= ~(UINT32_C(1) << size_component_index); + spv::Id& size_component_ref = size[size_component_index]; + // Fetch constants store size minus 1 - add 1. + size_component_ref = + builder_->createBinOp(spv::OpIAdd, type_uint_, size_component_ref, + builder_->makeUintConstant(1)); + // Convert the size to float for multiplication or division. + size_component_ref = builder_->createUnaryOp( + spv::OpConvertUToF, type_float_, size_component_ref); + } + } + } + + // FIXME(Triang3l): Mip lerp factor needs to be calculated, and the + // coordinate lerp factors should be calculated at the mip level texels + // would be sampled from. That would require some way of calculating the + // LOD that would be applicable to explicit gradients and vertex shaders. + // Also, with point sampling, possibly lerp factors need to be 0. W (mip + // lerp factor) should have been masked out previously because it's not + // supported currently. + assert_false(instr.opcode == ucode::FetchOpcode::kGetTextureWeights && + (used_result_nonzero_components & 0b1000)); + + // Load the needed original values of the coordinates operand. + uint32_t coordinates_needed_components = + instr.opcode == ucode::FetchOpcode::kGetTextureWeights + ? used_result_nonzero_components + : ((UINT32_C(1) + << xenos::GetFetchOpDimensionComponentCount(instr.dimension)) - + 1); + assert_not_zero(coordinates_needed_components); + spv::Id coordinates_operand = + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], coordinates_needed_components); + spv::Id coordinates[] = {const_float_0_, const_float_0_, const_float_0_}; + if (xe::bit_count(coordinates_needed_components) > 1) { + uint32_t coordinates_remaining_components = coordinates_needed_components; + uint32_t coordinate_component_index; + uint32_t coordinate_operand_component_index = 0; + while (xe::bit_scan_forward(coordinates_remaining_components, + &coordinate_component_index)) { + coordinates_remaining_components &= + ~(UINT32_C(1) << coordinate_component_index); + coordinates[coordinate_component_index] = + builder_->createCompositeExtract( + coordinates_operand, type_float_, + coordinate_operand_component_index++); + } + } else { + uint32_t coordinate_component_index; + xe::bit_scan_forward(coordinates_needed_components, + &coordinate_component_index); + coordinates[coordinate_component_index] = coordinates_operand; + } + + // TODO(Triang3l): Reverting the resolution scale. + + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + // FIXME(Triang3l): Filtering modes should possibly be taken into account, + // but for simplicity, not doing that - from a high level point of view, + // would be useless to get weights that will always be zero. + uint32_t coordinates_remaining_components = coordinates_needed_components; + uint32_t coordinate_component_index; + while (xe::bit_scan_forward(coordinates_remaining_components, + &coordinate_component_index)) { + coordinates_remaining_components &= + ~(UINT32_C(1) << coordinate_component_index); + spv::Id result_component = coordinates[coordinate_component_index]; + // Need unnormalized coordinates. 
+ if (!instr.attributes.unnormalized_coordinates) { + spv::Id size_component = size[coordinate_component_index]; + assert_true(size_component != spv::NoResult); + result_component = builder_->createBinOp( + spv::OpFMul, type_float_, result_component, size_component); + builder_->addDecoration(result_component, + spv::DecorationNoContraction); + } + float component_offset = offset_values[coordinate_component_index]; + if (component_offset) { + result_component = builder_->createBinOp( + spv::OpFAdd, type_float_, result_component, + builder_->makeFloatConstant(component_offset)); + builder_->addDecoration(result_component, + spv::DecorationNoContraction); + } + // 0.5 has already been subtracted via offsets previously. + id_vector_temp_.clear(); + id_vector_temp_.push_back(result_component); + result_component = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Fract, id_vector_temp_); + result[coordinate_component_index] = result_component; + } + } else { + // kTextureFetch or kGetTextureComputedLod. + + // Normalize the XY coordinates, and apply the offset. + for (uint32_t i = 0; + i <= uint32_t(instr.dimension != xenos::FetchOpDimension::k1D); + ++i) { + spv::Id& coordinate_ref = coordinates[i]; + spv::Id component_offset = + offset_values[i] ? builder_->makeFloatConstant(offset_values[i]) + : spv::NoResult; + spv::Id size_component = size[i]; + if (instr.attributes.unnormalized_coordinates) { + if (component_offset != spv::NoResult) { + coordinate_ref = builder_->createBinOp( + spv::OpFAdd, type_float_, coordinate_ref, component_offset); + builder_->addDecoration(coordinate_ref, + spv::DecorationNoContraction); + } + assert_true(size_component != spv::NoResult); + coordinate_ref = builder_->createBinOp( + spv::OpFDiv, type_float_, coordinate_ref, size_component); + builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction); + } else { + if (component_offset != spv::NoResult) { + assert_true(size_component != spv::NoResult); + spv::Id component_offset_normalized = builder_->createBinOp( + spv::OpFDiv, type_float_, component_offset, size_component); + builder_->addDecoration(component_offset_normalized, + spv::DecorationNoContraction); + coordinate_ref = + builder_->createBinOp(spv::OpFAdd, type_float_, coordinate_ref, + component_offset_normalized); + builder_->addDecoration(coordinate_ref, + spv::DecorationNoContraction); + } + } + } + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + spv::Id& z_coordinate_ref = coordinates[2]; + spv::Id z_offset = offset_values[2] + ? builder_->makeFloatConstant(offset_values[2]) + : spv::NoResult; + spv::Id z_size = size[2]; + if (instr.attributes.unnormalized_coordinates) { + // Apply the offset, and normalize the Z coordinate for a 3D texture. 
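+ // GLSL-like sketch of the two branches merged by the phi below:
+ //   z = data_is_3d ? (z + offset_z) / depth : (z + offset_z);
+ // for a stacked texture, Z intentionally stays in unnormalized layer
+ // units.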
+ if (z_offset != spv::NoResult) {
+ z_coordinate_ref = builder_->createBinOp(
+ spv::OpFAdd, type_float_, z_coordinate_ref, z_offset);
+ builder_->addDecoration(z_coordinate_ref,
+ spv::DecorationNoContraction);
+ }
+ spv::Block& block_dimension_head = *builder_->getBuildPoint();
+ spv::Block& block_dimension_merge = builder_->makeNewBlock();
+ spv::Block& block_dimension_3d = builder_->makeNewBlock();
+ SpirvCreateSelectionMerge(block_dimension_merge.getId(),
+ spv::SelectionControlDontFlattenMask);
+ assert_true(data_is_3d != spv::NoResult);
+ builder_->createConditionalBranch(data_is_3d, &block_dimension_3d,
+ &block_dimension_merge);
+ builder_->setBuildPoint(&block_dimension_3d);
+ assert_true(z_size != spv::NoResult);
+ spv::Id z_3d = builder_->createBinOp(spv::OpFDiv, type_float_,
+ z_coordinate_ref, z_size);
+ builder_->addDecoration(z_3d, spv::DecorationNoContraction);
+ builder_->createBranch(&block_dimension_merge);
+ builder_->setBuildPoint(&block_dimension_merge);
+ {
+ std::unique_ptr<spv::Instruction> z_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ z_phi_op->addIdOperand(z_3d);
+ z_phi_op->addIdOperand(block_dimension_3d.getId());
+ z_phi_op->addIdOperand(z_coordinate_ref);
+ z_phi_op->addIdOperand(block_dimension_head.getId());
+ z_coordinate_ref = z_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(std::move(z_phi_op));
+ }
+ } else {
+ // Denormalize the Z coordinate for a stacked texture, and apply the
+ // offset.
+ spv::Block& block_dimension_head = *builder_->getBuildPoint();
+ spv::Block& block_dimension_merge = builder_->makeNewBlock();
+ spv::Block* block_dimension_3d =
+ z_offset != spv::NoResult ? &builder_->makeNewBlock() : nullptr;
+ spv::Block& block_dimension_stacked = builder_->makeNewBlock();
+ SpirvCreateSelectionMerge(block_dimension_merge.getId(),
+ spv::SelectionControlDontFlattenMask);
+ assert_true(data_is_3d != spv::NoResult);
+ builder_->createConditionalBranch(
+ data_is_3d,
+ block_dimension_3d ? block_dimension_3d : &block_dimension_merge,
+ &block_dimension_stacked);
+ // 3D case.
+ spv::Id z_3d = z_coordinate_ref;
+ if (block_dimension_3d) {
+ builder_->setBuildPoint(block_dimension_3d);
+ if (z_offset != spv::NoResult) {
+ assert_true(z_size != spv::NoResult);
+ spv::Id z_offset_normalized = builder_->createBinOp(
+ spv::OpFDiv, type_float_, z_offset, z_size);
+ builder_->addDecoration(z_offset_normalized,
+ spv::DecorationNoContraction);
+ z_3d = builder_->createBinOp(spv::OpFAdd, type_float_, z_3d,
+ z_offset_normalized);
+ builder_->addDecoration(z_3d, spv::DecorationNoContraction);
+ }
+ builder_->createBranch(&block_dimension_merge);
+ }
+ // Stacked case.
+ builder_->setBuildPoint(&block_dimension_stacked);
+ spv::Id z_stacked = z_coordinate_ref;
+ assert_true(z_size != spv::NoResult);
+ z_stacked = builder_->createBinOp(spv::OpFMul, type_float_, z_stacked,
+ z_size);
+ builder_->addDecoration(z_stacked, spv::DecorationNoContraction);
+ if (z_offset != spv::NoResult) {
+ z_stacked = builder_->createBinOp(spv::OpFAdd, type_float_,
+ z_stacked, z_offset);
+ builder_->addDecoration(z_stacked, spv::DecorationNoContraction);
+ }
+ builder_->createBranch(&block_dimension_merge);
+ // Select one of the two.
+ builder_->setBuildPoint(&block_dimension_merge);
+ {
+ std::unique_ptr<spv::Instruction> z_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ z_phi_op->addIdOperand(z_3d);
+ z_phi_op->addIdOperand((block_dimension_3d ? *block_dimension_3d
+ : block_dimension_head)
+ .getId());
+ z_phi_op->addIdOperand(z_stacked);
+ z_phi_op->addIdOperand(block_dimension_stacked.getId());
+ z_coordinate_ref = z_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(std::move(z_phi_op));
+ }
+ }
+ } else if (instr.dimension == xenos::FetchOpDimension::kCube) {
+ // Transform the cube coordinates from 2D to 3D.
+ // Move SC/TC from 1...2 to -1...1.
+ spv::Id const_float_2 = builder_->makeFloatConstant(2.0f);
+ spv::Id const_float_minus_3 = builder_->makeFloatConstant(-3.0f);
+ for (uint32_t i = 0; i < 2; ++i) {
+ spv::Id& coordinate_ref = coordinates[i];
+ coordinate_ref = builder_->createBinOp(spv::OpFMul, type_float_,
+ coordinate_ref, const_float_2);
+ builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction);
+ coordinate_ref = builder_->createBinOp(
+ spv::OpFAdd, type_float_, coordinate_ref, const_float_minus_3);
+ builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction);
+ }
+ // Get the face index (floored, within 0...5 - OpConvertFToU is
+ // undefined for out-of-range values, so clamping from both sides
+ // manually).
+ spv::Id face = coordinates[2];
+ if (offset_values[2]) {
+ face = builder_->createBinOp(
+ spv::OpFAdd, type_float_, face,
+ builder_->makeFloatConstant(offset_values[2]));
+ builder_->addDecoration(face, spv::DecorationNoContraction);
+ }
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ id_vector_temp_.push_back(const_float_0_);
+ id_vector_temp_.push_back(face);
+ face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+ GLSLstd450NMax, id_vector_temp_);
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ id_vector_temp_.push_back(builder_->makeFloatConstant(5.0f));
+ id_vector_temp_.push_back(face);
+ face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+ GLSLstd450FMin, id_vector_temp_);
+ face = builder_->createUnaryOp(spv::OpConvertFToU, type_uint_, face);
+ // Split the face index into the axis and the sign.
+ spv::Id const_uint_1 = builder_->makeUintConstant(1);
+ spv::Id face_axis = builder_->createBinOp(
+ spv::OpShiftRightLogical, type_uint_, face, const_uint_1);
+ spv::Id face_is_negative = builder_->createBinOp(
+ spv::OpINotEqual, type_bool_,
+ builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, face,
+ const_uint_1),
+ const_uint_0_);
+ spv::Id face_sign =
+ builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative,
+ builder_->makeFloatConstant(-1.0f),
+ builder_->makeFloatConstant(1.0f));
+ // Remap the axes in a way opposite to the ALU cube instruction.
+ spv::Id sc_negated = builder_->createUnaryOp(
+ spv::OpFNegate, type_float_, coordinates[0]);
+ builder_->addDecoration(sc_negated, spv::DecorationNoContraction);
+ spv::Id tc_negated = builder_->createUnaryOp(
+ spv::OpFNegate, type_float_, coordinates[1]);
+ builder_->addDecoration(tc_negated, spv::DecorationNoContraction);
+ spv::Block& block_ma_head = *builder_->getBuildPoint();
+ spv::Block& block_ma_x = builder_->makeNewBlock();
+ spv::Block& block_ma_y = builder_->makeNewBlock();
+ spv::Block& block_ma_z = builder_->makeNewBlock();
+ spv::Block& block_ma_merge = builder_->makeNewBlock();
+ SpirvCreateSelectionMerge(block_ma_merge.getId());
+ {
+ std::unique_ptr<spv::Instruction> ma_switch_op =
+ std::make_unique<spv::Instruction>(spv::OpSwitch);
+ ma_switch_op->addIdOperand(face_axis);
+ // Make Z the default.
+ ma_switch_op->addIdOperand(block_ma_z.getId());
+ ma_switch_op->addImmediateOperand(0);
+ ma_switch_op->addIdOperand(block_ma_x.getId());
+ ma_switch_op->addImmediateOperand(1);
+ ma_switch_op->addIdOperand(block_ma_y.getId());
+ builder_->getBuildPoint()->addInstruction(std::move(ma_switch_op));
+ }
+ block_ma_x.addPredecessor(&block_ma_head);
+ block_ma_y.addPredecessor(&block_ma_head);
+ block_ma_z.addPredecessor(&block_ma_head);
+ // X is the major axis case.
+ builder_->setBuildPoint(&block_ma_x);
+ spv::Id ma_x_y = tc_negated;
+ spv::Id ma_x_z =
+ builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative,
+ coordinates[0], sc_negated);
+ builder_->createBranch(&block_ma_merge);
+ // Y is the major axis case.
+ builder_->setBuildPoint(&block_ma_y);
+ spv::Id ma_y_x = coordinates[0];
+ spv::Id ma_y_z =
+ builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative,
+ tc_negated, coordinates[1]);
+ builder_->createBranch(&block_ma_merge);
+ // Z is the major axis case.
+ builder_->setBuildPoint(&block_ma_z);
+ spv::Id ma_z_x =
+ builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative,
+ sc_negated, coordinates[0]);
+ spv::Id ma_z_y = tc_negated;
+ builder_->createBranch(&block_ma_merge);
+ // Gather the coordinate components from the branches.
+ builder_->setBuildPoint(&block_ma_merge);
+ {
+ std::unique_ptr<spv::Instruction> x_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ x_phi_op->addIdOperand(face_sign);
+ x_phi_op->addIdOperand(block_ma_x.getId());
+ x_phi_op->addIdOperand(ma_y_x);
+ x_phi_op->addIdOperand(block_ma_y.getId());
+ x_phi_op->addIdOperand(ma_z_x);
+ x_phi_op->addIdOperand(block_ma_z.getId());
+ coordinates[0] = x_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(std::move(x_phi_op));
+ }
+ {
+ std::unique_ptr<spv::Instruction> y_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ y_phi_op->addIdOperand(ma_x_y);
+ y_phi_op->addIdOperand(block_ma_x.getId());
+ y_phi_op->addIdOperand(face_sign);
+ y_phi_op->addIdOperand(block_ma_y.getId());
+ y_phi_op->addIdOperand(ma_z_y);
+ y_phi_op->addIdOperand(block_ma_z.getId());
+ coordinates[1] = y_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(std::move(y_phi_op));
+ }
+ {
+ std::unique_ptr<spv::Instruction> z_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ z_phi_op->addIdOperand(ma_x_z);
+ z_phi_op->addIdOperand(block_ma_x.getId());
+ z_phi_op->addIdOperand(ma_y_z);
+ z_phi_op->addIdOperand(block_ma_y.getId());
+ z_phi_op->addIdOperand(face_sign);
+ z_phi_op->addIdOperand(block_ma_z.getId());
+ coordinates[2] = z_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(std::move(z_phi_op));
+ }
+ }
+
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ id_vector_temp_.push_back(
+ builder_->makeIntConstant(kSystemConstantTextureSwizzledSigns));
+ id_vector_temp_.push_back(
+ builder_->makeIntConstant(fetch_constant_index >> 4));
+ id_vector_temp_.push_back(
+ builder_->makeIntConstant((fetch_constant_index >> 2) & 3));
+ // All 32 bits containing the values for 4 fetch constants (use
+ // OpBitFieldUExtract to get the signednesses for the specific components
+ // of this texture).
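+ // Packing used below: 2 signedness bits per component, thus 8 bits per
+ // fetch constant and 4 fetch constants per 32-bit word. E.g. for fetch
+ // constant 5: uint4 element 5 >> 4 = 0, word (5 >> 2) & 3 = 1, and the
+ // signedness of result component c is at bits 8 * (5 & 3) + 2 * c.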
+ spv::Id swizzled_signs_word =
+ builder_->createLoad(builder_->createAccessChain(
+ spv::StorageClassUniform,
+ uniform_system_constants_, id_vector_temp_),
+ spv::NoPrecision);
+ uint32_t swizzled_signs_word_offset = 8 * (fetch_constant_index & 3);
+
+ spv::Builder::TextureParameters texture_parameters = {};
+
+ if (instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod) {
+ // kGetTextureComputedLod.
+
+ // Check if the signed binding needs to be accessed rather than the
+ // unsigned one (if all signednesses are signed).
+ spv::Id swizzled_signs_all_signed = builder_->createBinOp(
+ spv::OpIEqual, type_bool_,
+ builder_->createTriOp(
+ spv::OpBitFieldUExtract, type_uint_, swizzled_signs_word,
+ builder_->makeUintConstant(swizzled_signs_word_offset),
+ builder_->makeUintConstant(8)),
+ builder_->makeUintConstant(uint32_t(xenos::TextureSign::kSigned) *
+ 0b01010101));
+
+ // OpImageQueryLod doesn't need the array layer component.
+ // So, 3 coordinate components for 3D and cube, 2 in other cases
+ // (including 1D textures, which are emulated as 2D arrays).
+ // OpSampledImage must be in the same block as where its result is used.
+ if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) {
+ // Check if the texture is 3D or stacked.
+ spv::Block& block_dimension_head = *builder_->getBuildPoint();
+ spv::Block& block_dimension_3d_start = builder_->makeNewBlock();
+ spv::Block& block_dimension_stacked_start = builder_->makeNewBlock();
+ spv::Block& block_dimension_merge = builder_->makeNewBlock();
+ SpirvCreateSelectionMerge(block_dimension_merge.getId(),
+ spv::SelectionControlDontFlattenMask);
+ assert_true(data_is_3d != spv::NoResult);
+ builder_->createConditionalBranch(data_is_3d,
+ &block_dimension_3d_start,
+ &block_dimension_stacked_start);
+
+ // 3D.
+ builder_->setBuildPoint(&block_dimension_3d_start);
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ for (uint32_t i = 0; i < 3; ++i) {
+ id_vector_temp_.push_back(coordinates[i]);
+ }
+ texture_parameters.coords =
+ builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
+ spv::Id lod_3d = QueryTextureLod(texture_parameters,
+ image_3d_unsigned, image_3d_signed,
+ sampler, swizzled_signs_all_signed);
+ // Get the actual build point for phi.
+ spv::Block& block_dimension_3d_end = *builder_->getBuildPoint();
+ builder_->createBranch(&block_dimension_merge);
+
+ // 2D stacked.
+ builder_->setBuildPoint(&block_dimension_stacked_start);
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ for (uint32_t i = 0; i < 2; ++i) {
+ id_vector_temp_.push_back(coordinates[i]);
+ }
+ texture_parameters.coords =
+ builder_->createCompositeConstruct(type_float2_, id_vector_temp_);
+ spv::Id lod_stacked = QueryTextureLod(
+ texture_parameters, image_2d_array_or_cube_unsigned,
+ image_2d_array_or_cube_signed, sampler,
+ swizzled_signs_all_signed);
+ // Get the actual build point for phi.
+ spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint();
+ builder_->createBranch(&block_dimension_merge);
+
+ // Choose between the 3D and the stacked result based on the actual
+ // data dimensionality.
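+ // The two cases are queried in separate blocks because the stacked and
+ // the 3D bindings have different image types, so the results are merged
+ // with an OpPhi at the merge block rather than computed unconditionally
+ // and chosen with OpSelect.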
+ builder_->setBuildPoint(&block_dimension_merge);
+ {
+ std::unique_ptr<spv::Instruction> dimension_phi_op =
+ std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+ type_float_, spv::OpPhi);
+ dimension_phi_op->addIdOperand(lod_3d);
+ dimension_phi_op->addIdOperand(block_dimension_3d_end.getId());
+ dimension_phi_op->addIdOperand(lod_stacked);
+ dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId());
+ result[0] = dimension_phi_op->getResultId();
+ builder_->getBuildPoint()->addInstruction(
+ std::move(dimension_phi_op));
+ }
+ } else {
+ uint32_t lod_query_coordinate_component_count =
+ instr.dimension == xenos::FetchOpDimension::kCube ? 3 : 2;
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(lod_query_coordinate_component_count);
+ for (uint32_t i = 0; i < lod_query_coordinate_component_count; ++i) {
+ id_vector_temp_.push_back(coordinates[i]);
+ }
+ texture_parameters.coords = builder_->createCompositeConstruct(
+ type_float_vectors_[lod_query_coordinate_component_count - 1],
+ id_vector_temp_);
+ result[0] = QueryTextureLod(texture_parameters,
+ image_2d_array_or_cube_unsigned,
+ image_2d_array_or_cube_signed, sampler,
+ swizzled_signs_all_signed);
+ }
+ } else {
+ // kTextureFetch.
+ assert_true(instr.opcode == ucode::FetchOpcode::kTextureFetch);
+
+ // Extract the signedness for each component of the swizzled result, and
+ // get which bindings (unsigned and signed) are needed.
+ spv::Id swizzled_signs[4] = {};
+ spv::Id result_is_signed[4] = {};
+ spv::Id is_all_signed = spv::NoResult;
+ spv::Id is_any_signed = spv::NoResult;
+ spv::Id const_uint_2 = builder_->makeUintConstant(2);
+ spv::Id const_uint_sign_signed =
+ builder_->makeUintConstant(uint32_t(xenos::TextureSign::kSigned));
+ {
+ uint32_t result_remaining_components = used_result_nonzero_components;
+ uint32_t result_component_index;
+ while (xe::bit_scan_forward(result_remaining_components,
+ &result_component_index)) {
+ result_remaining_components &=
+ ~(UINT32_C(1) << result_component_index);
+ spv::Id result_component_sign = builder_->createTriOp(
+ spv::OpBitFieldUExtract, type_uint_, swizzled_signs_word,
+ builder_->makeUintConstant(swizzled_signs_word_offset +
+ 2 * result_component_index),
+ const_uint_2);
+ swizzled_signs[result_component_index] = result_component_sign;
+ spv::Id is_component_signed = builder_->createBinOp(
+ spv::OpIEqual, type_bool_, result_component_sign,
+ const_uint_sign_signed);
+ result_is_signed[result_component_index] = is_component_signed;
+ if (is_all_signed != spv::NoResult) {
+ is_all_signed =
+ builder_->createBinOp(spv::OpLogicalAnd, type_bool_,
+ is_all_signed, is_component_signed);
+ } else {
+ is_all_signed = is_component_signed;
+ }
+ if (is_any_signed != spv::NoResult) {
+ is_any_signed =
+ builder_->createBinOp(spv::OpLogicalOr, type_bool_,
+ is_any_signed, is_component_signed);
+ } else {
+ is_any_signed = is_component_signed;
+ }
+ }
+ }
+
+ // Load the fetch constant word 4, needed unconditionally for LOD
+ // biasing, for result exponent biasing, and conditionally for stacked
+ // texture filtering.
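+ // The LOD bias in word 4 is effectively a 10-bit two's complement value
+ // with 5 fractional bits - the decoding below is
+ //   float(sext_10bit(word_4 >> 12)) / 32.0
+ // giving a -16.0 to +15.96875 range in 0.03125 steps.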
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 4) >> 2))); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 4) & 3))); + spv::Id fetch_constant_word_4 = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id fetch_constant_word_4_signed = builder_->createUnaryOp( + spv::OpBitcast, type_int_, fetch_constant_word_4); + + // Accumulate the explicit LOD (or LOD bias) sources (in D3D11.3 + // specification order: specified LOD + sampler LOD bias + instruction + // LOD bias). + // Fetch constant LOD (bits 12:21 of the word 4). + spv::Id lod = builder_->createBinOp( + spv::OpFMul, type_float_, + builder_->createUnaryOp( + spv::OpConvertSToF, type_float_, + builder_->createTriOp(spv::OpBitFieldSExtract, type_int_, + fetch_constant_word_4_signed, + builder_->makeUintConstant(12), + builder_->makeUintConstant(10))), + builder_->makeFloatConstant(1.0f / 32.0f)); + builder_->addDecoration(lod, spv::DecorationNoContraction); + // Register LOD. + if (instr.attributes.use_register_lod) { + lod = builder_->createBinOp( + spv::OpFAdd, type_float_, + builder_->createLoad(var_main_tfetch_lod_, spv::NoPrecision), + lod); + builder_->addDecoration(lod, spv::DecorationNoContraction); + } + // Instruction LOD bias. + if (instr.attributes.lod_bias) { + lod = builder_->createBinOp( + spv::OpFAdd, type_float_, lod, + builder_->makeFloatConstant(instr.attributes.lod_bias)); + builder_->addDecoration(lod, spv::DecorationNoContraction); + } + + // Calculate the gradients for sampling the texture if needed. + // 2D vectors for k1D (because 1D images are emulated as 2D arrays), + // k2D. + // 3D vectors for k3DOrStacked, kCube. + spv::Id gradients_h = spv::NoResult, gradients_v = spv::NoResult; + if (use_computed_lod) { + // TODO(Triang3l): Gradient exponent adjustment is currently not done + // in getCompTexLOD, so not doing it here too for now. Apply the + // gradient exponent biases from the word 4 of the fetch constant in + // the future when it's handled in getCompTexLOD somehow. + id_vector_temp_.clear(); + id_vector_temp_.push_back(lod); + spv::Id lod_gradient_scale = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Exp2, id_vector_temp_); + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: { + spv::Id gradient_h_1d, gradient_v_1d; + if (instr.attributes.use_register_gradients) { + id_vector_temp_.clear(); + // First component. + id_vector_temp_.push_back(const_int_0_); + gradient_h_1d = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassFunction, + var_main_tfetch_gradients_h_, + id_vector_temp_), + spv::NoPrecision); + gradient_v_1d = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassFunction, + var_main_tfetch_gradients_v_, + id_vector_temp_), + spv::NoPrecision); + if (instr.attributes.unnormalized_coordinates) { + // Normalize the gradients. 
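+ // Register gradients are in texels when the coordinates are
+ // unnormalized, so they're divided by the size just like the
+ // coordinates. Note that the multiplication by
+ // lod_gradient_scale = exp2(lod) is equivalent to adding the
+ // accumulated bias to the computed LOD: log2(g * exp2(b)) == log2(g) + b.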
+ assert_true(size[0] != spv::NoResult);
+ gradient_h_1d = builder_->createBinOp(
+ spv::OpFDiv, type_float_, gradient_h_1d, size[0]);
+ builder_->addDecoration(gradient_h_1d,
+ spv::DecorationNoContraction);
+ gradient_v_1d = builder_->createBinOp(
+ spv::OpFDiv, type_float_, gradient_v_1d, size[0]);
+ builder_->addDecoration(gradient_v_1d,
+ spv::DecorationNoContraction);
+ }
+ } else {
+ builder_->addCapability(spv::CapabilityDerivativeControl);
+ gradient_h_1d = builder_->createUnaryOp(
+ spv::OpDPdxCoarse, type_float_, coordinates[0]);
+ gradient_v_1d = builder_->createUnaryOp(
+ spv::OpDPdyCoarse, type_float_, coordinates[0]);
+ }
+ gradient_h_1d = builder_->createBinOp(
+ spv::OpFMul, type_float_, gradient_h_1d, lod_gradient_scale);
+ builder_->addDecoration(gradient_h_1d,
+ spv::DecorationNoContraction);
+ gradient_v_1d = builder_->createBinOp(
+ spv::OpFMul, type_float_, gradient_v_1d, lod_gradient_scale);
+ builder_->addDecoration(gradient_v_1d,
+ spv::DecorationNoContraction);
+ // 1D textures are sampled as 2D arrays - need 2-component
+ // gradients.
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ id_vector_temp_.push_back(gradient_h_1d);
+ id_vector_temp_.push_back(const_float_0_);
+ gradients_h = builder_->createCompositeConstruct(type_float2_,
+ id_vector_temp_);
+ id_vector_temp_[0] = gradient_v_1d;
+ gradients_v = builder_->createCompositeConstruct(type_float2_,
+ id_vector_temp_);
+ } break;
+ case xenos::FetchOpDimension::k2D: {
+ if (instr.attributes.use_register_gradients) {
+ for (uint32_t i = 0; i < 2; ++i) {
+ spv::Id register_gradient_3d =
+ builder_->createLoad(i ? var_main_tfetch_gradients_v_
+ : var_main_tfetch_gradients_h_,
+ spv::NoPrecision);
+ spv::Id register_gradient_x =
+ builder_->createCompositeExtract(register_gradient_3d,
+ type_float_, 0);
+ spv::Id register_gradient_y =
+ builder_->createCompositeExtract(register_gradient_3d,
+ type_float_, 1);
+ if (instr.attributes.unnormalized_coordinates) {
+ // Normalize the gradients.
+ assert_true(size[0] != spv::NoResult);
+ register_gradient_x = builder_->createBinOp(
+ spv::OpFDiv, type_float_, register_gradient_x, size[0]);
+ builder_->addDecoration(register_gradient_x,
+ spv::DecorationNoContraction);
+ assert_true(size[1] != spv::NoResult);
+ register_gradient_y = builder_->createBinOp(
+ spv::OpFDiv, type_float_, register_gradient_y, size[1]);
+ builder_->addDecoration(register_gradient_y,
+ spv::DecorationNoContraction);
+ }
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ id_vector_temp_.push_back(register_gradient_x);
+ id_vector_temp_.push_back(register_gradient_y);
+ (i ? gradients_v : gradients_h) =
+ builder_->createCompositeConstruct(type_float2_,
+ id_vector_temp_);
+ }
+ } else {
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(2);
+ for (uint32_t i = 0; i < 2; ++i) {
+ id_vector_temp_.push_back(coordinates[i]);
+ }
+ spv::Id gradient_coordinate_vector =
+ builder_->createCompositeConstruct(type_float2_,
+ id_vector_temp_);
+ builder_->addCapability(spv::CapabilityDerivativeControl);
+ gradients_h =
+ builder_->createUnaryOp(spv::OpDPdxCoarse, type_float2_,
+ gradient_coordinate_vector);
+ gradients_v =
+ builder_->createUnaryOp(spv::OpDPdyCoarse, type_float2_,
+ gradient_coordinate_vector);
+ }
+ gradients_h =
+ builder_->createBinOp(spv::OpVectorTimesScalar, type_float2_,
+ gradients_h, lod_gradient_scale);
+ builder_->addDecoration(gradients_h,
+ spv::DecorationNoContraction);
+ gradients_v =
+ builder_->createBinOp(spv::OpVectorTimesScalar, type_float2_,
+ gradients_v, lod_gradient_scale);
+ builder_->addDecoration(gradients_v,
+ spv::DecorationNoContraction);
+ } break;
+ case xenos::FetchOpDimension::k3DOrStacked: {
+ if (instr.attributes.use_register_gradients) {
+ gradients_h = builder_->createLoad(var_main_tfetch_gradients_h_,
+ spv::NoPrecision);
+ gradients_v = builder_->createLoad(var_main_tfetch_gradients_v_,
+ spv::NoPrecision);
+ if (instr.attributes.unnormalized_coordinates) {
+ // Normalize the gradients.
+ for (uint32_t i = 0; i < 2; ++i) {
+ spv::Id& gradient_ref = i ? gradients_v : gradients_h;
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ for (uint32_t j = 0; j < 3; ++j) {
+ assert_true(size[j] != spv::NoResult);
+ id_vector_temp_.push_back(builder_->createBinOp(
+ spv::OpFDiv, type_float_,
+ builder_->createCompositeExtract(gradient_ref,
+ type_float_, j),
+ size[j]));
+ builder_->addDecoration(id_vector_temp_.back(),
+ spv::DecorationNoContraction);
+ }
+ gradient_ref = builder_->createCompositeConstruct(
+ type_float3_, id_vector_temp_);
+ }
+ }
+ } else {
+ id_vector_temp_.clear();
+ id_vector_temp_.reserve(3);
+ for (uint32_t i = 0; i < 3; ++i) {
+ id_vector_temp_.push_back(coordinates[i]);
+ }
+ spv::Id gradient_coordinate_vector =
+ builder_->createCompositeConstruct(type_float3_,
+ id_vector_temp_);
+ builder_->addCapability(spv::CapabilityDerivativeControl);
+ gradients_h =
+ builder_->createUnaryOp(spv::OpDPdxCoarse, type_float3_,
+ gradient_coordinate_vector);
+ gradients_v =
+ builder_->createUnaryOp(spv::OpDPdyCoarse, type_float3_,
+ gradient_coordinate_vector);
+ }
+ gradients_h =
+ builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_,
+ gradients_h, lod_gradient_scale);
+ builder_->addDecoration(gradients_h,
+ spv::DecorationNoContraction);
+ gradients_v =
+ builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_,
+ gradients_v, lod_gradient_scale);
+ builder_->addDecoration(gradients_v,
+ spv::DecorationNoContraction);
+ } break;
+ case xenos::FetchOpDimension::kCube: {
+ if (instr.attributes.use_register_gradients) {
+ // Register gradients are already in the cube space for cube
+ // maps.
+ // TODO(Triang3l): Are cube map register gradients unnormalized
+ // if the coordinates themselves are unnormalized?
+ gradients_h = builder_->createLoad(var_main_tfetch_gradients_h_, + spv::NoPrecision); + gradients_v = builder_->createLoad(var_main_tfetch_gradients_v_, + spv::NoPrecision); + } else { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + spv::Id gradient_coordinate_vector = + builder_->createCompositeConstruct(type_float3_, + id_vector_temp_); + builder_->addCapability(spv::CapabilityDerivativeControl); + gradients_h = + builder_->createUnaryOp(spv::OpDPdxCoarse, type_float3_, + gradient_coordinate_vector); + gradients_v = + builder_->createUnaryOp(spv::OpDPdyCoarse, type_float3_, + gradient_coordinate_vector); + } + gradients_h = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_h, lod_gradient_scale); + builder_->addDecoration(gradients_h, + spv::DecorationNoContraction); + gradients_v = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_v, lod_gradient_scale); + builder_->addDecoration(gradients_v, + spv::DecorationNoContraction); + } break; + } + } + + // Sample the texture. + spv::ImageOperandsMask image_operands_mask = + use_computed_lod ? spv::ImageOperandsGradMask + : spv::ImageOperandsLodMask; + spv::Id sample_result_unsigned, sample_result_signed; + if (!use_computed_lod) { + texture_parameters.lod = lod; + } + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + // 3D (3 coordinate components, 3 gradient components, single fetch) + // or 2D stacked (2 coordinate components + 1 array layer coordinate + // component, 2 gradient components, two fetches if the Z axis is + // linear-filtered). + + spv::Block& block_dimension_head = *builder_->getBuildPoint(); + spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); + spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); + spv::Block& block_dimension_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_dimension_merge.getId(), + spv::SelectionControlDontFlattenMask); + assert_true(data_is_3d != spv::NoResult); + builder_->createConditionalBranch(data_is_3d, + &block_dimension_3d_start, + &block_dimension_stacked_start); + + // 3D. + builder_->setBuildPoint(&block_dimension_3d_start); + if (use_computed_lod) { + texture_parameters.gradX = gradients_h; + texture_parameters.gradY = gradients_v; + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + spv::Id sample_result_unsigned_3d, sample_result_signed_3d; + SampleTexture(texture_parameters, image_operands_mask, + image_3d_unsigned, image_3d_signed, sampler, + is_all_signed, is_any_signed, sample_result_unsigned_3d, + sample_result_signed_3d); + // Get the actual build point after the SampleTexture call for phi. + spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_dimension_merge); + + // 2D stacked. + builder_->setBuildPoint(&block_dimension_stacked_start); + if (use_computed_lod) { + // Extract 2D gradients for stacked textures which are 2D arrays. 
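+ // The two shuffles below are just .xy swizzles of the 3-component
+ // gradients (GLSL-like: gradX = gradients_h.xy), as only two components
+ // participate in sampling a 2D array.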
+ { + std::unique_ptr shuffle_op = + std::make_unique(builder_->getUniqueId(), + type_float2_, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(gradients_h); + shuffle_op->addIdOperand(gradients_h); + shuffle_op->addImmediateOperand(0); + shuffle_op->addImmediateOperand(1); + texture_parameters.gradX = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } + { + std::unique_ptr shuffle_op = + std::make_unique(builder_->getUniqueId(), + type_float2_, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(gradients_v); + shuffle_op->addIdOperand(gradients_v); + shuffle_op->addImmediateOperand(0); + shuffle_op->addImmediateOperand(1); + texture_parameters.gradY = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } + } + // Check if linear filtering is needed. + bool vol_mag_filter_is_fetch_const = + instr.attributes.vol_mag_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_min_filter_is_fetch_const = + instr.attributes.vol_min_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_mag_filter_is_linear = + instr.attributes.vol_mag_filter == xenos::TextureFilter::kLinear; + bool vol_min_filter_is_linear = + instr.attributes.vol_min_filter == xenos::TextureFilter::kLinear; + spv::Id vol_filter_is_linear = spv::NoResult; + if (use_computed_lod && + (vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const || + vol_mag_filter_is_linear != vol_min_filter_is_linear)) { + // Check if minifying along layers (derivative > 1 along any axis). + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + i ? gradients_v : gradients_h, type_float_, 2)); + } + spv::Id layer_max_gradient = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMax, id_vector_temp_); + if (!instr.attributes.unnormalized_coordinates) { + // Denormalize the gradient if provided as normalized. + assert_true(size[2] != spv::NoResult); + layer_max_gradient = builder_->createBinOp( + spv::OpFMul, type_float_, layer_max_gradient, size[2]); + builder_->addDecoration(layer_max_gradient, + spv::DecorationNoContraction); + } + // For NaN, considering that magnification is being done. + spv::Id is_minifying_z = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, layer_max_gradient, + builder_->makeFloatConstant(1.0f)); + // Choose what filter is actually used, the minification or the + // magnification one. + spv::Id vol_mag_filter_is_linear_loaded = + vol_mag_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_) + : builder_->makeBoolConstant(vol_mag_filter_is_linear); + spv::Id vol_min_filter_is_linear_loaded = + vol_min_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 1)), + const_uint_0_) + : builder_->makeBoolConstant(vol_min_filter_is_linear); + vol_filter_is_linear = + builder_->createTriOp(spv::OpSelect, type_bool_, is_minifying_z, + vol_min_filter_is_linear_loaded, + vol_mag_filter_is_linear_loaded); + } else { + // No gradients, or using the same filter overrides for magnifying + // and minifying. 
Assume always magnifying if no gradients (LOD 0, + // always <= 0). LOD is within 2D layers, not between them (unlike + // in 3D textures, which have mips with depth reduced), so it + // shouldn't have effect on filtering between layers. + if (vol_mag_filter_is_fetch_const) { + vol_filter_is_linear = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_); + } + } + spv::Id layer_coordinate = coordinates[2]; + // Linear filtering may be needed either based on a dynamic condition + // (the filtering mode is taken from the fetch constant, or it's + // different for magnification and minification), or on a static one + // (with gradients - specified in the instruction for both + // magnification and minification as linear, without gradients - + // specified for magnification as linear). + // If the filter is linear, subtract 0.5 from the Z coordinate of the + // first layer in filtering because 0.5 is in the middle of it. + if (vol_filter_is_linear != spv::NoResult) { + spv::Id layer_coordinate_linear = builder_->createBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(layer_coordinate_linear, + spv::DecorationNoContraction); + layer_coordinate = builder_->createTriOp( + spv::OpSelect, type_float_, vol_filter_is_linear, + layer_coordinate_linear, layer_coordinate); + } else if (vol_mag_filter_is_linear) { + layer_coordinate = builder_->createBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(layer_coordinate, + spv::DecorationNoContraction); + } + // Sample the first layer, needed regardless of whether filtering is + // needed. + // Floor the array layer (Vulkan does rounding to nearest or + 0.5 and + // floor even for the layer index, but on the Xenos, addressing is + // similar to that of 3D textures). This is needed for both point and + // linear filtering (with linear, 0.5 was subtracted previously). + id_vector_temp_.clear(); + id_vector_temp_.push_back(layer_coordinate); + spv::Id layer_0_coordinate = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(coordinates[0]); + id_vector_temp_.push_back(coordinates[1]); + id_vector_temp_.push_back(layer_0_coordinate); + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked; + SampleTexture(texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, is_all_signed, + is_any_signed, sample_result_unsigned_stacked, + sample_result_signed_stacked); + // Sample the second layer if linear filtering is potentially needed + // (conditionally or unconditionally, depending on whether the filter + // needs to be chosen at runtime), and filter. + if (vol_filter_is_linear != spv::NoResult || + vol_mag_filter_is_linear) { + spv::Block& block_z_head = *builder_->getBuildPoint(); + spv::Block& block_z_linear = (vol_filter_is_linear != spv::NoResult) + ? builder_->makeNewBlock() + : block_z_head; + spv::Block& block_z_merge = (vol_filter_is_linear != spv::NoResult) + ? 
+                                        ? builder_->makeNewBlock()
+                                        : block_z_head;
+        if (vol_filter_is_linear != spv::NoResult) {
+          SpirvCreateSelectionMerge(block_z_merge.getId(),
+                                    spv::SelectionControlDontFlattenMask);
+          builder_->createConditionalBranch(
+              vol_filter_is_linear, &block_z_linear, &block_z_merge);
+          builder_->setBuildPoint(&block_z_linear);
+        }
+        spv::Id layer_1_coordinate = builder_->createBinOp(
+            spv::OpFAdd, type_float_, layer_0_coordinate,
+            builder_->makeFloatConstant(1.0f));
+        id_vector_temp_.clear();
+        id_vector_temp_.reserve(3);
+        id_vector_temp_.push_back(coordinates[0]);
+        id_vector_temp_.push_back(coordinates[1]);
+        id_vector_temp_.push_back(layer_1_coordinate);
+        texture_parameters.coords = builder_->createCompositeConstruct(
+            type_float3_, id_vector_temp_);
+        id_vector_temp_.clear();
+        id_vector_temp_.push_back(layer_coordinate);
+        spv::Id layer_lerp_factor =
+            builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                        GLSLstd450Fract, id_vector_temp_);
+        spv::Id sample_result_unsigned_stacked_filtered;
+        spv::Id sample_result_signed_stacked_filtered;
+        SampleTexture(
+            texture_parameters, image_operands_mask,
+            image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed,
+            sampler, is_all_signed, is_any_signed,
+            sample_result_unsigned_stacked_filtered,
+            sample_result_signed_stacked_filtered, layer_lerp_factor,
+            sample_result_unsigned_stacked, sample_result_signed_stacked);
+        if (vol_filter_is_linear != spv::NoResult) {
+          // Get the actual build point after the SampleTexture call for
+          // phi.
+          spv::Block& block_z_linear_end = *builder_->getBuildPoint();
+          builder_->createBranch(&block_z_merge);
+          builder_->setBuildPoint(&block_z_merge);
+          {
+            std::unique_ptr<spv::Instruction> filter_phi_op =
+                std::make_unique<spv::Instruction>(
+                    builder_->getUniqueId(), type_float4_, spv::OpPhi);
+            filter_phi_op->addIdOperand(
+                sample_result_unsigned_stacked_filtered);
+            filter_phi_op->addIdOperand(block_z_linear_end.getId());
+            filter_phi_op->addIdOperand(sample_result_unsigned_stacked);
+            filter_phi_op->addIdOperand(block_z_head.getId());
+            sample_result_unsigned_stacked = filter_phi_op->getResultId();
+            builder_->getBuildPoint()->addInstruction(
+                std::move(filter_phi_op));
+          }
+          {
+            std::unique_ptr<spv::Instruction> filter_phi_op =
+                std::make_unique<spv::Instruction>(
+                    builder_->getUniqueId(), type_float4_, spv::OpPhi);
+            filter_phi_op->addIdOperand(
+                sample_result_signed_stacked_filtered);
+            filter_phi_op->addIdOperand(block_z_linear_end.getId());
+            filter_phi_op->addIdOperand(sample_result_signed_stacked);
+            filter_phi_op->addIdOperand(block_z_head.getId());
+            sample_result_signed_stacked = filter_phi_op->getResultId();
+            builder_->getBuildPoint()->addInstruction(
+                std::move(filter_phi_op));
+          }
+        } else {
+          sample_result_unsigned_stacked =
+              sample_result_unsigned_stacked_filtered;
+          sample_result_signed_stacked =
+              sample_result_signed_stacked_filtered;
+        }
+      }
+      // Get the actual build point for phi.
+      spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint();
+      builder_->createBranch(&block_dimension_merge);
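In scalar terms, the stacked-texture Z filtering that the hunk above emits amounts to flooring the layer coordinate, sampling two adjacent layers, and lerping by the fractional part. A minimal per-channel sketch, where `Sample2DArrayChannel` is a hypothetical stand-in for the `OpImageSampleImplicitLod` call that `SampleTexture()` wraps:

```cpp
#include <cmath>

// Hypothetical stand-in for one channel of the 2D-array image sample.
float Sample2DArrayChannel(float x, float y, float layer);

float FilterStackedLayers(float x, float y, float z) {
  // 0.5 has already been subtracted from z when the volume filter is linear,
  // since a layer is sampled at its center.
  float layer_0 = std::floor(z);     // GLSLstd450Floor.
  float lerp_factor = z - layer_0;   // GLSLstd450Fract.
  float s0 = Sample2DArrayChannel(x, y, layer_0);
  float s1 = Sample2DArrayChannel(x, y, layer_0 + 1.0f);
  // s0 + (s1 - s0) * lerp_factor - the NoContraction FSub /
  // VectorTimesScalar / FAdd sequence SampleTexture emits when it's given a
  // lerp factor and the first-layer results.
  return s0 + (s1 - s0) * lerp_factor;
}
```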
+
+      // Choose between the 3D and the stacked result based on the actual
+      // data dimensionality.
+      builder_->setBuildPoint(&block_dimension_merge);
+      {
+        std::unique_ptr<spv::Instruction> dimension_phi_op =
+            std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                               type_float4_, spv::OpPhi);
+        dimension_phi_op->addIdOperand(sample_result_unsigned_3d);
+        dimension_phi_op->addIdOperand(block_dimension_3d_end.getId());
+        dimension_phi_op->addIdOperand(sample_result_unsigned_stacked);
+        dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId());
+        sample_result_unsigned = dimension_phi_op->getResultId();
+        builder_->getBuildPoint()->addInstruction(
+            std::move(dimension_phi_op));
+      }
+      {
+        std::unique_ptr<spv::Instruction> dimension_phi_op =
+            std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                               type_float4_, spv::OpPhi);
+        dimension_phi_op->addIdOperand(sample_result_signed_3d);
+        dimension_phi_op->addIdOperand(block_dimension_3d_end.getId());
+        dimension_phi_op->addIdOperand(sample_result_signed_stacked);
+        dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId());
+        sample_result_signed = dimension_phi_op->getResultId();
+        builder_->getBuildPoint()->addInstruction(
+            std::move(dimension_phi_op));
+      }
+    } else {
+      if (use_computed_lod) {
+        texture_parameters.gradX = gradients_h;
+        texture_parameters.gradY = gradients_v;
+      }
+      id_vector_temp_.clear();
+      id_vector_temp_.reserve(3);
+      for (uint32_t i = 0; i < 3; ++i) {
+        id_vector_temp_.push_back(coordinates[i]);
+      }
+      texture_parameters.coords =
+          builder_->createCompositeConstruct(type_float3_, id_vector_temp_);
+      SampleTexture(texture_parameters, image_operands_mask,
+                    image_2d_array_or_cube_unsigned,
+                    image_2d_array_or_cube_signed, sampler, is_all_signed,
+                    is_any_signed, sample_result_unsigned,
+                    sample_result_signed);
+    }
+
+    // Swizzle the result components manually if needed, to `result`.
+    // Because the same host format component may be replicated into
+    // multiple guest components (such as for formats with less than 4
+    // components), yet the signedness is per-guest-component, it's not
+    // possible to apply the signedness to host components before swizzling,
+    // so it's done during (for unsigned vs. signed) and after (for biased
+    // and gamma) swizzling.
+    if (!features_.image_view_format_swizzle) {
+      id_vector_temp_.clear();
+      id_vector_temp_.reserve(3);
+      id_vector_temp_.push_back(
+          builder_->makeIntConstant(kSystemConstantTextureSwizzles));
+      id_vector_temp_.push_back(
+          builder_->makeIntConstant(fetch_constant_index >> 3));
+      id_vector_temp_.push_back(
+          builder_->makeIntConstant((fetch_constant_index >> 1) & 3));
+      // One 32-bit word contains the swizzle values (24 bits) for 2 fetch
+      // constants.
+      spv::Id swizzle_word = builder_->createLoad(
+          builder_->createAccessChain(spv::StorageClassUniform,
+                                      uniform_system_constants_,
+                                      id_vector_temp_),
+          spv::NoPrecision);
+      uint32_t swizzle_word_offset = 3 * 4 * (fetch_constant_index & 1);
+      spv::Id const_float_1 = builder_->makeFloatConstant(1.0f);
+      uint32_t result_remaining_components = used_result_nonzero_components;
+      uint32_t result_component_index;
+      while (xe::bit_scan_forward(result_remaining_components,
+                                  &result_component_index)) {
+        result_remaining_components &=
+            ~(UINT32_C(1) << result_component_index);
+        uint32_t swizzle_bit_0_value =
+            UINT32_C(1)
+            << (swizzle_word_offset + 3 * result_component_index);
+        spv::Id swizzle_bit_0 = builder_->createBinOp(
+            spv::OpINotEqual, type_bool_,
+            builder_->createBinOp(
+                spv::OpBitwiseAnd, type_uint_, swizzle_word,
+                builder_->makeUintConstant(swizzle_bit_0_value)),
+            const_uint_0_);
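For reference, the 3-bit per-component swizzle selector that the bit tests above and below implement can be modeled on the CPU as follows. This is a sketch of the encoding as the generated SPIR-V treats it (selectors 0-3 pick X/Y/Z/W of the sample, 4 and 5 produce the constants 0 and 1); `fetched` is assumed to be the XYZW sample:

```cpp
#include <cstdint>

float DecodeSwizzledComponent(uint32_t selector, const float fetched[4]) {
  if (selector & 0b100) {
    // Bit 2 - a constant rather than a fetched component; bit 0 - 0 or 1.
    return (selector & 0b001) ? 1.0f : 0.0f;
  }
  // Bit 1 - the X/Y or the Z/W pair; bit 0 - which component of the pair.
  return fetched[((selector & 0b010) ? 2 : 0) + (selector & 0b001)];
}
```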
+        // Bit 2 - X/Y/Z/W or 0/1.
+        spv::Id swizzle_bit_2 = builder_->createBinOp(
+            spv::OpINotEqual, type_bool_,
+            builder_->createBinOp(
+                spv::OpBitwiseAnd, type_uint_, swizzle_word,
+                builder_->makeUintConstant(swizzle_bit_0_value << 2)),
+            const_uint_0_);
+        spv::Block& block_swizzle_head = *builder_->getBuildPoint();
+        spv::Block& block_swizzle_constant = builder_->makeNewBlock();
+        spv::Block& block_swizzle_component = builder_->makeNewBlock();
+        spv::Block& block_swizzle_merge = builder_->makeNewBlock();
+        SpirvCreateSelectionMerge(block_swizzle_merge.getId(),
+                                  spv::SelectionControlDontFlattenMask);
+        builder_->createConditionalBranch(swizzle_bit_2,
+                                          &block_swizzle_constant,
+                                          &block_swizzle_component);
+        // Constant values.
+        builder_->setBuildPoint(&block_swizzle_constant);
+        // Bit 0 - 0 or 1.
+        spv::Id swizzle_result_constant =
+            builder_->createTriOp(spv::OpSelect, type_float_, swizzle_bit_0,
+                                  const_float_1, const_float_0_);
+        builder_->createBranch(&block_swizzle_merge);
+        // Fetched components.
+        spv::Id swizzle_result_component;
+        {
+          builder_->setBuildPoint(&block_swizzle_component);
+          // Select whether the result is signed or unsigned (or biased or
+          // gamma-corrected) based on the post-swizzle signedness.
+          spv::Id swizzle_sample_result = builder_->createTriOp(
+              spv::OpSelect, type_float4_,
+              builder_->smearScalar(
+                  spv::NoPrecision,
+                  result_is_signed[result_component_index], type_bool4_),
+              sample_result_signed, sample_result_unsigned);
+          // Bit 0 - X or Y, Z or W, 0 or 1.
+          spv::Id swizzle_x_or_y = builder_->createTriOp(
+              spv::OpSelect, type_float_, swizzle_bit_0,
+              builder_->createCompositeExtract(swizzle_sample_result,
+                                               type_float_, 1),
+              builder_->createCompositeExtract(swizzle_sample_result,
+                                               type_float_, 0));
+          spv::Id swizzle_z_or_w = builder_->createTriOp(
+              spv::OpSelect, type_float_, swizzle_bit_0,
+              builder_->createCompositeExtract(swizzle_sample_result,
+                                               type_float_, 3),
+              builder_->createCompositeExtract(swizzle_sample_result,
+                                               type_float_, 2));
+          // Bit 1 - X/Y or Z/W.
+          spv::Id swizzle_bit_1 = builder_->createBinOp(
+              spv::OpINotEqual, type_bool_,
+              builder_->createBinOp(
+                  spv::OpBitwiseAnd, type_uint_, swizzle_word,
+                  builder_->makeUintConstant(swizzle_bit_0_value << 1)),
+              const_uint_0_);
+          swizzle_result_component = builder_->createTriOp(
+              spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w,
+              swizzle_x_or_y);
+          builder_->createBranch(&block_swizzle_merge);
+        }
+        // Select between the constants and the fetched components.
+        builder_->setBuildPoint(&block_swizzle_merge);
+        {
+          std::unique_ptr<spv::Instruction> swizzle_phi_op =
+              std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                                 type_float_, spv::OpPhi);
+          swizzle_phi_op->addIdOperand(swizzle_result_constant);
+          swizzle_phi_op->addIdOperand(block_swizzle_constant.getId());
+          swizzle_phi_op->addIdOperand(swizzle_result_component);
+          swizzle_phi_op->addIdOperand(block_swizzle_component.getId());
+          result[result_component_index] = swizzle_phi_op->getResultId();
+          builder_->getBuildPoint()->addInstruction(
+              std::move(swizzle_phi_op));
+        }
+      }
+    }
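The next hunk applies the per-component signedness. In scalar terms the four modes reduce to the sketch below; the enum values mirror what `xenos::TextureSign` appears to encode, and `PwlGammaToLinear` stands in for the translator's `PWLGammaToLinear` (left undeclared here), so treat both as assumptions:

```cpp
#include <cstdint>

enum class TextureSign : uint32_t {
  kUnsigned,
  kSigned,
  kUnsignedBiased,
  kGamma,
};

float PwlGammaToLinear(float value);  // Hypothetical scalar PWL gamma decode.

float ApplyTextureSign(TextureSign sign, float unsigned_value,
                       float signed_value) {
  switch (sign) {
    case TextureSign::kSigned:
      // Comes from the separate signed image view - no arithmetic needed.
      return signed_value;
    case TextureSign::kUnsignedBiased:
      return unsigned_value * 2.0f - 1.0f;  // [0, 1] -> [-1, 1].
    case TextureSign::kGamma:
      return PwlGammaToLinear(unsigned_value);
    default:
      return unsigned_value;  // kUnsigned needs no transformation.
  }
}
```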
+
+    // Apply the signednesses to all the needed components. If swizzling is
+    // done in the shader rather than via the image view, unsigned or signed
+    // source has already been selected into `result` - only need to bias or
+    // to gamma-correct.
+    spv::Id const_float_2 = builder_->makeFloatConstant(2.0f);
+    spv::Id const_float_minus_1 = builder_->makeFloatConstant(-1.0f);
+    {
+      uint32_t result_remaining_components = used_result_nonzero_components;
+      uint32_t result_component_index;
+      while (xe::bit_scan_forward(result_remaining_components,
+                                  &result_component_index)) {
+        result_remaining_components &=
+            ~(UINT32_C(1) << result_component_index);
+        spv::Id sample_result_component_unsigned =
+            features_.image_view_format_swizzle
+                ? builder_->createCompositeExtract(sample_result_unsigned,
+                                                   type_float_,
+                                                   result_component_index)
+                : result[result_component_index];
+        spv::Block& block_sign_head = *builder_->getBuildPoint();
+        spv::Block* block_sign_signed = features_.image_view_format_swizzle
+                                            ? &builder_->makeNewBlock()
+                                            : nullptr;
+        spv::Block& block_sign_unsigned_biased = builder_->makeNewBlock();
+        spv::Block& block_sign_gamma_start = builder_->makeNewBlock();
+        spv::Block& block_sign_merge = builder_->makeNewBlock();
+        SpirvCreateSelectionMerge(block_sign_merge.getId(),
+                                  spv::SelectionControlDontFlattenMask);
+        {
+          std::unique_ptr<spv::Instruction> sign_switch_op =
+              std::make_unique<spv::Instruction>(spv::OpSwitch);
+          sign_switch_op->addIdOperand(
+              swizzled_signs[result_component_index]);
+          // Make unsigned (do nothing, take the unsigned component in the
+          // phi) the default, and also, if unsigned or signed has already
+          // been selected in swizzling, make signed the default too, since
+          // it, just like unsigned, doesn't need any transformations.
+          sign_switch_op->addIdOperand(block_sign_merge.getId());
+          if (block_sign_signed) {
+            sign_switch_op->addImmediateOperand(
+                uint32_t(xenos::TextureSign::kSigned));
+            sign_switch_op->addIdOperand(block_sign_signed->getId());
+          }
+          sign_switch_op->addImmediateOperand(
+              uint32_t(xenos::TextureSign::kUnsignedBiased));
+          sign_switch_op->addIdOperand(block_sign_unsigned_biased.getId());
+          sign_switch_op->addImmediateOperand(
+              uint32_t(xenos::TextureSign::kGamma));
+          sign_switch_op->addIdOperand(block_sign_gamma_start.getId());
+          builder_->getBuildPoint()->addInstruction(
+              std::move(sign_switch_op));
+        }
+        if (block_sign_signed) {
+          block_sign_signed->addPredecessor(&block_sign_head);
+        }
+        block_sign_unsigned_biased.addPredecessor(&block_sign_head);
+        block_sign_gamma_start.addPredecessor(&block_sign_head);
+        block_sign_merge.addPredecessor(&block_sign_head);
+        // Signed.
+        spv::Id sample_result_component_signed =
+            sample_result_component_unsigned;
+        if (block_sign_signed) {
+          builder_->setBuildPoint(block_sign_signed);
+          sample_result_component_signed = builder_->createCompositeExtract(
+              sample_result_signed, type_float_, result_component_index);
+          builder_->createBranch(&block_sign_merge);
+        }
+        // Unsigned biased.
+        builder_->setBuildPoint(&block_sign_unsigned_biased);
+        spv::Id sample_result_component_unsigned_biased =
+            builder_->createBinOp(spv::OpFMul, type_float_,
+                                  sample_result_component_unsigned,
+                                  const_float_2);
+        builder_->addDecoration(sample_result_component_unsigned_biased,
+                                spv::DecorationNoContraction);
+        sample_result_component_unsigned_biased = builder_->createBinOp(
+            spv::OpFAdd, type_float_,
+            sample_result_component_unsigned_biased, const_float_minus_1);
+        builder_->addDecoration(sample_result_component_unsigned_biased,
+                                spv::DecorationNoContraction);
+        builder_->createBranch(&block_sign_merge);
+        // Gamma.
+        builder_->setBuildPoint(&block_sign_gamma_start);
+        // TODO(Triang3l): Gamma resolve target as sRGB sampling.
+        spv::Id sample_result_component_gamma =
+            PWLGammaToLinear(sample_result_component_unsigned, false);
+        // Get the current build point for the phi operation not to assume
+        // that it will be the same as before PWLGammaToLinear.
+        spv::Block& block_sign_gamma_end = *builder_->getBuildPoint();
+        builder_->createBranch(&block_sign_merge);
+        // Merge.
+        builder_->setBuildPoint(&block_sign_merge);
+        {
+          std::unique_ptr<spv::Instruction> sign_phi_op =
+              std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                                 type_float_, spv::OpPhi);
+          if (block_sign_signed) {
+            sign_phi_op->addIdOperand(sample_result_component_signed);
+            sign_phi_op->addIdOperand(block_sign_signed->getId());
+          }
+          sign_phi_op->addIdOperand(
+              sample_result_component_unsigned_biased);
+          sign_phi_op->addIdOperand(block_sign_unsigned_biased.getId());
+          sign_phi_op->addIdOperand(sample_result_component_gamma);
+          sign_phi_op->addIdOperand(block_sign_gamma_end.getId());
+          sign_phi_op->addIdOperand(sample_result_component_unsigned);
+          sign_phi_op->addIdOperand(block_sign_head.getId());
+          result[result_component_index] = sign_phi_op->getResultId();
+          builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op));
+        }
+      }
+    }
+
+    // Apply the exponent bias from the bits 13:18 of the fetch constant
+    // word 4.
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(2);
+    id_vector_temp_.push_back(builder_->makeFloatConstant(1.0f));
+    id_vector_temp_.push_back(builder_->createTriOp(
+        spv::OpBitFieldSExtract, type_int_, fetch_constant_word_4_signed,
+        builder_->makeUintConstant(13), builder_->makeUintConstant(6)));
+    spv::Id result_exponent_bias =
+        builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
+                                    GLSLstd450Ldexp, id_vector_temp_);
+    {
+      uint32_t result_remaining_components = used_result_nonzero_components;
+      uint32_t result_component_index;
+      while (xe::bit_scan_forward(result_remaining_components,
+                                  &result_component_index)) {
+        result_remaining_components &=
+            ~(UINT32_C(1) << result_component_index);
+        spv::Id& result_component_ref = result[result_component_index];
+        result_component_ref = builder_->createBinOp(
+            spv::OpFMul, type_float_, result_component_ref,
+            result_exponent_bias);
+        builder_->addDecoration(result_component_ref,
+                                spv::DecorationNoContraction);
+      }
+    }
+  }
+
+  // Store the needed components of the result.
+  spv::Id result_vector;
+  if (used_result_component_count > 1) {
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(used_result_component_count);
+    uint32_t result_components_remaining = used_result_components;
+    uint32_t result_component_index;
+    while (xe::bit_scan_forward(result_components_remaining,
+                                &result_component_index)) {
+      result_components_remaining &=
+          ~(UINT32_C(1) << result_component_index);
+      id_vector_temp_.push_back(result[result_component_index]);
+    }
+    result_vector = builder_->createCompositeConstruct(
+        type_float_vectors_[used_result_component_count - 1],
+        id_vector_temp_);
+  } else {
+    uint32_t result_component_index;
+    xe::bit_scan_forward(used_result_components, &result_component_index);
+    result_vector = result[result_component_index];
+  }
+  StoreResult(instr.result, result_vector);
+}
+
+size_t SpirvShaderTranslator::FindOrAddTextureBinding(
+    uint32_t fetch_constant, xenos::FetchOpDimension dimension,
+    bool is_signed) {
+  // 1D and 2D textures (including stacked ones) are treated as 2D arrays for
+  // binding and coordinate simplicity.
+ if (dimension == xenos::FetchOpDimension::k1D) { + dimension = xenos::FetchOpDimension::k2D; + } + for (size_t i = 0; i < texture_bindings_.size(); ++i) { + const TextureBinding& texture_binding = texture_bindings_[i]; + if (texture_binding.fetch_constant == fetch_constant && + texture_binding.dimension == dimension && + texture_binding.is_signed == is_signed) { + return i; + } + } + // TODO(Triang3l): Limit the total count to that actually supported by the + // implementation. + size_t new_texture_binding_index = texture_bindings_.size(); + TextureBinding& new_texture_binding = texture_bindings_.emplace_back(); + new_texture_binding.fetch_constant = fetch_constant; + new_texture_binding.dimension = dimension; + new_texture_binding.is_signed = is_signed; + spv::Dim type_dimension; + bool is_array; + const char* dimension_name; + switch (dimension) { + case xenos::FetchOpDimension::k3DOrStacked: + type_dimension = spv::Dim3D; + is_array = false; + dimension_name = "3d"; + break; + case xenos::FetchOpDimension::kCube: + type_dimension = spv::DimCube; + is_array = false; + dimension_name = "cube"; + break; + default: + type_dimension = spv::Dim2D; + is_array = true; + dimension_name = "2d"; + } + new_texture_binding.variable = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder_->makeImageType(type_float_, type_dimension, false, is_array, + false, 1, spv::ImageFormatUnknown), + fmt::format("xe_texture{}_{}_{}", fetch_constant, dimension_name, + is_signed ? 's' : 'u') + .c_str()); + builder_->addDecoration( + new_texture_binding.variable, spv::DecorationDescriptorSet, + int(is_vertex_shader() ? kDescriptorSetTexturesVertex + : kDescriptorSetTexturesPixel)); + builder_->addDecoration(new_texture_binding.variable, spv::DecorationBinding, + int(new_texture_binding_index)); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(new_texture_binding.variable); + } + return new_texture_binding_index; +} + +size_t SpirvShaderTranslator::FindOrAddSamplerBinding( + uint32_t fetch_constant, xenos::TextureFilter mag_filter, + xenos::TextureFilter min_filter, xenos::TextureFilter mip_filter, + xenos::AnisoFilter aniso_filter) { + if (aniso_filter != xenos::AnisoFilter::kUseFetchConst) { + // TODO(Triang3l): Limit to what's actually supported by the implementation. + aniso_filter = std::min(aniso_filter, xenos::AnisoFilter::kMax_16_1); + } + for (size_t i = 0; i < sampler_bindings_.size(); ++i) { + const SamplerBinding& sampler_binding = sampler_bindings_[i]; + if (sampler_binding.fetch_constant == fetch_constant && + sampler_binding.mag_filter == mag_filter && + sampler_binding.min_filter == min_filter && + sampler_binding.mip_filter == mip_filter && + sampler_binding.aniso_filter == aniso_filter) { + return i; + } + } + // TODO(Triang3l): Limit the total count to that actually supported by the + // implementation. 
+ size_t new_sampler_binding_index = sampler_bindings_.size(); + SamplerBinding& new_sampler_binding = sampler_bindings_.emplace_back(); + new_sampler_binding.fetch_constant = fetch_constant; + new_sampler_binding.mag_filter = mag_filter; + new_sampler_binding.min_filter = min_filter; + new_sampler_binding.mip_filter = mip_filter; + new_sampler_binding.aniso_filter = aniso_filter; + std::ostringstream name; + static const char kFilterSuffixes[] = {'p', 'l', 'b', 'f'}; + name << "xe_sampler" << fetch_constant << '_' + << kFilterSuffixes[uint32_t(mag_filter)] + << kFilterSuffixes[uint32_t(min_filter)] + << kFilterSuffixes[uint32_t(mip_filter)]; + if (aniso_filter != xenos::AnisoFilter::kUseFetchConst) { + if (aniso_filter == xenos::AnisoFilter::kDisabled) { + name << "_a0"; + } else { + name << "_a" << (UINT32_C(1) << (uint32_t(aniso_filter) - 1)); + } + } + new_sampler_binding.variable = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder_->makeSamplerType(), name.str().c_str()); + builder_->addDecoration( + new_sampler_binding.variable, spv::DecorationDescriptorSet, + int(is_vertex_shader() ? kDescriptorSetSamplersVertex + : kDescriptorSetSamplersPixel)); + builder_->addDecoration(new_sampler_binding.variable, spv::DecorationBinding, + int(new_sampler_binding_index)); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(new_sampler_binding.variable); + } + return new_sampler_binding_index; +} + +void SpirvShaderTranslator::SampleTexture( + spv::Builder::TextureParameters& texture_parameters, + spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, + spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed, + spv::Id is_any_signed, spv::Id& result_unsigned_out, + spv::Id& result_signed_out, spv::Id lerp_factor, + spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) { + for (uint32_t i = 0; i < 2; ++i) { + spv::Block& block_sign_head = *builder_->getBuildPoint(); + spv::Block& block_sign = builder_->makeNewBlock(); + spv::Block& block_sign_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_sign_merge.getId(), + spv::SelectionControlDontFlattenMask); + // Unsigned (i == 0) - if there are any non-signed components. + // Signed (i == 1) - if there are any signed components. + builder_->createConditionalBranch(i ? is_any_signed : is_all_signed, + i ? &block_sign : &block_sign_merge, + i ? &block_sign_merge : &block_sign); + builder_->setBuildPoint(&block_sign); + spv::Id image = i ? image_signed : image_unsigned; + // OpSampledImage must be in the same block as where its result is used. + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image)), image, + sampler); + spv::Id result = builder_->createTextureCall( + spv::NoPrecision, type_float4_, false, false, false, false, false, + texture_parameters, image_operands_mask); + if (lerp_factor != spv::NoResult) { + spv::Id lerp_first = i ? 
lerp_first_signed : lerp_first_unsigned;
+      if (lerp_first != spv::NoResult) {
+        spv::Id lerp_difference = builder_->createBinOp(
+            spv::OpFSub, type_float4_, result, lerp_first);
+        builder_->addDecoration(lerp_difference,
+                                spv::DecorationNoContraction);
+        lerp_difference =
+            builder_->createBinOp(spv::OpVectorTimesScalar, type_float4_,
+                                  lerp_difference, lerp_factor);
+        builder_->addDecoration(lerp_difference,
+                                spv::DecorationNoContraction);
+        result = builder_->createBinOp(spv::OpFAdd, type_float4_, result,
+                                       lerp_difference);
+        builder_->addDecoration(result, spv::DecorationNoContraction);
+      }
+    }
+    builder_->createBranch(&block_sign_merge);
+    builder_->setBuildPoint(&block_sign_merge);
+    {
+      std::unique_ptr<spv::Instruction> phi_op =
+          std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                             type_float4_, spv::OpPhi);
+      phi_op->addIdOperand(result);
+      phi_op->addIdOperand(block_sign.getId());
+      phi_op->addIdOperand(const_float4_0_);
+      phi_op->addIdOperand(block_sign_head.getId());
+      // This may overwrite the first lerp endpoint for the sign (such usage
+      // of this function is allowed).
+      (i ? result_signed_out : result_unsigned_out) = phi_op->getResultId();
+      builder_->getBuildPoint()->addInstruction(std::move(phi_op));
+    }
+  }
+}
+
+spv::Id SpirvShaderTranslator::QueryTextureLod(
+    spv::Builder::TextureParameters& texture_parameters,
+    spv::Id image_unsigned, spv::Id image_signed, spv::Id sampler,
+    spv::Id is_all_signed) {
+  // OpSampledImage must be in the same block as where its result is used.
+  spv::Block& block_sign_head = *builder_->getBuildPoint();
+  spv::Block& block_sign_signed = builder_->makeNewBlock();
+  spv::Block& block_sign_unsigned = builder_->makeNewBlock();
+  spv::Block& block_sign_merge = builder_->makeNewBlock();
+  SpirvCreateSelectionMerge(block_sign_merge.getId(),
+                            spv::SelectionControlDontFlattenMask);
+  builder_->createConditionalBranch(is_all_signed, &block_sign_signed,
+                                    &block_sign_unsigned);
+  builder_->setBuildPoint(&block_sign_signed);
+  texture_parameters.sampler = builder_->createBinOp(
+      spv::OpSampledImage,
+      builder_->makeSampledImageType(builder_->getTypeId(image_signed)),
+      image_signed, sampler);
+  spv::Id lod_signed = builder_->createCompositeExtract(
+      builder_->createTextureQueryCall(spv::OpImageQueryLod,
+                                       texture_parameters, false),
+      type_float_, 1);
+  builder_->createBranch(&block_sign_merge);
+  builder_->setBuildPoint(&block_sign_unsigned);
+  texture_parameters.sampler = builder_->createBinOp(
+      spv::OpSampledImage,
+      builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)),
+      image_unsigned, sampler);
+  spv::Id lod_unsigned = builder_->createCompositeExtract(
+      builder_->createTextureQueryCall(spv::OpImageQueryLod,
+                                       texture_parameters, false),
+      type_float_, 1);
+  builder_->createBranch(&block_sign_merge);
+  builder_->setBuildPoint(&block_sign_merge);
+  spv::Id result;
+  {
+    std::unique_ptr<spv::Instruction> sign_phi_op =
+        std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                           type_float_, spv::OpPhi);
+    sign_phi_op->addIdOperand(lod_signed);
+    sign_phi_op->addIdOperand(block_sign_signed.getId());
+    sign_phi_op->addIdOperand(lod_unsigned);
+    sign_phi_op->addIdOperand(block_sign_unsigned.getId());
+    result = sign_phi_op->getResultId();
+    builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op));
+  }
+  return result;
+}
+
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc
new file mode 100644
index 000000000..c594a902f
--- /dev/null
+++ b/src/xenia/gpu/spirv_shader_translator_rb.cc
@@ -0,0 +1,648 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2022 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv_shader_translator.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "third_party/glslang/SPIRV/GLSL.std.450.h"
+#include "xenia/base/assert.h"
+#include "xenia/base/math.h"
+
+namespace xe {
+namespace gpu {
+
+spv::Id SpirvShaderTranslator::PreClampedFloat32To7e3(
+    spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
+  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
+  // Assuming the value is already clamped to [0, 31.875].
+
+  spv::Id type_uint = builder.makeUintType(32);
+
+  // Need the source as uint for bit operations.
+  {
+    spv::Id source_type = builder.getTypeId(f32_scalar);
+    assert_true(builder.isScalarType(source_type));
+    if (!builder.isUintType(source_type)) {
+      f32_scalar =
+          builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
+    }
+  }
+
+  // The denormal 7e3 case.
+  // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
+  spv::Id denormal_biased_f32;
+  {
+    spv::Instruction* denormal_insert_instruction = new spv::Instruction(
+        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
+    denormal_insert_instruction->addIdOperand(f32_scalar);
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
+    denormal_biased_f32 = denormal_insert_instruction->getResultId();
+  }
+  // denormal_biased_f32_shift_amount = min(125 - (f32 >> 23), 24)
+  // Not allowing the shift to overflow as that's undefined in SPIR-V.
+  spv::Id denormal_biased_f32_shift_amount;
+  {
+    spv::Instruction* denormal_shift_amount_instruction =
+        new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
+    denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
+    denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
+    denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
+        spv::OpISub, type_uint, builder.makeUintConstant(125),
+        builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
+                            builder.makeUintConstant(23))));
+    denormal_shift_amount_instruction->addIdOperand(
+        builder.makeUintConstant(24));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
+    denormal_biased_f32_shift_amount =
+        denormal_shift_amount_instruction->getResultId();
+  }
+  // denormal_biased_f32 =
+  //     ((f32 & 0x7FFFFF) | 0x800000) >> min(125 - (f32 >> 23), 24)
+  denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical,
+                                            type_uint, denormal_biased_f32,
+                                            denormal_biased_f32_shift_amount);
+
+  // The normal 7e3 case.
+  // Bias the exponent.
+  // normal_biased_f32 = f32 - (124 << 23)
+  spv::Id normal_biased_f32 =
+      builder.createBinOp(spv::OpISub, type_uint, f32_scalar,
+                          builder.makeUintConstant(UINT32_C(124) << 23));
+
+  // Select the needed conversion depending on whether the number is too small
+  // to be represented as normalized 7e3.
+  spv::Id biased_f32 = builder.createTriOp(
+      spv::OpSelect, type_uint,
+      builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), f32_scalar,
+                          builder.makeUintConstant(0x3E800000)),
+      denormal_biased_f32, normal_biased_f32);
+
+  // Build the 7e3 number rounding to the nearest even.
+  // ((biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1)) >> 16) & 0x3FF
+  return builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint,
+      builder.createBinOp(
+          spv::OpIAdd, type_uint,
+          builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
+                              builder.makeUintConstant(0x7FFF)),
+          builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
+                              builder.makeUintConstant(16),
+                              builder.makeUintConstant(1))),
+      builder.makeUintConstant(16), builder.makeUintConstant(10));
+}
+
+spv::Id SpirvShaderTranslator::UnclampedFloat32To7e3(
+    spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
+  spv::Id type_float = builder.makeFloatType(32);
+
+  // Need the source as float for clamping.
+  {
+    spv::Id source_type = builder.getTypeId(f32_scalar);
+    assert_true(builder.isScalarType(source_type));
+    if (!builder.isFloatType(source_type)) {
+      f32_scalar =
+          builder.createUnaryOp(spv::OpBitcast, type_float, f32_scalar);
+    }
+  }
+
+  {
+    spv::Instruction* clamp_instruction =
+        new spv::Instruction(builder.getUniqueId(), type_float,
+                             spv::OpExtInst);
+    clamp_instruction->addIdOperand(ext_inst_glsl_std_450);
+    clamp_instruction->addImmediateOperand(GLSLstd450NClamp);
+    clamp_instruction->addIdOperand(f32_scalar);
+    clamp_instruction->addIdOperand(builder.makeFloatConstant(0.0f));
+    clamp_instruction->addIdOperand(builder.makeFloatConstant(31.875f));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(clamp_instruction));
+    f32_scalar = clamp_instruction->getResultId();
+  }
+
+  return PreClampedFloat32To7e3(builder, f32_scalar, ext_inst_glsl_std_450);
+}
+
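As a plain-C++ cross-check of the 7e3 packing implemented by the two functions above: the 10-bit float of Xenos 2_10_10_10_FLOAT render targets has 3 exponent bits with bias 3 and 7 mantissa bits, covering [0, 31.875]. A sketch under the same preclamped-input assumption, not part of the diff:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>

uint32_t Float32To7e3(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  uint32_t biased;
  if (u < 0x3E800000u) {
    // Below 2^-2, the smallest 7e3 normal: make the implicit 1 explicit and
    // shift the mantissa into the denormal range (capped so the shift amount
    // stays defined).
    uint32_t mantissa = (u & 0x7FFFFFu) | 0x800000u;
    uint32_t shift = std::min(125u - (u >> 23), 24u);
    biased = mantissa >> shift;
  } else {
    biased = u - (124u << 23);  // Rebias the exponent: 127 - 3 = 124.
  }
  // Round to nearest even, then extract the 10 result bits.
  return ((biased + 0x7FFFu + ((biased >> 16) & 1u)) >> 16) & 0x3FFu;
}
```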
+spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder,
+                                            spv::Id f10_uint_scalar,
+                                            uint32_t f10_shift,
+                                            bool result_as_uint,
+                                            spv::Id ext_inst_glsl_std_450) {
+  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
+
+  assert_true(builder.isUintType(builder.getTypeId(f10_uint_scalar)));
+  assert_true(f10_shift <= (32 - 10));
+
+  spv::Id type_bool = builder.makeBoolType();
+  spv::Id type_int = builder.makeIntType(32);
+  spv::Id type_uint = builder.makeUintType(32);
+
+  spv::Id f10_unbiased_exponent = builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
+      builder.makeUintConstant(f10_shift + 7), builder.makeUintConstant(3));
+  spv::Id f10_mantissa = builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
+      builder.makeUintConstant(f10_shift), builder.makeUintConstant(7));
+
+  // The denormal nonzero 7e3 case.
+  // denormal_mantissa_msb = findMSB(f10_mantissa)
+  spv::Id denormal_mantissa_msb;
+  {
+    spv::Instruction* denormal_mantissa_msb_instruction =
+        new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
+    denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
+    denormal_mantissa_msb_instruction->addImmediateOperand(
+        GLSLstd450FindUMsb);
+    denormal_mantissa_msb_instruction->addIdOperand(f10_mantissa);
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
+    denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
+  }
+  denormal_mantissa_msb =
+      builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
+  // denormal_f32_unbiased_exponent = 1 - (7 - findMSB(f10_mantissa))
+  // Or:
+  // denormal_f32_unbiased_exponent = findMSB(f10_mantissa) - 6
+  spv::Id denormal_f32_unbiased_exponent =
+      builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
+                          builder.makeUintConstant(6));
+  // Normalize the mantissa.
+  // denormal_f32_mantissa = f10_mantissa << (7 - findMSB(f10_mantissa))
+  spv::Id denormal_f32_mantissa = builder.createBinOp(
+      spv::OpShiftLeftLogical, type_uint, f10_mantissa,
+      builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(7),
+                          denormal_mantissa_msb));
+  // If the 7e3 number is zero, make sure the float32 number is zero too.
+  spv::Id f10_mantissa_is_nonzero = builder.createBinOp(
+      spv::OpINotEqual, type_bool, f10_mantissa, builder.makeUintConstant(0));
+  // Set the unbiased exponent to -124 for zero - 124 will be added later,
+  // resulting in zero float32.
+  denormal_f32_unbiased_exponent = builder.createTriOp(
+      spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
+      denormal_f32_unbiased_exponent,
+      builder.makeUintConstant(uint32_t(-124)));
+  denormal_f32_mantissa =
+      builder.createTriOp(spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
+                          denormal_f32_mantissa, builder.makeUintConstant(0));
+
+  // Select the needed conversion depending on whether the number is normal.
+  spv::Id f10_is_normal =
+      builder.createBinOp(spv::OpINotEqual, type_bool, f10_unbiased_exponent,
+                          builder.makeUintConstant(0));
+  spv::Id f32_unbiased_exponent = builder.createTriOp(
+      spv::OpSelect, type_uint, f10_is_normal, f10_unbiased_exponent,
+      denormal_f32_unbiased_exponent);
+  spv::Id f32_mantissa =
+      builder.createTriOp(spv::OpSelect, type_uint, f10_is_normal,
+                          f10_mantissa, denormal_f32_mantissa);
+
+  // Bias the exponent and construct the float32 number.
+  spv::Id f32_shifted;
+  {
+    spv::Instruction* f32_insert_instruction = new spv::Instruction(
+        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
+    f32_insert_instruction->addIdOperand(f32_mantissa);
+    f32_insert_instruction->addIdOperand(
+        builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
+                            builder.makeUintConstant(124)));
+    f32_insert_instruction->addIdOperand(builder.makeUintConstant(7));
+    f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(f32_insert_instruction));
+    f32_shifted = f32_insert_instruction->getResultId();
+  }
+  spv::Id f32 =
+      builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
+                          builder.makeUintConstant(23 - 7));
+
+  if (!result_as_uint) {
+    f32 =
+        builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
+  }
+
+  return f32;
+}
+
+spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
+    spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even,
+    bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) {
+  // CFloat24 from d3dref9.dll
+
+  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
+  // Assuming the value is already clamped to [0, 2) (in all places, the depth
+  // is written with saturation).
+
+  uint32_t remap_bias = uint32_t(remap_from_0_to_0_5);
+
+  spv::Id type_uint = builder.makeUintType(32);
+
+  // Need the source as uint for bit operations.
+  {
+    spv::Id source_type = builder.getTypeId(f32_scalar);
+    assert_true(builder.isScalarType(source_type));
+    if (!builder.isUintType(source_type)) {
+      f32_scalar =
+          builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
+    }
+  }
+
+  // The denormal 20e4 case.
+  // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
+  spv::Id denormal_biased_f32;
+  {
+    spv::Instruction* denormal_insert_instruction = new spv::Instruction(
+        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
+    denormal_insert_instruction->addIdOperand(f32_scalar);
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
+    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
+    denormal_biased_f32 = denormal_insert_instruction->getResultId();
+  }
+  // denormal_biased_f32_shift_amount = min(113 - (f32 >> 23), 24)
+  // Not allowing the shift to overflow as that's undefined in SPIR-V.
+  spv::Id denormal_biased_f32_shift_amount;
+  {
+    spv::Instruction* denormal_shift_amount_instruction =
+        new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
+    denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
+    denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
+    denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
+        spv::OpISub, type_uint, builder.makeUintConstant(113 - remap_bias),
+        builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
+                            builder.makeUintConstant(23))));
+    denormal_shift_amount_instruction->addIdOperand(
+        builder.makeUintConstant(24));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
+    denormal_biased_f32_shift_amount =
+        denormal_shift_amount_instruction->getResultId();
+  }
+  // denormal_biased_f32 =
+  //     ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24)
+  denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical,
+                                            type_uint, denormal_biased_f32,
+                                            denormal_biased_f32_shift_amount);
+
+  // The normal 20e4 case.
+  // Bias the exponent.
+  // normal_biased_f32 = f32 - (112 << 23)
+  spv::Id normal_biased_f32 = builder.createBinOp(
+      spv::OpISub, type_uint, f32_scalar,
+      builder.makeUintConstant((UINT32_C(112) - remap_bias) << 23));
+
+  // Select the needed conversion depending on whether the number is too small
+  // to be represented as normalized 20e4.
+  spv::Id biased_f32 = builder.createTriOp(
+      spv::OpSelect, type_uint,
+      builder.createBinOp(
+          spv::OpULessThan, builder.makeBoolType(), f32_scalar,
+          builder.makeUintConstant(0x38800000 - (remap_bias << 23))),
+      denormal_biased_f32, normal_biased_f32);
+
+  // Build the 20e4 number rounding to the nearest even or towards zero.
+  if (round_to_nearest_even) {
+    // biased_f32 += 3 + ((biased_f32 >> 3) & 1)
+    biased_f32 = builder.createBinOp(
+        spv::OpIAdd, type_uint,
+        builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
+                            builder.makeUintConstant(3)),
+        builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
+                            builder.makeUintConstant(3),
+                            builder.makeUintConstant(1)));
+  }
+  return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
+                             builder.makeUintConstant(3),
+                             builder.makeUintConstant(24));
+}
+
+spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder,
+                                             spv::Id f24_uint_scalar,
+                                             uint32_t f24_shift,
+                                             bool remap_to_0_to_0_5,
+                                             bool result_as_uint,
+                                             spv::Id ext_inst_glsl_std_450) {
+  // CFloat24 from d3dref9.dll
+
+  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
+
+  assert_true(builder.isUintType(builder.getTypeId(f24_uint_scalar)));
+  assert_true(f24_shift <= (32 - 24));
+
+  uint32_t remap_bias = uint32_t(remap_to_0_to_0_5);
+
+  spv::Id type_bool = builder.makeBoolType();
+  spv::Id type_int = builder.makeIntType(32);
+  spv::Id type_uint = builder.makeUintType(32);
+
+  spv::Id f24_unbiased_exponent = builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
+      builder.makeUintConstant(f24_shift + 20), builder.makeUintConstant(4));
+  spv::Id f24_mantissa = builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
+      builder.makeUintConstant(f24_shift), builder.makeUintConstant(20));
+
+  // The denormal nonzero 20e4 case.
+  // denormal_mantissa_msb = findMSB(f24_mantissa)
+  spv::Id denormal_mantissa_msb;
+  {
+    spv::Instruction* denormal_mantissa_msb_instruction =
+        new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
+    denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
+    denormal_mantissa_msb_instruction->addImmediateOperand(
+        GLSLstd450FindUMsb);
+    denormal_mantissa_msb_instruction->addIdOperand(f24_mantissa);
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
+    denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
+  }
+  denormal_mantissa_msb =
+      builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
+  // denormal_f32_unbiased_exponent = 1 - (20 - findMSB(f24_mantissa))
+  // Or:
+  // denormal_f32_unbiased_exponent = findMSB(f24_mantissa) - 19
+  spv::Id denormal_f32_unbiased_exponent =
+      builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
+                          builder.makeUintConstant(19));
+  // Normalize the mantissa.
+  // denormal_f32_mantissa = f24_mantissa << (20 - findMSB(f24_mantissa))
+  spv::Id denormal_f32_mantissa = builder.createBinOp(
+      spv::OpShiftLeftLogical, type_uint, f24_mantissa,
+      builder.createBinOp(spv::OpISub, type_uint,
+                          builder.makeUintConstant(20),
+                          denormal_mantissa_msb));
+  // If the 20e4 number is zero, make sure the float32 number is zero too.
+  spv::Id f24_mantissa_is_nonzero = builder.createBinOp(
+      spv::OpINotEqual, type_bool, f24_mantissa, builder.makeUintConstant(0));
+  // Set the unbiased exponent to -112 for zero - 112 will be added later,
+  // resulting in zero float32.
+  denormal_f32_unbiased_exponent = builder.createTriOp(
+      spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
+      denormal_f32_unbiased_exponent,
+      builder.makeUintConstant(uint32_t(-int32_t(112 - remap_bias))));
+  denormal_f32_mantissa =
+      builder.createTriOp(spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
+                          denormal_f32_mantissa, builder.makeUintConstant(0));
+
+  // Select the needed conversion depending on whether the number is normal.
+  spv::Id f24_is_normal =
+      builder.createBinOp(spv::OpINotEqual, type_bool, f24_unbiased_exponent,
+                          builder.makeUintConstant(0));
+  spv::Id f32_unbiased_exponent = builder.createTriOp(
+      spv::OpSelect, type_uint, f24_is_normal, f24_unbiased_exponent,
+      denormal_f32_unbiased_exponent);
+  spv::Id f32_mantissa =
+      builder.createTriOp(spv::OpSelect, type_uint, f24_is_normal,
+                          f24_mantissa, denormal_f32_mantissa);
+
+  // Bias the exponent and construct the float32 number.
+  spv::Id f32_shifted;
+  {
+    spv::Instruction* f32_insert_instruction = new spv::Instruction(
+        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
+    f32_insert_instruction->addIdOperand(f32_mantissa);
+    f32_insert_instruction->addIdOperand(
+        builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
+                            builder.makeUintConstant(112 - remap_bias)));
+    f32_insert_instruction->addIdOperand(builder.makeUintConstant(20));
+    f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
+    builder.getBuildPoint()->addInstruction(
+        std::unique_ptr<spv::Instruction>(f32_insert_instruction));
+    f32_shifted = f32_insert_instruction->getResultId();
+  }
+  spv::Id f32 =
+      builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
+                          builder.makeUintConstant(23 - 20));
+
+  if (!result_as_uint) {
+    f32 =
+        builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
+  }
+
+  return f32;
+}
+
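A CPU reference for the 20e4-to-float32 direction above, for cross-checking: 4 exponent bits with bias 15 and 20 mantissa bits cover [0, 2), and denormals are normalized with a find-MSB exactly as the SPIR-V does (Float7e3To32 is the same algorithm with widths 7/3 and bias 3). A sketch without the `remap_to_0_to_0_5` option, requiring C++20 for `std::bit_width`:

```cpp
#include <bit>
#include <cstdint>
#include <cstring>

float Depth20e4ToFloat32(uint32_t f24) {
  uint32_t exponent = (f24 >> 20) & 0xFu;
  uint32_t mantissa = f24 & 0xFFFFFu;
  if (!exponent) {
    if (!mantissa) {
      return 0.0f;
    }
    // Denormal: move the leading 1 of the mantissa to the implicit position
    // (GLSLstd450FindUMsb in the shader).
    uint32_t msb = uint32_t(std::bit_width(mantissa)) - 1;
    // 1 - (20 - msb); wraps around for msb < 19, cancelled out by the rebias.
    exponent = msb - 19u;
    mantissa = (mantissa << (20 - msb)) & 0xFFFFFu;
  }
  // Rebias the exponent (127 - 15 = 112) and assemble the float32 bits.
  uint32_t f32 = ((exponent + 112u) << 23) | (mantissa << 3);
  float result;
  std::memcpy(&result, &f32, sizeof(result));
  return result;
}
```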
+void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
+  id_vector_temp_.clear();
+  id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags));
+  spv::Id system_constant_flags = builder_->createLoad(
+      builder_->createAccessChain(spv::StorageClassUniform,
+                                  uniform_system_constants_,
+                                  id_vector_temp_),
+      spv::NoPrecision);
+
+  if (current_shader().writes_color_target(0) &&
+      !IsExecutionModeEarlyFragmentTests()) {
+    // Alpha test.
+    // TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9.
+    // Extract the comparison function (less, equal, greater bits).
+    spv::Id alpha_test_function = builder_->createTriOp(
+        spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_,
+        builder_->makeUintConstant(kSysFlag_AlphaPassIfLess_Shift),
+        builder_->makeUintConstant(3));
+    // Check if the comparison function is not "always" - that one should
+    // likely pass even for NaN, unlike "less, equal or greater".
+    spv::Id alpha_test_function_is_non_always = builder_->createBinOp(
+        spv::OpINotEqual, type_bool_, alpha_test_function,
+        builder_->makeUintConstant(
+            uint32_t(xenos::CompareFunction::kAlways)));
+    spv::Block& block_alpha_test = builder_->makeNewBlock();
+    spv::Block& block_alpha_test_merge = builder_->makeNewBlock();
+    SpirvCreateSelectionMerge(block_alpha_test_merge.getId(),
+                              spv::SelectionControlDontFlattenMask);
+    builder_->createConditionalBranch(alpha_test_function_is_non_always,
+                                      &block_alpha_test,
+                                      &block_alpha_test_merge);
+    builder_->setBuildPoint(&block_alpha_test);
+    {
+      id_vector_temp_.clear();
+      id_vector_temp_.push_back(builder_->makeIntConstant(3));
+      spv::Id alpha_test_alpha =
+          builder_->createLoad(builder_->createAccessChain(
+                                   spv::StorageClassOutput,
+                                   output_fragment_data_[0],
+                                   id_vector_temp_),
+                               spv::NoPrecision);
+      id_vector_temp_.clear();
+      id_vector_temp_.push_back(
+          builder_->makeIntConstant(kSystemConstantAlphaTestReference));
+      spv::Id alpha_test_reference =
+          builder_->createLoad(builder_->createAccessChain(
+                                   spv::StorageClassUniform,
+                                   uniform_system_constants_,
+                                   id_vector_temp_),
+                               spv::NoPrecision);
+      // The comparison function is not "always" - perform the alpha test.
+      // Handle "not equal" specially (specifically as "not equal" so it's
+      // true for NaN, not "less or greater" which is false for NaN).
+      spv::Id alpha_test_function_is_not_equal = builder_->createBinOp(
+          spv::OpIEqual, type_bool_, alpha_test_function,
+          builder_->makeUintConstant(
+              uint32_t(xenos::CompareFunction::kNotEqual)));
+      spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock();
+      spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock();
+      spv::Block& block_alpha_test_not_equal_merge =
+          builder_->makeNewBlock();
+      SpirvCreateSelectionMerge(block_alpha_test_not_equal_merge.getId(),
+                                spv::SelectionControlDontFlattenMask);
+      builder_->createConditionalBranch(alpha_test_function_is_not_equal,
+                                        &block_alpha_test_not_equal,
+                                        &block_alpha_test_non_not_equal);
+      spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal;
+      builder_->setBuildPoint(&block_alpha_test_not_equal);
+      {
+        // "Not equal" function.
+        alpha_test_result_not_equal =
+            builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_,
+                                  alpha_test_alpha, alpha_test_reference);
+        builder_->createBranch(&block_alpha_test_not_equal_merge);
+      }
+      builder_->setBuildPoint(&block_alpha_test_non_not_equal);
+      {
+        // Function other than "not equal".
+        static const spv::Op kAlphaTestOps[] = {
+            spv::OpFOrdLessThan, spv::OpFOrdEqual, spv::OpFOrdGreaterThan};
+        for (uint32_t i = 0; i < 3; ++i) {
+          spv::Id alpha_test_comparison_result = builder_->createBinOp(
+              spv::OpLogicalAnd, type_bool_,
+              builder_->createBinOp(kAlphaTestOps[i], type_bool_,
+                                    alpha_test_alpha, alpha_test_reference),
+              builder_->createBinOp(
+                  spv::OpINotEqual, type_bool_,
+                  builder_->createBinOp(
+                      spv::OpBitwiseAnd, type_uint_, alpha_test_function,
+                      builder_->makeUintConstant(UINT32_C(1) << i)),
+                  const_uint_0_));
+          if (i) {
+            alpha_test_result_non_not_equal = builder_->createBinOp(
+                spv::OpLogicalOr, type_bool_,
+                alpha_test_result_non_not_equal,
+                alpha_test_comparison_result);
+          } else {
+            alpha_test_result_non_not_equal = alpha_test_comparison_result;
+          }
+        }
+        builder_->createBranch(&block_alpha_test_not_equal_merge);
+      }
+      builder_->setBuildPoint(&block_alpha_test_not_equal_merge);
+      spv::Id alpha_test_result;
+      {
+        std::unique_ptr<spv::Instruction> alpha_test_result_phi_op =
+            std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                               type_bool_, spv::OpPhi);
+        alpha_test_result_phi_op->addIdOperand(alpha_test_result_not_equal);
+        alpha_test_result_phi_op->addIdOperand(
+            block_alpha_test_not_equal.getId());
+        alpha_test_result_phi_op->addIdOperand(
+            alpha_test_result_non_not_equal);
+        alpha_test_result_phi_op->addIdOperand(
+            block_alpha_test_non_not_equal.getId());
+        alpha_test_result = alpha_test_result_phi_op->getResultId();
+        builder_->getBuildPoint()->addInstruction(
+            std::move(alpha_test_result_phi_op));
+      }
+      // Discard the pixel if the alpha test has failed. Creating a merge
+      // block even though it will contain just one OpBranch since SPIR-V
+      // requires structured control flow in shaders.
+      spv::Block& block_alpha_test_kill = builder_->makeNewBlock();
+      spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock();
+      SpirvCreateSelectionMerge(block_alpha_test_kill_merge.getId(),
+                                spv::SelectionControlDontFlattenMask);
+      builder_->createConditionalBranch(alpha_test_result,
+                                        &block_alpha_test_kill_merge,
+                                        &block_alpha_test_kill);
+      builder_->setBuildPoint(&block_alpha_test_kill);
+      builder_->createNoResultOp(spv::OpKill);
+      // OpKill terminates the block.
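The test concludes in the hunk below; as a CPU model of the predicate just built: the compare function packs its pass conditions into three bits (0b001 less, 0b010 equal, 0b100 greater), so `kAlways` (0b111) passes everything and `kNotEqual` (0b101) has to be a genuine unordered "not equal", which is true for NaN, rather than "less or greater", which is false for it. A sketch returning whether the fragment survives (the generated code issues `OpKill` otherwise):

```cpp
#include <cstdint>

bool AlphaTestPasses(uint32_t function_bits, float alpha, float reference) {
  if (function_bits == 0b111) {
    return true;  // kAlways - the generated code skips the test entirely.
  }
  if (function_bits == 0b101) {
    return !(alpha == reference);  // OpFUnordNotEqual - true for NaN.
  }
  // Ordered comparisons, all false for NaN, ORed per enabled bit.
  return ((function_bits & 0b001) && alpha < reference) ||
         ((function_bits & 0b010) && alpha == reference) ||
         ((function_bits & 0b100) && alpha > reference);
}
```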
+      builder_->setBuildPoint(&block_alpha_test_kill_merge);
+      builder_->createBranch(&block_alpha_test_merge);
+    }
+    builder_->setBuildPoint(&block_alpha_test_merge);
+  }
+
+  uint32_t color_targets_remaining = current_shader().writes_color_targets();
+  uint32_t color_target_index;
+  while (xe::bit_scan_forward(color_targets_remaining,
+                              &color_target_index)) {
+    color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
+    spv::Id color_variable = output_fragment_data_[color_target_index];
+    spv::Id color = builder_->createLoad(color_variable, spv::NoPrecision);
+
+    // Apply the exponent bias after the alpha test and alpha to coverage
+    // because they need the unbiased alpha from the shader.
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(2);
+    id_vector_temp_.push_back(
+        builder_->makeIntConstant(kSystemConstantColorExpBias));
+    id_vector_temp_.push_back(
+        builder_->makeIntConstant(int32_t(color_target_index)));
+    color = builder_->createBinOp(
+        spv::OpVectorTimesScalar, type_float4_, color,
+        builder_->createLoad(builder_->createAccessChain(
+                                 spv::StorageClassUniform,
+                                 uniform_system_constants_,
+                                 id_vector_temp_),
+                             spv::NoPrecision));
+    builder_->addDecoration(color, spv::DecorationNoContraction);
+
+    // Convert to gamma space - this is incorrect, since it must be done
+    // after blending on the Xbox 360, but this is just one of many blending
+    // issues in the host render target path.
+    // TODO(Triang3l): Gamma as sRGB check.
+    spv::Id color_rgb;
+    {
+      std::unique_ptr<spv::Instruction> color_rgb_shuffle_op =
+          std::make_unique<spv::Instruction>(
+              builder_->getUniqueId(), type_float3_, spv::OpVectorShuffle);
+      color_rgb_shuffle_op->addIdOperand(color);
+      color_rgb_shuffle_op->addIdOperand(color);
+      color_rgb_shuffle_op->addImmediateOperand(0);
+      color_rgb_shuffle_op->addImmediateOperand(1);
+      color_rgb_shuffle_op->addImmediateOperand(2);
+      color_rgb = color_rgb_shuffle_op->getResultId();
+      builder_->getBuildPoint()->addInstruction(
+          std::move(color_rgb_shuffle_op));
+    }
+    spv::Id is_gamma = builder_->createBinOp(
+        spv::OpINotEqual, type_bool_,
+        builder_->createBinOp(
+            spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
+            builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma
+                                       << color_target_index)),
+        const_uint_0_);
+    spv::Block& block_gamma_head = *builder_->getBuildPoint();
+    spv::Block& block_gamma = builder_->makeNewBlock();
+    spv::Block& block_gamma_merge = builder_->makeNewBlock();
+    SpirvCreateSelectionMerge(block_gamma_merge.getId());
+    builder_->createConditionalBranch(is_gamma, &block_gamma,
+                                      &block_gamma_merge);
+    builder_->setBuildPoint(&block_gamma);
+    spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false);
+    builder_->createBranch(&block_gamma_merge);
+    builder_->setBuildPoint(&block_gamma_merge);
+    {
+      std::unique_ptr<spv::Instruction> gamma_phi_op =
+          std::make_unique<spv::Instruction>(builder_->getUniqueId(),
+                                             type_float3_, spv::OpPhi);
+      gamma_phi_op->addIdOperand(color_rgb_gamma);
+      gamma_phi_op->addIdOperand(block_gamma.getId());
+      gamma_phi_op->addIdOperand(color_rgb);
+      gamma_phi_op->addIdOperand(block_gamma_head.getId());
+      color_rgb = gamma_phi_op->getResultId();
+      builder_->getBuildPoint()->addInstruction(std::move(gamma_phi_op));
+    }
+    {
+      std::unique_ptr<spv::Instruction> color_rgba_shuffle_op =
+          std::make_unique<spv::Instruction>(
+              builder_->getUniqueId(), type_float4_, spv::OpVectorShuffle);
+      color_rgba_shuffle_op->addIdOperand(color_rgb);
+      color_rgba_shuffle_op->addIdOperand(color);
+      color_rgba_shuffle_op->addImmediateOperand(0);
+      color_rgba_shuffle_op->addImmediateOperand(1);
+      color_rgba_shuffle_op->addImmediateOperand(2);
+      color_rgba_shuffle_op->addImmediateOperand(3 + 3);
+      color = color_rgba_shuffle_op->getResultId();
+      builder_->getBuildPoint()->addInstruction(
+          std::move(color_rgba_shuffle_op));
+    }
+
+    builder_->createStore(color, color_variable);
+  }
+}
+
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc
deleted file mode 100644
index 6ef89b289..000000000
--- a/src/xenia/gpu/vulkan/buffer_cache.cc
+++ /dev/null
@@ -1,850 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2016 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include "xenia/gpu/vulkan/buffer_cache.h"
-
-#include "xenia/base/logging.h"
-#include "xenia/base/math.h"
-#include "xenia/base/memory.h"
-#include "xenia/base/profiling.h"
-#include "xenia/gpu/gpu_flags.h"
-#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
-#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
-#include "xenia/ui/vulkan/vulkan_util.h"
-
-using namespace xe::gpu::xenos;
-
-namespace xe {
-namespace gpu {
-namespace vulkan {
-
-#if XE_ARCH_AMD64
-void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
-                                uint16_t cmp_value, size_t count) {
-  auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
-  auto src = reinterpret_cast<const uint16_t*>(src_ptr);
-  __m128i shufmask =
-      _mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
-                   0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
-  __m128i cmpval = _mm_set1_epi16(cmp_value);
-
-  size_t i;
-  for (i = 0; i + 8 <= count; i += 8) {
-    __m128i input =
-        _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
-    __m128i output = _mm_shuffle_epi8(input, shufmask);
-
-    __m128i mask = _mm_cmpeq_epi16(output, cmpval);
-    output = _mm_or_si128(output, mask);
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
-  }
-  for (; i < count; ++i) {  // handle residual elements
-    dest[i] = byte_swap(src[i]);
-  }
-}
-
-void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
-                                uint32_t cmp_value, size_t count) {
-  auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
-  auto src = reinterpret_cast<const uint32_t*>(src_ptr);
-  __m128i shufmask =
-      _mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
-                   0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
-  __m128i cmpval = _mm_set1_epi32(cmp_value);
-
-  size_t i;
-  for (i = 0; i + 4 <= count; i += 4) {
-    __m128i input =
-        _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
-    __m128i output = _mm_shuffle_epi8(input, shufmask);
-
-    __m128i mask = _mm_cmpeq_epi32(output, cmpval);
-    output = _mm_or_si128(output, mask);
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
-  }
-  for (; i < count; ++i) {  // handle residual elements
-    dest[i] = byte_swap(src[i]);
-  }
-}
-#else
-void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
-                                uint16_t cmp_value, size_t count) {
-  auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
-  auto src = reinterpret_cast<const uint16_t*>(src_ptr);
-  for (size_t i = 0; i < count; ++i) {
-    uint16_t value = byte_swap(src[i]);
-    dest[i] = value == cmp_value ? 0xFFFF : value;
-  }
-}
-
-void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
-                                uint32_t cmp_value, size_t count) {
-  auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
-  auto src = reinterpret_cast<const uint32_t*>(src_ptr);
-  for (size_t i = 0; i < count; ++i) {
-    uint32_t value = byte_swap(src[i]);
-    dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
-  }
-}
-#endif
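For context on the helpers being deleted above: they byte-swap big-endian guest index buffers while promoting any index that, after the swap, equals `cmp_value` to all-ones. This appears to be how a configurable Xenos primitive-reset index was converted to the fixed restart value the host API expects. A hypothetical usage sketch:

```cpp
#include <cstdint>

void ConvertGuestIndices() {
  // Big-endian guest indices 1, 2, 0x00FE, 3.
  uint16_t guest_indices[] = {0x0100, 0x0200, 0xFE00, 0x0300};
  uint16_t host_indices[4];
  // A guest primitive-reset index of 0x00FE, chosen for illustration.
  copy_cmp_swap_16_unaligned(host_indices, guest_indices, 0x00FE, 4);
  // host_indices is now {0x0001, 0x0002, 0xFFFF, 0x0003}.
}
```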
0xFFFF : value; - } -} - -void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr, - uint32_t cmp_value, size_t count) { - auto dest = reinterpret_cast(dest_ptr); - auto src = reinterpret_cast(src_ptr); - for (size_t i = 0; i < count; ++i) { - uint32_t value = byte_swap(src[i]); - dest[i] = value == cmp_value ? 0xFFFFFFFF : value; - } -} -#endif - -using xe::ui::vulkan::util::CheckResult; - -constexpr VkDeviceSize kConstantRegisterUniformRange = - 512 * 4 * 4 + 8 * 4 + 32 * 4; - -BufferCache::BufferCache(RegisterFile* register_file, Memory* memory, - const ui::vulkan::VulkanProvider& provider, - size_t capacity) - : register_file_(register_file), memory_(memory), provider_(provider) { - transient_buffer_ = std::make_unique( - provider_, - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - capacity, 256); -} - -BufferCache::~BufferCache() { Shutdown(); } - -VkResult BufferCache::Initialize() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - VkMemoryRequirements pool_reqs; - transient_buffer_->GetBufferMemoryRequirements(&pool_reqs); - VkMemoryAllocateInfo pool_allocate_info; - pool_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - pool_allocate_info.pNext = nullptr; - pool_allocate_info.allocationSize = pool_reqs.size; - pool_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType( - provider_, pool_reqs.memoryTypeBits, false); - if (pool_allocate_info.memoryTypeIndex == UINT32_MAX) { - return VK_ERROR_INITIALIZATION_FAILED; - } - status = dfn.vkAllocateMemory(device, &pool_allocate_info, nullptr, - &gpu_memory_pool_); - if (status != VK_SUCCESS) { - return status; - } - - status = transient_buffer_->Initialize(gpu_memory_pool_, 0); - if (status != VK_SUCCESS) { - return status; - } - - // Create a memory allocator for textures. - VmaVulkanFunctions vulkan_funcs = {}; - ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs, provider_); - - VmaAllocatorCreateInfo alloc_info = {}; - alloc_info.physicalDevice = provider_.physical_device(); - alloc_info.device = device; - alloc_info.pVulkanFunctions = &vulkan_funcs; - alloc_info.instance = provider_.instance(); - status = vmaCreateAllocator(&alloc_info, &mem_allocator_); - if (status != VK_SUCCESS) { - return status; - } - - status = CreateConstantDescriptorSet(); - if (status != VK_SUCCESS) { - return status; - } - - status = CreateVertexDescriptorPool(); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -VkResult BufferCache::CreateVertexDescriptorPool() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status; - - std::vector pool_sizes; - pool_sizes.push_back({ - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - 32 * 16384, - }); - vertex_descriptor_pool_ = std::make_unique( - provider_, 32 * 16384, pool_sizes); - - // 32 storage buffers available to vertex shader. - // TODO(DrChat): In the future, this could hold memexport staging data. 
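The pool above is sized for 32 storage-buffer descriptors in each of up to 16384 per-frame sets. A minimal sketch of how such a fenced pool is driven across a frame, using only the methods this file calls later (has_open_batch, BeginBatch, AcquireEntry, EndBatch, Scavenge); the DescriptorPool type is assumed to be the one declared in xenia/ui/vulkan/fenced_pools.h, and the two frame-driver functions are invented for illustration:

```cpp
#include "xenia/ui/vulkan/fenced_pools.h"

VkDescriptorSet AcquirePerDrawSet(xe::ui::vulkan::DescriptorPool& pool,
                                  VkDescriptorSetLayout layout,
                                  VkFence frame_fence) {
  if (!pool.has_open_batch()) {
    // Sets acquired from this batch are recycled once frame_fence signals.
    pool.BeginBatch(frame_fence);
  }
  return pool.AcquireEntry(layout);  // May return nullptr when exhausted.
}

void EndFrame(xe::ui::vulkan::DescriptorPool& pool) {
  if (pool.has_open_batch()) {
    pool.EndBatch();
  }
  pool.Scavenge();  // Frees batches whose fences have already signaled.
}
```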
- VkDescriptorSetLayoutBinding binding = { - 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - 32, VK_SHADER_STAGE_VERTEX_BIT, - nullptr, - }; - - VkDescriptorSetLayoutCreateInfo layout_info = { - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - nullptr, - 0, - 1, - &binding, - }; - status = dfn.vkCreateDescriptorSetLayout(device, &layout_info, nullptr, - &vertex_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -void BufferCache::FreeVertexDescriptorPool() { - vertex_descriptor_pool_.reset(); - - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, - device, vertex_descriptor_set_layout_); -} - -VkResult BufferCache::CreateConstantDescriptorSet() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Descriptor pool used for all of our cached descriptors. - // In the steady state we don't allocate anything, so these are all manually - // managed. - VkDescriptorPoolCreateInfo transient_descriptor_pool_info; - transient_descriptor_pool_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - transient_descriptor_pool_info.pNext = nullptr; - transient_descriptor_pool_info.flags = - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - transient_descriptor_pool_info.maxSets = 1; - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - pool_sizes[0].descriptorCount = 2; - transient_descriptor_pool_info.poolSizeCount = 1; - transient_descriptor_pool_info.pPoolSizes = pool_sizes; - status = dfn.vkCreateDescriptorPool(device, &transient_descriptor_pool_info, - nullptr, &constant_descriptor_pool_); - if (status != VK_SUCCESS) { - return status; - } - - // Create the descriptor set layout used for our uniform buffer. - // As it is a static binding that uses dynamic offsets during draws we can - // create this once and reuse it forever. - VkDescriptorSetLayoutBinding bindings[2] = {}; - - // Vertex constants - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[0].pImmutableSamplers = nullptr; - - // Fragment constants - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[1].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {}; - descriptor_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_info.pNext = nullptr; - descriptor_set_layout_info.flags = 0; - descriptor_set_layout_info.bindingCount = - static_cast(xe::countof(bindings)); - descriptor_set_layout_info.pBindings = bindings; - status = dfn.vkCreateDescriptorSetLayout(device, &descriptor_set_layout_info, - nullptr, - &constant_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - // Create the descriptor we'll use for the uniform buffer. - // This is what we hand out to everyone (who then also needs to use our - // offsets). 
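Because the two uniform bindings are VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, the set itself never changes; only the dynamic offsets supplied at bind time move within the transient ring. A hypothetical draw-time counterpart of the set created above (plain vulkan.h prototypes stand in for Xenia's DeviceFunctions table, and all handles are placeholders):

```cpp
#include <cstdint>

#include <vulkan/vulkan.h>

void BindConstantOffsets(VkCommandBuffer cmd, VkPipelineLayout layout,
                         VkDescriptorSet constant_set,
                         uint32_t vertex_constants_offset,
                         uint32_t fragment_constants_offset) {
  const uint32_t dynamic_offsets[] = {vertex_constants_offset,
                                      fragment_constants_offset};
  // The offsets apply to the two dynamic uniform bindings in the order they
  // appear in the set layout: binding 0 (vertex), binding 1 (fragment).
  vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                          /*firstSet=*/0, /*descriptorSetCount=*/1,
                          &constant_set, /*dynamicOffsetCount=*/2,
                          dynamic_offsets);
}
```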
- VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = constant_descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &constant_descriptor_set_layout_; - status = dfn.vkAllocateDescriptorSets(device, &set_alloc_info, - &constant_descriptor_set_); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize descriptor set with our buffers. - VkDescriptorBufferInfo buffer_info; - buffer_info.buffer = transient_buffer_->gpu_buffer(); - buffer_info.offset = 0; - buffer_info.range = kConstantRegisterUniformRange; - - VkWriteDescriptorSet descriptor_writes[2]; - auto& vertex_uniform_binding_write = descriptor_writes[0]; - vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vertex_uniform_binding_write.pNext = nullptr; - vertex_uniform_binding_write.dstSet = constant_descriptor_set_; - vertex_uniform_binding_write.dstBinding = 0; - vertex_uniform_binding_write.dstArrayElement = 0; - vertex_uniform_binding_write.descriptorCount = 1; - vertex_uniform_binding_write.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - vertex_uniform_binding_write.pBufferInfo = &buffer_info; - auto& fragment_uniform_binding_write = descriptor_writes[1]; - fragment_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - fragment_uniform_binding_write.pNext = nullptr; - fragment_uniform_binding_write.dstSet = constant_descriptor_set_; - fragment_uniform_binding_write.dstBinding = 1; - fragment_uniform_binding_write.dstArrayElement = 0; - fragment_uniform_binding_write.descriptorCount = 1; - fragment_uniform_binding_write.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - fragment_uniform_binding_write.pBufferInfo = &buffer_info; - dfn.vkUpdateDescriptorSets(device, 2, descriptor_writes, 0, nullptr); - - return VK_SUCCESS; -} - -void BufferCache::FreeConstantDescriptorSet() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - if (constant_descriptor_set_) { - dfn.vkFreeDescriptorSets(device, constant_descriptor_pool_, 1, - &constant_descriptor_set_); - constant_descriptor_set_ = nullptr; - } - - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, - device, - constant_descriptor_set_layout_); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, - constant_descriptor_pool_); -} - -void BufferCache::Shutdown() { - if (mem_allocator_) { - vmaDestroyAllocator(mem_allocator_); - mem_allocator_ = nullptr; - } - - FreeConstantDescriptorSet(); - FreeVertexDescriptorPool(); - - transient_buffer_->Shutdown(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, - gpu_memory_pool_); -} - -std::pair BufferCache::UploadConstantRegisters( - VkCommandBuffer command_buffer, - const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map, - VkFence fence) { - // Fat struct, including all registers: - // struct { - // vec4 float[512]; - // uint bool[8]; - // uint loop[32]; - // }; - auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; - } - - // Copy over all the registers. 
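A C++ mirror of that fat struct, just to make the 8352-byte kConstantRegisterUniformRange arithmetic concrete (the struct and field names here are invented; only the layout follows the comment above):

```cpp
#include <cstdint>

// Invented mirror of the tightly packed upload layout described above.
struct ConstantRegisterUpload {
  float float_constants[512][4];  // vec4 float[512]
  uint32_t bool_constants[8];     // uint bool[8]
  uint32_t loop_constants[32];    // uint loop[32]
};

// 512 * 4 * 4 + 8 * 4 + 32 * 4 == 8352 == kConstantRegisterUniformRange.
static_assert(sizeof(ConstantRegisterUpload) == 512 * 4 * 4 + 8 * 4 + 32 * 4);
```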
- const auto& values = register_file_->values; - uint8_t* dest_ptr = transient_buffer_->host_base() + offset; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, - (512 * 4 * 4)); - dest_ptr += 512 * 4 * 4; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - 8 * 4); - dest_ptr += 8 * 4; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32, - 32 * 4); - dest_ptr += 32 * 4; - - transient_buffer_->Flush(offset, kConstantRegisterUniformRange); - - // Append a barrier to the command buffer. - VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - kConstantRegisterUniformRange, - }; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - return {offset, offset}; - -// Packed upload code. -// This is not currently supported by the shaders, but would be awesome. -// We should be able to use this for any shader that does not do dynamic -// constant indexing. -#if 0 - // Allocate space in the buffer for our data. - auto offset = - AllocateTransientData(constant_register_map.packed_byte_length, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return VK_WHOLE_SIZE; - } - - // Run through registers and copy them into the buffer. - // TODO(benvanik): optimize this - it's hit twice every call. - const auto& values = register_file_->values; - uint8_t* dest_ptr = - reinterpret_cast(transient_buffer_data_) + offset; - for (int i = 0; i < 4; ++i) { - auto piece = constant_register_map.float_bitmap[i]; - if (!piece) { - continue; - } - for (int j = 0, sh = 0; j < 64; ++j, sh << 1) { - if (piece & sh) { - xe::copy_128_aligned( - dest_ptr, - &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1); - dest_ptr += 16; - } - } - } - for (int i = 0; i < 32; ++i) { - if (constant_register_map.loop_bitmap & (1 << i)) { - xe::store(dest_ptr, - values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32); - dest_ptr += 4; - } - } - for (int i = 0; i < 8; ++i) { - if (constant_register_map.bool_bitmap[i]) { - xe::store( - dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32); - dest_ptr += 4; - } - } - - return offset; -#endif // 0 -} - -std::pair BufferCache::UploadIndexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::IndexFormat format, VkFence fence) { - // Allocate space in the buffer for our data. - auto offset = AllocateTransientData(source_length, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return {nullptr, VK_WHOLE_SIZE}; - } - - const void* source_ptr = memory_->TranslatePhysical(source_addr); - - uint32_t prim_reset_index = - register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32; - bool prim_reset_enabled = - !!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)); - - // Copy data into the buffer. If primitive reset is enabled, translate any - // primitive reset indices to something Vulkan understands. - // TODO(benvanik): memcpy then use compute shaders to swap? - if (prim_reset_enabled) { - if (format == xenos::IndexFormat::kInt16) { - // Endian::k8in16, swap half-words. 
- copy_cmp_swap_16_unaligned( - transient_buffer_->host_base() + offset, source_ptr, - static_cast(prim_reset_index), source_length / 2); - } else if (format == xenos::IndexFormat::kInt32) { - // Endian::k8in32, swap words. - copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset, - source_ptr, prim_reset_index, - source_length / 4); - } - } else { - if (format == xenos::IndexFormat::kInt16) { - // Endian::k8in16, swap half-words. - xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset, - source_ptr, source_length / 2); - } else if (format == xenos::IndexFormat::kInt32) { - // Endian::k8in32, swap words. - xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset, - source_ptr, source_length / 4); - } - } - - transient_buffer_->Flush(offset, source_length); - - // Append a barrier to the command buffer. - VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_INDEX_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - source_length, - }; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - return {transient_buffer_->gpu_buffer(), offset}; -} - -std::pair BufferCache::UploadVertexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::Endian endian, VkFence fence) { - auto offset = FindCachedTransientData(source_addr, source_length); - if (offset != VK_WHOLE_SIZE) { - return {transient_buffer_->gpu_buffer(), offset}; - } - - // Slow path :) - // Expand the region up to the allocation boundary - auto physical_heap = memory_->GetPhysicalHeap(); - uint32_t upload_base = source_addr; - uint32_t upload_size = source_length; - - // Ping the memory subsystem for allocation size. - // TODO(DrChat): Artifacting occurring in 5841089E with this enabled. - // physical_heap->QueryBaseAndSize(&upload_base, &upload_size); - assert(upload_base <= source_addr); - uint32_t source_offset = source_addr - upload_base; - - // Allocate space in the buffer for our data. - offset = AllocateTransientData(upload_size, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - XELOGW( - "Failed to allocate transient data for vertex buffer! Wanted to " - "allocate {} bytes.", - upload_size); - return {nullptr, VK_WHOLE_SIZE}; - } - - const void* upload_ptr = memory_->TranslatePhysical(upload_base); - - // Copy data into the buffer. - // TODO(benvanik): memcpy then use compute shaders to swap? - if (endian == xenos::Endian::k8in32) { - // Endian::k8in32, swap words. - xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset, - upload_ptr, source_length / 4); - } else if (endian == xenos::Endian::k16in32) { - xe::copy_and_swap_16_in_32_unaligned( - transient_buffer_->host_base() + offset, upload_ptr, source_length / 4); - } else { - assert_always(); - } - - transient_buffer_->Flush(offset, upload_size); - - // Append a barrier to the command buffer. 
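The copy_cmp_swap_* helpers deleted above fold the endian swap and the primitive-reset remap into a single pass. A standalone demonstration of the 16-bit case (the helper name and sample values are hypothetical; 0xFFFF is the fixed Vulkan restart sentinel for 16-bit indices):

```cpp
#include <cstdint>
#include <cstdio>

// Byte-swap one big-endian guest index as loaded little-endian, and rewrite
// the guest's primitive-reset sentinel as 0xFFFF.
static uint16_t SwapAndRemapIndex(uint16_t guest_le_load, uint16_t reset) {
  uint16_t value = uint16_t((guest_le_load << 8) | (guest_le_load >> 8));
  return value == reset ? 0xFFFF : value;
}

int main() {
  // Guest memory holds big-endian 0x0005; a little-endian load sees 0x0500.
  std::printf("%04X\n", SwapAndRemapIndex(0x0500, 0xFFFF));  // -> 0005
  // The guest's reset index (say 0x1234) becomes 0xFFFF for Vulkan.
  std::printf("%04X\n", SwapAndRemapIndex(0x3412, 0x1234));  // -> FFFF
}
```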
- VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - upload_size, - }; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, - 1, &barrier, 0, nullptr); - - CacheTransientData(upload_base, upload_size, offset); - return {transient_buffer_->gpu_buffer(), offset + source_offset}; -} - -void BufferCache::HashVertexBindings( - XXH3_state_t* hash_state, - const std::vector& vertex_bindings) { - auto& regs = *register_file_; - for (const auto& vertex_binding : vertex_bindings) { -#if 0 - XXH3_64bits_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index)); - XXH3_64bits_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant)); - XXH3_64bits_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words)); -#endif - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = reinterpret_cast(®s.values[r]); - switch (vertex_binding.fetch_constant % 3) { - case 0: { - auto& fetch = group->vertex_fetch_0; - XXH3_64bits_update(hash_state, &fetch, sizeof(fetch)); - } break; - case 1: { - auto& fetch = group->vertex_fetch_1; - XXH3_64bits_update(hash_state, &fetch, sizeof(fetch)); - } break; - case 2: { - auto& fetch = group->vertex_fetch_2; - XXH3_64bits_update(hash_state, &fetch, sizeof(fetch)); - } break; - } - } -} - -VkDescriptorSet BufferCache::PrepareVertexSet( - VkCommandBuffer command_buffer, VkFence fence, - const std::vector& vertex_bindings) { - // (quickly) Generate a hash. - XXH3_state_t hash_state; - XXH3_64bits_reset(&hash_state); - - // (quickly) Generate a hash. - HashVertexBindings(&hash_state, vertex_bindings); - uint64_t hash = XXH3_64bits_digest(&hash_state); - for (auto it = vertex_sets_.find(hash); it != vertex_sets_.end(); ++it) { - // TODO(DrChat): We need to compare the bindings and ensure they're equal. - return it->second; - } - - if (!vertex_descriptor_pool_->has_open_batch()) { - vertex_descriptor_pool_->BeginBatch(fence); - } - - VkDescriptorSet set = - vertex_descriptor_pool_->AcquireEntry(vertex_descriptor_set_layout_); - if (!set) { - return nullptr; - } - - // TODO(DrChat): Define magic number 32 as a constant somewhere. - VkDescriptorBufferInfo buffer_infos[32] = {}; - VkWriteDescriptorSet descriptor_write = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 0, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - nullptr, - buffer_infos, - nullptr, - }; - - auto& regs = *register_file_; - for (const auto& vertex_binding : vertex_bindings) { - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (vertex_binding.fetch_constant % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - - // TODO(DrChat): Some games use type kInvalidTexture (with no data). 
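The fetch-constant addressing above relies on vertex fetch constants being two dwords each, packed three per six-register xe_gpu_fetch_group_t. A sketch of the same arithmetic collapsed into one helper (the base register value is a placeholder, not the real XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0):

```cpp
#include <cstdint>

constexpr uint32_t kShaderConstantFetch00_0 = 0x4800;  // Placeholder value.

// First dword register of vertex fetch constant n: skip whole 6-dword groups,
// then 2 dwords per preceding constant within the group.
constexpr uint32_t FetchConstantFirstRegister(uint32_t fetch_constant) {
  return kShaderConstantFetch00_0 + (fetch_constant / 3) * 6 +
         (fetch_constant % 3) * 2;
}

static_assert(FetchConstantFirstRegister(0) == kShaderConstantFetch00_0);
static_assert(FetchConstantFirstRegister(3) == kShaderConstantFetch00_0 + 6);
static_assert(FetchConstantFirstRegister(4) == kShaderConstantFetch00_0 + 8);
```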
- switch (fetch->type) { - case xenos::FetchConstantType::kVertex: - break; - case xenos::FetchConstantType::kInvalidVertex: - if (cvars::gpu_allow_invalid_fetch_constants) { - break; - } - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " - "This " - "is incorrect behavior, but you can try bypassing this by " - "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", - vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1); - return nullptr; - default: - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", - vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1); - return nullptr; - } - - // TODO(benvanik): compute based on indices or vertex count. - // THIS CAN BE MASSIVELY INCORRECT (too large). - // This may not be possible (with indexed vfetch). - uint32_t source_length = fetch->size * 4; - uint32_t physical_address = fetch->address << 2; - - // TODO(DrChat): This needs to be put in gpu::CommandProcessor - // trace_writer_.WriteMemoryRead(physical_address, source_length); - - // Upload (or get a cached copy of) the buffer. - auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address, - source_length, fetch->endian, fence); - if (buffer_ref.second == VK_WHOLE_SIZE) { - // Failed to upload buffer. - XELOGW("Failed to upload vertex buffer!"); - return nullptr; - } - - // Stash the buffer reference for our bulk bind at the end. - buffer_infos[descriptor_write.descriptorCount++] = { - buffer_ref.first, - buffer_ref.second, - source_length, - }; - } - - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkUpdateDescriptorSets(device, 1, &descriptor_write, 0, nullptr); - vertex_sets_[hash] = set; - return set; -} - -VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length, - VkFence fence) { - // Try fast path (if we have space). - VkDeviceSize offset = TryAllocateTransientData(length, fence); - if (offset != VK_WHOLE_SIZE) { - return offset; - } - - // Ran out of easy allocations. - // Try consuming fences before we panic. - transient_buffer_->Scavenge(); - - // Try again. It may still fail if we didn't get enough space back. - offset = TryAllocateTransientData(length, fence); - return offset; -} - -VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length, - VkFence fence) { - auto alloc = transient_buffer_->Acquire(length, fence); - if (alloc) { - return alloc->offset; - } - - // No more space. - return VK_WHOLE_SIZE; -} - -VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address, - uint32_t guest_length) { - if (transient_cache_.empty()) { - // Short-circuit exit. - return VK_WHOLE_SIZE; - } - - // Find the first element > guest_address - auto it = transient_cache_.upper_bound(guest_address); - if (it != transient_cache_.begin()) { - // it = first element <= guest_address - --it; - - if ((it->first + it->second.first) >= (guest_address + guest_length)) { - // This data is contained within some existing transient data. 
- auto source_offset = static_cast(guest_address - it->first); - return it->second.second + source_offset; - } - } - - return VK_WHOLE_SIZE; -} - -void BufferCache::CacheTransientData(uint32_t guest_address, - uint32_t guest_length, - VkDeviceSize offset) { - transient_cache_[guest_address] = {guest_length, offset}; - - // Erase any entries contained within - auto it = transient_cache_.upper_bound(guest_address); - while (it != transient_cache_.end()) { - if ((guest_address + guest_length) >= (it->first + it->second.first)) { - it = transient_cache_.erase(it); - } else { - break; - } - } -} - -void BufferCache::Flush(VkCommandBuffer command_buffer) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - // If we are flushing a big enough chunk queue up an event. - // We don't want to do this for everything but often enough so that we won't - // run out of space. - if (true) { - // VkEvent finish_event; - // dfn.vkCmdSetEvent(cmd_buffer, finish_event, - // VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - // Flush memory. - // TODO(benvanik): subrange. - VkMappedMemoryRange dirty_range; - dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - dirty_range.pNext = nullptr; - dirty_range.memory = transient_buffer_->gpu_memory(); - dirty_range.offset = 0; - dirty_range.size = transient_buffer_->capacity(); - dfn.vkFlushMappedMemoryRanges(device, 1, &dirty_range); -} - -void BufferCache::InvalidateCache() { - // Called by VulkanCommandProcessor::MakeCoherent() - // Discard everything? - transient_cache_.clear(); -} - -void BufferCache::ClearCache() { transient_cache_.clear(); } - -void BufferCache::Scavenge() { - SCOPE_profile_cpu_f("gpu"); - - transient_cache_.clear(); - transient_buffer_->Scavenge(); - - // TODO(DrChat): These could persist across frames, we just need a smart way - // to delete unused ones. - vertex_sets_.clear(); - if (vertex_descriptor_pool_->has_open_batch()) { - vertex_descriptor_pool_->EndBatch(); - } - - vertex_descriptor_pool_->Scavenge(); -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h deleted file mode 100644 index 449e23558..000000000 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ /dev/null @@ -1,175 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_ -#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_ - -#include "xenia/base/xxhash.h" -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/xenos.h" -#include "xenia/memory.h" -#include "xenia/ui/vulkan/circular_buffer.h" -#include "xenia/ui/vulkan/fenced_pools.h" -#include "xenia/ui/vulkan/vulkan_mem_alloc.h" -#include "xenia/ui/vulkan/vulkan_provider.h" - -#include -#include - -namespace xe { -namespace gpu { -namespace vulkan { - -// Efficiently manages buffers of various kinds. -// Used primarily for uploading index and vertex data from guest memory and -// transient data like shader constants. 
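The transient cache is an interval map keyed by guest address: lookup takes upper_bound, steps back to the last entry starting at or below the address, and checks containment. A self-contained sketch of that idiom, simplified from the deleted code above (types reduced, eviction omitted):

```cpp
#include <cstdint>
#include <map>
#include <optional>
#include <utility>

// guest_address -> {guest_length, buffer_offset}
using TransientCache = std::map<uint32_t, std::pair<uint32_t, uint64_t>>;

std::optional<uint64_t> Find(const TransientCache& cache, uint32_t addr,
                             uint32_t len) {
  auto it = cache.upper_bound(addr);  // First entry strictly above addr.
  if (it == cache.begin()) return std::nullopt;
  --it;  // Now: the last entry whose start address is <= addr.
  if (it->first + it->second.first >= addr + len) {
    return it->second.second + (addr - it->first);  // Offset within entry.
  }
  return std::nullopt;
}
```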
-class BufferCache { - public: - BufferCache(RegisterFile* register_file, Memory* memory, - const ui::vulkan::VulkanProvider& provider, size_t capacity); - ~BufferCache(); - - VkResult Initialize(); - void Shutdown(); - - // Descriptor set containing the dynamic uniform buffer used for constant - // uploads. Used in conjunction with a dynamic offset returned by - // UploadConstantRegisters. - // The set contains two bindings: - // binding = 0: for use in vertex shaders - // binding = 1: for use in fragment shaders - VkDescriptorSet constant_descriptor_set() const { - return constant_descriptor_set_; - } - VkDescriptorSetLayout constant_descriptor_set_layout() const { - return constant_descriptor_set_layout_; - } - - // Descriptor set containing vertex buffers stored in storage buffers. - // This set contains one binding with an array of 32 storage buffers. - VkDescriptorSetLayout vertex_descriptor_set_layout() const { - return vertex_descriptor_set_layout_; - } - - // Uploads the constants specified in the register maps to the transient - // uniform storage buffer. - // The registers are tightly packed in order as [floats, ints, bools]. - // Returns an offset that can be used with the transient_descriptor_set or - // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). - // The returned offsets may alias. - std::pair UploadConstantRegisters( - VkCommandBuffer command_buffer, - const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map, - VkFence fence); - - // Uploads index buffer data from guest memory, possibly eliding with - // recently uploaded data or cached copies. - // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. - // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadIndexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::IndexFormat format, VkFence fence); - - // Uploads vertex buffer data from guest memory, possibly eliding with - // recently uploaded data or cached copies. - // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. - // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadVertexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::Endian endian, VkFence fence); - - // Prepares and returns a vertex descriptor set. - VkDescriptorSet PrepareVertexSet( - VkCommandBuffer setup_buffer, VkFence fence, - const std::vector& vertex_bindings); - - // Flushes all pending data to the GPU. - // Until this is called the GPU is not guaranteed to see any data. - // The given command buffer will be used to queue up events so that the - // cache can determine when data has been consumed. - void Flush(VkCommandBuffer command_buffer); - - // Marks the cache as potentially invalid. - // This is not as strong as ClearCache and is a hint that any and all data - // should be verified before being reused. - void InvalidateCache(); - - // Clears all cached content and prevents future elision with pending data. - void ClearCache(); - - // Wipes all data no longer needed. - void Scavenge(); - - private: - // This represents an uploaded vertex buffer. 
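A hypothetical caller of the interface declared above, showing the VK_WHOLE_SIZE out-of-memory convention for index data (plain vulkan.h prototypes stand in for Xenia's function table; the surrounding logic is invented):

```cpp
#include <cstdint>

#include <vulkan/vulkan.h>

#include "xenia/gpu/vulkan/buffer_cache.h"

bool BindGuestIndexBuffer(xe::gpu::vulkan::BufferCache& cache,
                          VkCommandBuffer cmd, uint32_t guest_addr,
                          uint32_t length_bytes, VkFence fence) {
  auto [buffer, offset] = cache.UploadIndexBuffer(
      cmd, guest_addr, length_bytes, xe::gpu::xenos::IndexFormat::kInt16,
      fence);
  if (offset == VK_WHOLE_SIZE) {
    return false;  // Transient ring exhausted even after scavenging fences.
  }
  vkCmdBindIndexBuffer(cmd, buffer, offset, VK_INDEX_TYPE_UINT16);
  return true;
}
```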
- struct VertexBuffer { - uint32_t guest_address; - uint32_t size; - - VmaAllocation alloc; - VmaAllocationInfo alloc_info; - }; - - VkResult CreateVertexDescriptorPool(); - void FreeVertexDescriptorPool(); - - VkResult CreateConstantDescriptorSet(); - void FreeConstantDescriptorSet(); - - void HashVertexBindings( - XXH3_state_t* hash_state, - const std::vector& vertex_bindings); - - // Allocates a block of memory in the transient buffer. - // When memory is not available fences are checked and space is reclaimed. - // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize AllocateTransientData(VkDeviceSize length, VkFence fence); - // Tries to allocate a block of memory in the transient buffer. - // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence); - // Finds a block of data in the transient buffer sourced from the specified - // guest address and length. - VkDeviceSize FindCachedTransientData(uint32_t guest_address, - uint32_t guest_length); - // Adds a block of data to the frame cache. - void CacheTransientData(uint32_t guest_address, uint32_t guest_length, - VkDeviceSize offset); - - RegisterFile* register_file_ = nullptr; - Memory* memory_ = nullptr; - const ui::vulkan::VulkanProvider& provider_; - - VkDeviceMemory gpu_memory_pool_ = nullptr; - VmaAllocator mem_allocator_ = nullptr; - - // Staging ringbuffer we cycle through fast. Used for data we don't - // plan on keeping past the current frame. - std::unique_ptr transient_buffer_ = nullptr; - std::map> transient_cache_; - - // Vertex buffer descriptors - std::unique_ptr vertex_descriptor_pool_ = nullptr; - VkDescriptorSetLayout vertex_descriptor_set_layout_ = nullptr; - - // Current frame vertex sets. - std::unordered_map vertex_sets_; - - // Descriptor set used to hold vertex/pixel shader float constants - VkDescriptorPool constant_descriptor_pool_ = nullptr; - VkDescriptorSetLayout constant_descriptor_set_layout_ = nullptr; - VkDescriptorSet constant_descriptor_set_ = nullptr; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc new file mode 100644 index 000000000..65c80cf23 --- /dev/null +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -0,0 +1,367 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/deferred_command_buffer.h" + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +DeferredCommandBuffer::DeferredCommandBuffer( + const VulkanCommandProcessor& command_processor, size_t initial_size) + : command_processor_(command_processor) { + command_stream_.reserve(initial_size / sizeof(uintmax_t)); +} + +void DeferredCommandBuffer::Reset() { command_stream_.clear(); } + +void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = + command_processor_.GetVulkanProvider().dfn(); + const uintmax_t* stream = command_stream_.data(); + size_t stream_remaining = command_stream_.size(); + while (stream_remaining) { + const CommandHeader& header = + *reinterpret_cast<const CommandHeader*>(stream); + stream += kCommandHeaderSizeElements; + stream_remaining -= kCommandHeaderSizeElements; + + switch (header.command) { + case Command::kVkBeginRenderPass: { + auto& args = *reinterpret_cast<const ArgsVkBeginRenderPass*>(stream); + size_t offset_bytes = sizeof(ArgsVkBeginRenderPass); + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = args.render_pass; + render_pass_begin_info.framebuffer = args.framebuffer; + render_pass_begin_info.renderArea = args.render_area; + render_pass_begin_info.clearValueCount = args.clear_value_count; + if (render_pass_begin_info.clearValueCount) { + offset_bytes = xe::align(offset_bytes, alignof(VkClearValue)); + render_pass_begin_info.pClearValues = + reinterpret_cast<const VkClearValue*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + offset_bytes += + sizeof(VkClearValue) * render_pass_begin_info.clearValueCount; + } else { + render_pass_begin_info.pClearValues = nullptr; + } + dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, + args.contents); + } break; + + case Command::kVkBindDescriptorSets: { + auto& args = *reinterpret_cast<const ArgsVkBindDescriptorSets*>(stream); + size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets), + alignof(VkDescriptorSet)); + const VkDescriptorSet* descriptor_sets = + reinterpret_cast<const VkDescriptorSet*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count; + const uint32_t* dynamic_offsets = nullptr; + if (args.dynamic_offset_count) { + offset_bytes = xe::align(offset_bytes, alignof(uint32_t)); + dynamic_offsets = reinterpret_cast<const uint32_t*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count; + } + dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point, + args.layout, args.first_set, + args.descriptor_set_count, descriptor_sets, + args.dynamic_offset_count, dynamic_offsets); + } break; + + case Command::kVkBindIndexBuffer: { + auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream); + dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset, + args.index_type); + } break; + + case Command::kVkBindPipeline: { + auto& args = *reinterpret_cast<const ArgsVkBindPipeline*>(stream); + dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point, + args.pipeline); + } break; + + case Command::kVkBindVertexBuffers: {
+ auto& args = *reinterpret_cast<const ArgsVkBindVertexBuffers*>(stream); + size_t offset_bytes = + xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer)); + const VkBuffer* buffers = reinterpret_cast<const VkBuffer*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + offset_bytes = + xe::align(offset_bytes + sizeof(VkBuffer) * args.binding_count, + alignof(VkDeviceSize)); + const VkDeviceSize* offsets = reinterpret_cast<const VkDeviceSize*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + dfn.vkCmdBindVertexBuffers(command_buffer, args.first_binding, + args.binding_count, buffers, offsets); + } break; + + case Command::kVkClearAttachments: { + auto& args = *reinterpret_cast<const ArgsVkClearAttachments*>(stream); + size_t offset_bytes = xe::align(sizeof(ArgsVkClearAttachments), + alignof(VkClearAttachment)); + const VkClearAttachment* attachments = + reinterpret_cast<const VkClearAttachment*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + offset_bytes = xe::align( + offset_bytes + sizeof(VkClearAttachment) * args.attachment_count, + alignof(VkClearRect)); + const VkClearRect* rects = reinterpret_cast<const VkClearRect*>( + reinterpret_cast<const uint8_t*>(stream) + offset_bytes); + dfn.vkCmdClearAttachments(command_buffer, args.attachment_count, + attachments, args.rect_count, rects); + } break; + + case Command::kVkClearColorImage: { + auto& args = *reinterpret_cast<const ArgsVkClearColorImage*>(stream); + dfn.vkCmdClearColorImage( + command_buffer, args.image, args.image_layout, &args.color, + args.range_count, + reinterpret_cast<const VkImageSubresourceRange*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkClearColorImage), + alignof(VkImageSubresourceRange)))); + } break; + + case Command::kVkCopyBuffer: { + auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream); + dfn.vkCmdCopyBuffer( + command_buffer, args.src_buffer, args.dst_buffer, args.region_count, + reinterpret_cast<const VkBufferCopy*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); + } break; + + case Command::kVkCopyBufferToImage: { + auto& args = *reinterpret_cast<const ArgsVkCopyBufferToImage*>(stream); + dfn.vkCmdCopyBufferToImage( + command_buffer, args.src_buffer, args.dst_image, + args.dst_image_layout, args.region_count, + reinterpret_cast<const VkBufferImageCopy*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkCopyBufferToImage), + alignof(VkBufferImageCopy)))); + } break; + + case Command::kVkDispatch: { + auto& args = *reinterpret_cast<const ArgsVkDispatch*>(stream); + dfn.vkCmdDispatch(command_buffer, args.group_count_x, + args.group_count_y, args.group_count_z); + } break; + + case Command::kVkDraw: { + auto& args = *reinterpret_cast<const ArgsVkDraw*>(stream); + dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count, + args.first_vertex, args.first_instance); + } break; + + case Command::kVkDrawIndexed: { + auto& args = *reinterpret_cast<const ArgsVkDrawIndexed*>(stream); + dfn.vkCmdDrawIndexed(command_buffer, args.index_count, + args.instance_count, args.first_index, + args.vertex_offset, args.first_instance); + } break; + + case Command::kVkEndRenderPass: + dfn.vkCmdEndRenderPass(command_buffer); + break; + + case Command::kVkPipelineBarrier: { + auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream); + size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier); + const VkMemoryBarrier* memory_barriers = nullptr; + if (args.memory_barrier_count) { + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier)); + memory_barriers = reinterpret_cast<const VkMemoryBarrier*>( + reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkMemoryBarrier) * args.memory_barrier_count; + } + const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr; + if (args.buffer_memory_barrier_count) { + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers = +
reinterpret_cast<const VkBufferMemoryBarrier*>( + reinterpret_cast<const uint8_t*>(stream) + + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count; + } + const VkImageMemoryBarrier* image_memory_barriers = nullptr; + if (args.image_memory_barrier_count) { + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier)); + image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>( + reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count; + } + dfn.vkCmdPipelineBarrier( + command_buffer, args.src_stage_mask, args.dst_stage_mask, + args.dependency_flags, args.memory_barrier_count, memory_barriers, + args.buffer_memory_barrier_count, buffer_memory_barriers, + args.image_memory_barrier_count, image_memory_barriers); + } break; + + case Command::kVkPushConstants: { + auto& args = *reinterpret_cast<const ArgsVkPushConstants*>(stream); + dfn.vkCmdPushConstants(command_buffer, args.layout, args.stage_flags, + args.offset, args.size, + reinterpret_cast<const uint8_t*>(stream) + + sizeof(ArgsVkPushConstants)); + } break; + + case Command::kVkSetBlendConstants: { + auto& args = *reinterpret_cast<const ArgsVkSetBlendConstants*>(stream); + dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants); + } break; + + case Command::kVkSetDepthBias: { + auto& args = *reinterpret_cast<const ArgsVkSetDepthBias*>(stream); + dfn.vkCmdSetDepthBias(command_buffer, args.depth_bias_constant_factor, + args.depth_bias_clamp, + args.depth_bias_slope_factor); + } break; + + case Command::kVkSetScissor: { + auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream); + dfn.vkCmdSetScissor( + command_buffer, args.first_scissor, args.scissor_count, + reinterpret_cast<const VkRect2D*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)))); + } break; + + case Command::kVkSetStencilCompareMask: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilCompareMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilReference: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilReference(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilWriteMask: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilWriteMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetViewport: { + auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream); + dfn.vkCmdSetViewport( + command_buffer, args.first_viewport, args.viewport_count, + reinterpret_cast<const VkViewport*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)))); + } break; + + default: + assert_unhandled_case(header.command); + break; + } + + stream += header.arguments_size_elements; + stream_remaining -= header.arguments_size_elements; + } +} + +void DeferredCommandBuffer::CmdVkPipelineBarrier( + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers) { + size_t arguments_size = sizeof(ArgsVkPipelineBarrier); + size_t memory_barriers_offset = 0; + if (memory_barrier_count) { + arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier)); + memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count; + } + size_t
buffer_memory_barriers_offset = 0; + if (buffer_memory_barrier_count) { + arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers_offset = arguments_size; + arguments_size += + sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count; + } + size_t image_memory_barriers_offset = 0; + if (image_memory_barrier_count) { + arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier)); + image_memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count; + } + uint8_t* args_ptr = reinterpret_cast<uint8_t*>( + WriteCommand(Command::kVkPipelineBarrier, arguments_size)); + auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr); + args.src_stage_mask = src_stage_mask; + args.dst_stage_mask = dst_stage_mask; + args.dependency_flags = dependency_flags; + args.memory_barrier_count = memory_barrier_count; + args.buffer_memory_barrier_count = buffer_memory_barrier_count; + args.image_memory_barrier_count = image_memory_barrier_count; + if (memory_barrier_count) { + std::memcpy(args_ptr + memory_barriers_offset, memory_barriers, + sizeof(VkMemoryBarrier) * memory_barrier_count); + } + if (buffer_memory_barrier_count) { + std::memcpy(args_ptr + buffer_memory_barriers_offset, + buffer_memory_barriers, + sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count); + } + if (image_memory_barrier_count) { + std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers, + sizeof(VkImageMemoryBarrier) * image_memory_barrier_count); + } +} + +void* DeferredCommandBuffer::WriteCommand(Command command, + size_t arguments_size_bytes) { + size_t arguments_size_elements = + (arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + size_t offset = command_stream_.size(); + command_stream_.resize(offset + kCommandHeaderSizeElements + + arguments_size_elements); + CommandHeader& header = + *reinterpret_cast<CommandHeader*>(command_stream_.data() + offset); + header.command = command; + header.arguments_size_elements = uint32_t(arguments_size_elements); + return command_stream_.data() + (offset + kCommandHeaderSizeElements); +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h new file mode 100644 index 000000000..186639c86 --- /dev/null +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -0,0 +1,550 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ +#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ + +#include <cstddef> +#include <cstring> +#include <vector> + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class DeferredCommandBuffer { + public: + DeferredCommandBuffer(const VulkanCommandProcessor& command_processor, + size_t initial_size_bytes = 1024 * 1024); + + void Reset(); + void Execute(VkCommandBuffer command_buffer); + + // render_pass_begin->pNext must be null.
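The recording scheme implemented in the .cc above reduces to: a header (command id plus payload length in uintmax_t elements) followed by a uintmax_t-aligned payload, replayed in one linear walk. A minimal standalone sketch of just that framing (everything here is simplified and renamed; only the layout idea matches the real WriteCommand/Execute pair):

```cpp
#include <cstdint>
#include <vector>

enum class Cmd : uint32_t { kSetValue };
struct Header { Cmd command; uint32_t size_elements; };
constexpr size_t kHeaderElements =
    (sizeof(Header) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);

std::vector<uintmax_t> stream;

// Appends a header and reserves a rounded-up payload; returns the payload.
void* Write(Cmd command, size_t payload_bytes) {
  size_t elements = (payload_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
  size_t offset = stream.size();
  stream.resize(offset + kHeaderElements + elements);
  auto* header = reinterpret_cast<Header*>(stream.data() + offset);
  header->command = command;
  header->size_elements = uint32_t(elements);
  return stream.data() + offset + kHeaderElements;
}

// Walks header -> payload -> header, exactly like Execute above.
void Replay() {
  const uintmax_t* p = stream.data();
  size_t remaining = stream.size();
  while (remaining) {
    const auto& header = *reinterpret_cast<const Header*>(p);
    p += kHeaderElements;
    remaining -= kHeaderElements;
    // Dispatch on header.command here; the payload starts at p.
    p += header.size_elements;
    remaining -= header.size_elements;
  }
}
```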
+ void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin, + VkSubpassContents contents) { + assert_null(render_pass_begin->pNext); + size_t arguments_size = sizeof(ArgsVkBeginRenderPass); + uint32_t clear_value_count = render_pass_begin->clearValueCount; + size_t clear_values_offset = 0; + if (clear_value_count) { + arguments_size = xe::align(arguments_size, alignof(VkClearValue)); + clear_values_offset = arguments_size; + arguments_size += sizeof(VkClearValue) * clear_value_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBeginRenderPass, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.render_pass = render_pass_begin->renderPass; + args.framebuffer = render_pass_begin->framebuffer; + args.render_area = render_pass_begin->renderArea; + args.clear_value_count = clear_value_count; + args.contents = contents; + if (clear_value_count) { + std::memcpy(args_ptr + clear_values_offset, + render_pass_begin->pClearValues, + sizeof(VkClearValue) * clear_value_count); + } + } + + void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point, + VkPipelineLayout layout, uint32_t first_set, + uint32_t descriptor_set_count, + const VkDescriptorSet* descriptor_sets, + uint32_t dynamic_offset_count, + const uint32_t* dynamic_offsets) { + size_t arguments_size = + xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet)); + size_t descriptor_sets_offset = arguments_size; + arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count; + size_t dynamic_offsets_offset = 0; + if (dynamic_offset_count) { + arguments_size = xe::align(arguments_size, alignof(uint32_t)); + dynamic_offsets_offset = arguments_size; + arguments_size += sizeof(uint32_t) * dynamic_offset_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBindDescriptorSets, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.pipeline_bind_point = pipeline_bind_point; + args.layout = layout; + args.first_set = first_set; + args.descriptor_set_count = descriptor_set_count; + args.dynamic_offset_count = dynamic_offset_count; + std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets, + sizeof(VkDescriptorSet) * descriptor_set_count); + if (dynamic_offset_count) { + std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets, + sizeof(uint32_t) * dynamic_offset_count); + } + } + + void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer))); + args.buffer = buffer; + args.offset = offset; + args.index_type = index_type; + } + + void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point, + VkPipeline pipeline) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline))); + args.pipeline_bind_point = pipeline_bind_point; + args.pipeline = pipeline; + } + + void CmdVkBindVertexBuffers(uint32_t first_binding, uint32_t binding_count, + const VkBuffer* buffers, + const VkDeviceSize* offsets) { + size_t arguments_size = + xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer)); + size_t buffers_offset = arguments_size; + arguments_size = + xe::align(arguments_size + sizeof(VkBuffer) * binding_count, + alignof(VkDeviceSize)); + size_t offsets_offset = arguments_size; + arguments_size += sizeof(VkDeviceSize) * binding_count; + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBindVertexBuffers, arguments_size)); + 
auto& args = *reinterpret_cast(args_ptr); + args.first_binding = first_binding; + args.binding_count = binding_count; + std::memcpy(args_ptr + buffers_offset, buffers, + sizeof(VkBuffer) * binding_count); + std::memcpy(args_ptr + offsets_offset, offsets, + sizeof(VkDeviceSize) * binding_count); + } + + void CmdClearAttachmentsEmplace(uint32_t attachment_count, + VkClearAttachment*& attachments_out, + uint32_t rect_count, + VkClearRect*& rects_out) { + size_t arguments_size = + xe::align(sizeof(ArgsVkClearAttachments), alignof(VkClearAttachment)); + size_t attachments_offset = arguments_size; + arguments_size = + xe::align(arguments_size + sizeof(VkClearAttachment) * attachment_count, + alignof(VkClearRect)); + size_t rects_offset = arguments_size; + arguments_size += sizeof(VkClearRect) * rect_count; + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkClearAttachments, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.attachment_count = attachment_count; + args.rect_count = rect_count; + attachments_out = + reinterpret_cast(args_ptr + attachments_offset); + rects_out = reinterpret_cast(args_ptr + rects_offset); + } + void CmdVkClearAttachments(uint32_t attachment_count, + const VkClearAttachment* attachments, + uint32_t rect_count, const VkClearRect* rects) { + VkClearAttachment* attachments_arg; + VkClearRect* rects_arg; + CmdClearAttachmentsEmplace(attachment_count, attachments_arg, rect_count, + rects_arg); + std::memcpy(attachments_arg, attachments, + sizeof(VkClearAttachment) * attachment_count); + std::memcpy(rects_arg, rects, sizeof(VkClearRect) * rect_count); + } + + VkImageSubresourceRange* CmdClearColorImageEmplace( + VkImage image, VkImageLayout image_layout, const VkClearColorValue* color, + uint32_t range_count) { + const size_t header_size = xe::align(sizeof(ArgsVkClearColorImage), + alignof(VkImageSubresourceRange)); + uint8_t* args_ptr = reinterpret_cast(WriteCommand( + Command::kVkClearColorImage, + header_size + sizeof(VkImageSubresourceRange) * range_count)); + auto& args = *reinterpret_cast(args_ptr); + args.image = image; + args.image_layout = image_layout; + args.color = *color; + args.range_count = range_count; + return reinterpret_cast(args_ptr + header_size); + } + void CmdVkClearColorImage(VkImage image, VkImageLayout image_layout, + const VkClearColorValue* color, + uint32_t range_count, + const VkImageSubresourceRange* ranges) { + std::memcpy( + CmdClearColorImageEmplace(image, image_layout, color, range_count), + ranges, sizeof(VkImageSubresourceRange) * range_count); + } + + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count) { + const size_t header_size = + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkCopyBuffer, + header_size + sizeof(VkBufferCopy) * region_count)); + auto& args = *reinterpret_cast(args_ptr); + args.src_buffer = src_buffer; + args.dst_buffer = dst_buffer; + args.region_count = region_count; + return reinterpret_cast(args_ptr + header_size); + } + void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count, const VkBufferCopy* regions) { + std::memcpy(CmdCopyBufferEmplace(src_buffer, dst_buffer, region_count), + regions, sizeof(VkBufferCopy) * region_count); + } + + VkBufferImageCopy* CmdCopyBufferToImageEmplace(VkBuffer src_buffer, + VkImage dst_image, + VkImageLayout dst_image_layout, + uint32_t region_count) { + const size_t header_size = + 
xe::align(sizeof(ArgsVkCopyBufferToImage), alignof(VkBufferImageCopy)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkCopyBufferToImage, + header_size + sizeof(VkBufferImageCopy) * region_count)); + auto& args = *reinterpret_cast(args_ptr); + args.src_buffer = src_buffer; + args.dst_image = dst_image; + args.dst_image_layout = dst_image_layout; + args.region_count = region_count; + return reinterpret_cast(args_ptr + header_size); + } + void CmdVkCopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkBufferImageCopy* regions) { + std::memcpy(CmdCopyBufferToImageEmplace(src_buffer, dst_image, + dst_image_layout, region_count), + regions, sizeof(VkBufferImageCopy) * region_count); + } + + void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y, + uint32_t group_count_z) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDispatch, sizeof(ArgsVkDispatch))); + args.group_count_x = group_count_x; + args.group_count_y = group_count_y; + args.group_count_z = group_count_z; + } + + void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count, + uint32_t first_vertex, uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw))); + args.vertex_count = vertex_count; + args.instance_count = instance_count; + args.first_vertex = first_vertex; + args.first_instance = first_instance; + } + + void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count, + uint32_t first_index, int32_t vertex_offset, + uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed))); + args.index_count = index_count; + args.instance_count = instance_count; + args.first_index = first_index; + args.vertex_offset = vertex_offset; + args.first_instance = first_instance; + } + + void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); } + + // pNext of all barriers must be null. 
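The *Emplace variants above hand the caller a pointer directly into the deferred stream, so region arrays can be built in place without a temporary array and a second copy. A hypothetical use (the buffers, offsets, and sizes are placeholders):

```cpp
#include <vulkan/vulkan.h>

#include "xenia/gpu/vulkan/deferred_command_buffer.h"

void RecordStagingCopy(xe::gpu::vulkan::DeferredCommandBuffer& cmd,
                       VkBuffer staging, VkBuffer destination) {
  // Two VkBufferCopy records are written straight into the command stream.
  VkBufferCopy* regions = cmd.CmdCopyBufferEmplace(staging, destination, 2);
  regions[0] = {/*srcOffset=*/0, /*dstOffset=*/0, /*size=*/256};
  regions[1] = {/*srcOffset=*/256, /*dstOffset=*/512, /*size=*/128};
}
```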
+ void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers); + + void CmdVkPushConstants(VkPipelineLayout layout, + VkShaderStageFlags stage_flags, uint32_t offset, + uint32_t size, const void* values) { + uint8_t* args_ptr = reinterpret_cast(WriteCommand( + Command::kVkPushConstants, sizeof(ArgsVkPushConstants) + size)); + auto& args = *reinterpret_cast(args_ptr); + args.layout = layout; + args.stage_flags = stage_flags; + args.offset = offset; + args.size = size; + std::memcpy(args_ptr + sizeof(ArgsVkPushConstants), values, size); + } + + void CmdVkSetBlendConstants(const float* blend_constants) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants))); + std::memcpy(args.blend_constants, blend_constants, sizeof(float) * 4); + } + + void CmdVkSetDepthBias(float depth_bias_constant_factor, + float depth_bias_clamp, + float depth_bias_slope_factor) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkSetDepthBias, sizeof(ArgsVkSetDepthBias))); + args.depth_bias_constant_factor = depth_bias_constant_factor; + args.depth_bias_clamp = depth_bias_clamp; + args.depth_bias_slope_factor = depth_bias_slope_factor; + } + + void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count, + const VkRect2D* scissors) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetScissor, + header_size + sizeof(VkRect2D) * scissor_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_scissor = first_scissor; + args.scissor_count = scissor_count; + std::memcpy(args_ptr + header_size, scissors, + sizeof(VkRect2D) * scissor_count); + } + + void CmdVkSetStencilCompareMask(VkStencilFaceFlags face_mask, + uint32_t compare_mask) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkSetStencilCompareMask, + sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = compare_mask; + } + + void CmdVkSetStencilReference(VkStencilFaceFlags face_mask, + uint32_t reference) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetStencilReference, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = reference; + } + + void CmdVkSetStencilWriteMask(VkStencilFaceFlags face_mask, + uint32_t write_mask) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetStencilWriteMask, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = write_mask; + } + + void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count, + const VkViewport* viewports) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetViewport, + header_size + sizeof(VkViewport) * viewport_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_viewport = first_viewport; + args.viewport_count = viewport_count; + std::memcpy(args_ptr + header_size, viewports, + sizeof(VkViewport) * viewport_count); + } + + private: + enum class Command { + kVkBeginRenderPass, + kVkBindDescriptorSets, + 
kVkBindIndexBuffer, + kVkBindPipeline, + kVkBindVertexBuffers, + kVkClearAttachments, + kVkClearColorImage, + kVkCopyBuffer, + kVkCopyBufferToImage, + kVkDispatch, + kVkDraw, + kVkDrawIndexed, + kVkEndRenderPass, + kVkPipelineBarrier, + kVkPushConstants, + kVkSetBlendConstants, + kVkSetDepthBias, + kVkSetScissor, + kVkSetStencilCompareMask, + kVkSetStencilReference, + kVkSetStencilWriteMask, + kVkSetViewport, + }; + + struct CommandHeader { + Command command; + uint32_t arguments_size_elements; + }; + static constexpr size_t kCommandHeaderSizeElements = + (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + + struct ArgsVkBeginRenderPass { + VkRenderPass render_pass; + VkFramebuffer framebuffer; + VkRect2D render_area; + uint32_t clear_value_count; + VkSubpassContents contents; + // Followed by aligned optional VkClearValue[]. + static_assert(alignof(VkClearValue) <= alignof(uintmax_t)); + }; + + struct ArgsVkBindDescriptorSets { + VkPipelineBindPoint pipeline_bind_point; + VkPipelineLayout layout; + uint32_t first_set; + uint32_t descriptor_set_count; + uint32_t dynamic_offset_count; + // Followed by aligned VkDescriptorSet[], optional uint32_t[]. + static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t)); + }; + + struct ArgsVkBindIndexBuffer { + VkBuffer buffer; + VkDeviceSize offset; + VkIndexType index_type; + }; + + struct ArgsVkBindPipeline { + VkPipelineBindPoint pipeline_bind_point; + VkPipeline pipeline; + }; + + struct ArgsVkBindVertexBuffers { + uint32_t first_binding; + uint32_t binding_count; + // Followed by aligned VkBuffer[], VkDeviceSize[]. + static_assert(alignof(VkBuffer) <= alignof(uintmax_t)); + static_assert(alignof(VkDeviceSize) <= alignof(uintmax_t)); + }; + + struct ArgsVkClearAttachments { + uint32_t attachment_count; + uint32_t rect_count; + // Followed by aligned VkClearAttachment[], VkClearRect[]. + static_assert(alignof(VkClearAttachment) <= alignof(uintmax_t)); + static_assert(alignof(VkClearRect) <= alignof(uintmax_t)); + }; + + struct ArgsVkClearColorImage { + VkImage image; + VkImageLayout image_layout; + VkClearColorValue color; + uint32_t range_count; + // Followed by aligned VkImageSubresourceRange[]. + static_assert(alignof(VkImageSubresourceRange) <= alignof(uintmax_t)); + }; + + struct ArgsVkCopyBuffer { + VkBuffer src_buffer; + VkBuffer dst_buffer; + uint32_t region_count; + // Followed by aligned VkBufferCopy[]. + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + }; + + struct ArgsVkCopyBufferToImage { + VkBuffer src_buffer; + VkImage dst_image; + VkImageLayout dst_image_layout; + uint32_t region_count; + // Followed by aligned VkBufferImageCopy[]. + static_assert(alignof(VkBufferImageCopy) <= alignof(uintmax_t)); + }; + + struct ArgsVkDispatch { + uint32_t group_count_x; + uint32_t group_count_y; + uint32_t group_count_z; + }; + + struct ArgsVkDraw { + uint32_t vertex_count; + uint32_t instance_count; + uint32_t first_vertex; + uint32_t first_instance; + }; + + struct ArgsVkDrawIndexed { + uint32_t index_count; + uint32_t instance_count; + uint32_t first_index; + int32_t vertex_offset; + uint32_t first_instance; + }; + + struct ArgsVkPipelineBarrier { + VkPipelineStageFlags src_stage_mask; + VkPipelineStageFlags dst_stage_mask; + VkDependencyFlags dependency_flags; + uint32_t memory_barrier_count; + uint32_t buffer_memory_barrier_count; + uint32_t image_memory_barrier_count; + // Followed by aligned optional VkMemoryBarrier[], + // optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[]. 
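+ // Layout sketch (illustrative): with one VkMemoryBarrier and two + // VkImageMemoryBarriers, the stream holds the CommandHeader, this + // struct, then each trailing barrier array at an offset rounded up to + // alignof(uintmax_t), matching the xe::align() packing used above.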
+ static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + }; + + struct ArgsVkPushConstants { + VkPipelineLayout layout; + VkShaderStageFlags stage_flags; + uint32_t offset; + uint32_t size; + // Followed by `size` bytes of values. + }; + + struct ArgsVkSetBlendConstants { + float blend_constants[4]; + }; + + struct ArgsVkSetDepthBias { + float depth_bias_constant_factor; + float depth_bias_clamp; + float depth_bias_slope_factor; + }; + + struct ArgsVkSetScissor { + uint32_t first_scissor; + uint32_t scissor_count; + // Followed by aligned VkRect2D[]. + static_assert(alignof(VkRect2D) <= alignof(uintmax_t)); + }; + + struct ArgsSetStencilMaskReference { + VkStencilFaceFlags face_mask; + uint32_t mask_reference; + }; + + struct ArgsVkSetViewport { + uint32_t first_viewport; + uint32_t viewport_count; + // Followed by aligned VkViewport[]. + static_assert(alignof(VkViewport) <= alignof(uintmax_t)); + }; + + void* WriteCommand(Command command, size_t arguments_size_bytes); + + const VulkanCommandProcessor& command_processor_; + + // uintmax_t to ensure uint64_t and pointer alignment of all structures. + std::vector<uintmax_t> command_stream_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index 0fa0169e3..9c3c83c84 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -8,10 +8,10 @@ project("xenia-gpu-vulkan") language("C++") links({ "fmt", + "glslang-spirv", "xenia-base", "xenia-gpu", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xxhash", }) @@ -20,10 +20,9 @@ project("xenia-gpu-vulkan") }) local_platform_files() files({ - "shaders/bytecode/vulkan_spirv/*.h", + "../shaders/bytecode/vulkan_spirv/*.h", }) --- TODO(benvanik): kill this and move to the debugger UI. group("src") project("xenia-gpu-vulkan-trace-viewer") uuid("86a1dddc-a26a-4885-8c55-cf745225d93e") @@ -43,7 +42,6 @@ project("xenia-gpu-vulkan-trace-viewer") "xenia-kernel", "xenia-patcher", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", "xenia-patcher", @@ -58,7 +56,6 @@ project("xenia-gpu-vulkan-trace-viewer") "libavutil", "mspack", "snappy", - "spirv-tools", "xxhash", }) includedirs({ @@ -77,12 +74,6 @@ project("xenia-gpu-vulkan-trace-viewer") }) filter("platforms:Windows") - links({ - "xenia-apu-xaudio2", - "xenia-hid-winkey", - "xenia-hid-xinput", - }) - -- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user" if not os.isfile(user_file) then @@ -111,7 +102,6 @@ project("xenia-gpu-vulkan-trace-dump") "xenia-hid-nop", "xenia-kernel", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", "xenia-patcher", @@ -126,7 +116,6 @@ project("xenia-gpu-vulkan-trace-dump") "libavutil", "mspack", "snappy", - "spirv-tools", "xxhash", }) includedirs({ diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc deleted file mode 100644 index 5b15a304b..000000000 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ /dev/null @@ -1,1469 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/render_cache.h" - -#include <algorithm> - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/memory.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/registers.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -using namespace xe::gpu::xenos; -using xe::ui::vulkan::util::CheckResult; - -constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024; - -xenos::ColorRenderTargetFormat GetBaseRTFormat( - xenos::ColorRenderTargetFormat format) { - switch (format) { - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: - return xenos::ColorRenderTargetFormat::k_8_8_8_8; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: - return xenos::ColorRenderTargetFormat::k_2_10_10_10; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: - return xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT; - default: - return format; - } -} - -VkFormat ColorRenderTargetFormatToVkFormat( - xenos::ColorRenderTargetFormat format) { - switch (format) { - case xenos::ColorRenderTargetFormat::k_8_8_8_8: - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: - return VK_FORMAT_R8G8B8A8_UNORM; - case xenos::ColorRenderTargetFormat::k_2_10_10_10: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: - return VK_FORMAT_A2R10G10B10_UNORM_PACK32; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_16_16: - return VK_FORMAT_R16G16_UNORM; - case xenos::ColorRenderTargetFormat::k_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; - case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: - return VK_FORMAT_R16G16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_32_FLOAT: - return VK_FORMAT_R32_SFLOAT; - case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: - return VK_FORMAT_R32G32_SFLOAT; - default: - assert_unhandled_case(format); - return VK_FORMAT_UNDEFINED; - } -} - -VkFormat DepthRenderTargetFormatToVkFormat( - xenos::DepthRenderTargetFormat format) { - switch (format) { - case
xenos::DepthRenderTargetFormat::kD24S8: - return VK_FORMAT_D24_UNORM_S8_UINT; - case xenos::DepthRenderTargetFormat::kD24FS8: - // Vulkan doesn't support 24-bit floats, so just promote it to 32-bit - return VK_FORMAT_D32_SFLOAT_S8_UINT; - default: - return VK_FORMAT_UNDEFINED; - } -} - -// Cached framebuffer referencing tile attachments. -// Each framebuffer is specific to a render pass. Ugh. -class CachedFramebuffer { - public: - // TODO(benvanik): optimized key? tile base + format for each? - - // Framebuffer with the attachments ready for use in the parent render pass. - VkFramebuffer handle = nullptr; - // Width of the framebuffer in pixels. - uint32_t width = 0; - // Height of the framebuffer in pixels. - uint32_t height = 0; - // References to color attachments, if used. - CachedTileView* color_attachments[4] = {nullptr}; - // Reference to depth/stencil attachment, if used. - CachedTileView* depth_stencil_attachment = nullptr; - // Associated render pass - VkRenderPass render_pass = nullptr; - - CachedFramebuffer(const ui::vulkan::VulkanProvider& provider, - VkRenderPass render_pass, uint32_t surface_width, - uint32_t surface_height, - CachedTileView* target_color_attachments[4], - CachedTileView* target_depth_stencil_attachment); - ~CachedFramebuffer(); - - VkResult Initialize(); - - bool IsCompatible(const RenderConfiguration& desired_config) const; - - private: - const ui::vulkan::VulkanProvider& provider_; -}; - -// Cached render passes based on register states. -// Each render pass is dependent on the format, dimensions, and use of -// all attachments. The same render pass can be reused for multiple -// framebuffers pointing at various tile views, though those cached -// framebuffers are specific to the render pass. -class CachedRenderPass { - public: - // Configuration this pass was created with. - RenderConfiguration config; - // Initialized render pass for the register state. - VkRenderPass handle = nullptr; - // Cache of framebuffers for the various tile attachments. - std::vector<CachedFramebuffer*> cached_framebuffers; - - CachedRenderPass(const ui::vulkan::VulkanProvider& provider, - const RenderConfiguration& desired_config); - ~CachedRenderPass(); - - VkResult Initialize(); - - bool IsCompatible(const RenderConfiguration& desired_config) const; - - private: - const ui::vulkan::VulkanProvider& provider_; -}; - -CachedTileView::CachedTileView(const ui::vulkan::VulkanProvider& provider, - VkDeviceMemory edram_memory, - TileViewKey view_key) - : provider_(provider), key(std::move(view_key)) {} - -CachedTileView::~CachedTileView() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyImageView, device, - image_view); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyImageView, device, - image_view_depth); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyImageView, device, - image_view_stencil); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyImage, device, image); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, memory); -} - -VkResult CachedTileView::Initialize(VkCommandBuffer command_buffer) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Map format to Vulkan.
- VkFormat vulkan_format = VK_FORMAT_UNDEFINED; - uint32_t bpp = 4; - if (key.color_or_depth) { - auto edram_format = - static_cast<xenos::ColorRenderTargetFormat>(key.edram_format); - vulkan_format = ColorRenderTargetFormatToVkFormat(edram_format); - switch (edram_format) { - case xenos::ColorRenderTargetFormat::k_16_16_16_16: - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: - case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: - bpp = 8; - break; - default: - bpp = 4; - break; - } - } else { - auto edram_format = - static_cast<xenos::DepthRenderTargetFormat>(key.edram_format); - vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); - } - assert_true(vulkan_format != VK_FORMAT_UNDEFINED); - // FIXME(DrChat): Was this check necessary? - // assert_true(bpp == 4); - - // Create the image with the desired properties. - VkImageCreateInfo image_info; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = nullptr; - // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have - // multiple views. - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = vulkan_format; - image_info.extent.width = key.tile_width * 80; - image_info.extent.height = key.tile_height * 16; - image_info.extent.depth = 1; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - if (cvars::vulkan_native_msaa) { - auto msaa_samples = static_cast<xenos::MsaaSamples>(key.msaa_samples); - switch (msaa_samples) { - case xenos::MsaaSamples::k1X: - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - image_info.samples = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - image_info.samples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_samples); - } - } else { - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - } - sample_count = image_info.samples; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; - image_info.usage |= key.color_or_depth - ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT - : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - status = dfn.vkCreateImage(device, &image_info, nullptr, &image); - if (status != VK_SUCCESS) { - return status; - } - - provider_.SetDeviceObjectName( - VK_OBJECT_TYPE_IMAGE, uint64_t(image), - fmt::format("RT(d): 0x{:08X} 0x{:08X}({}) 0x{:08X}({}) {} {} {}", - uint32_t(key.tile_offset), uint32_t(key.tile_width), - uint32_t(key.tile_width), uint32_t(key.tile_height), - uint32_t(key.tile_height), uint32_t(key.color_or_depth), - uint32_t(key.msaa_samples), uint32_t(key.edram_format)) - .c_str()); - - VkMemoryRequirements memory_requirements; - dfn.vkGetImageMemoryRequirements(device, image, &memory_requirements); - - // Bind to a newly allocated chunk. - // TODO: Alias from a really big buffer?
- VkMemoryAllocateInfo memory_allocate_info; - memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_allocate_info.pNext = nullptr; - memory_allocate_info.allocationSize = memory_requirements.size; - if (!xe::bit_scan_forward(memory_requirements.memoryTypeBits & - provider_.memory_types_device_local(), - &memory_allocate_info.memoryTypeIndex)) { - return VK_ERROR_INITIALIZATION_FAILED; - } - status = - dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory); - if (status != VK_SUCCESS) { - return status; - } - status = dfn.vkBindImageMemory(device, image, memory, 0); - if (status != VK_SUCCESS) { - return status; - } - - // Create the image view we'll use to attach it to a framebuffer. - VkImageViewCreateInfo image_view_info; - image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_info.pNext = nullptr; - image_view_info.flags = 0; - image_view_info.image = image; - image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_info.format = image_info.format; - // TODO(benvanik): manipulate? may not be able to when attached. - image_view_info.components = { - VK_COMPONENT_SWIZZLE_R, - VK_COMPONENT_SWIZZLE_G, - VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A, - }; - image_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - if (key.color_or_depth) { - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } else { - image_view_info.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } - status = - dfn.vkCreateImageView(device, &image_view_info, nullptr, &image_view); - if (status != VK_SUCCESS) { - return status; - } - - // Create separate depth/stencil views. - if (key.color_or_depth == 0) { - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - status = dfn.vkCreateImageView(device, &image_view_info, nullptr, - &image_view_depth); - if (status != VK_SUCCESS) { - return status; - } - - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - status = dfn.vkCreateImageView(device, &image_view_info, nullptr, - &image_view_stencil); - if (status != VK_SUCCESS) { - return status; - } - } - - // TODO(benvanik): transition to general layout? - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = - key.color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.image = image; - image_barrier.subresourceRange.aspectMask = - key.color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - image_barrier.subresourceRange.baseMipLevel = 0; - image_barrier.subresourceRange.levelCount = 1; - image_barrier.subresourceRange.baseArrayLayer = 0; - image_barrier.subresourceRange.layerCount = 1; - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - key.color_or_depth - ? 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - : VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, - 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - - image_layout = image_barrier.newLayout; - return VK_SUCCESS; -} - -CachedFramebuffer::CachedFramebuffer( - const ui::vulkan::VulkanProvider& provider, VkRenderPass render_pass, - uint32_t surface_width, uint32_t surface_height, - CachedTileView* target_color_attachments[4], - CachedTileView* target_depth_stencil_attachment) - : provider_(provider), - width(surface_width), - height(surface_height), - depth_stencil_attachment(target_depth_stencil_attachment), - render_pass(render_pass) { - for (int i = 0; i < 4; ++i) { - color_attachments[i] = target_color_attachments[i]; - } -} - -CachedFramebuffer::~CachedFramebuffer() { - if (handle != VK_NULL_HANDLE) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkDestroyFramebuffer(device, handle, nullptr); - } -} - -VkResult CachedFramebuffer::Initialize() { - // Create framebuffer. - VkImageView image_views[5] = {nullptr}; - int image_view_count = 0; - for (int i = 0; i < 4; ++i) { - if (color_attachments[i]) { - image_views[image_view_count++] = color_attachments[i]->image_view; - } - } - if (depth_stencil_attachment) { - image_views[image_view_count++] = depth_stencil_attachment->image_view; - } - VkFramebufferCreateInfo framebuffer_info; - framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_info.pNext = nullptr; - framebuffer_info.flags = 0; - framebuffer_info.renderPass = render_pass; - framebuffer_info.attachmentCount = image_view_count; - framebuffer_info.pAttachments = image_views; - framebuffer_info.width = width; - framebuffer_info.height = height; - framebuffer_info.layers = 1; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - return dfn.vkCreateFramebuffer(device, &framebuffer_info, nullptr, &handle); -} - -bool CachedFramebuffer::IsCompatible( - const RenderConfiguration& desired_config) const { - // We already know all render pass things line up, so let's verify dimensions, - // edram offsets, etc. We need an exact match. - uint32_t surface_pitch_px = - desired_config.surface_msaa != xenos::MsaaSamples::k4X - ? desired_config.surface_pitch_px - : desired_config.surface_pitch_px * 2; - uint32_t surface_height_px = - desired_config.surface_msaa == xenos::MsaaSamples::k1X - ? desired_config.surface_height_px - : desired_config.surface_height_px * 2; - surface_pitch_px = std::min(surface_pitch_px, 2560u); - surface_height_px = std::min(surface_height_px, 2560u); - if (surface_pitch_px != width || surface_height_px != height) { - return false; - } - // TODO(benvanik): separate image views from images in tiles and store in fb? - for (int i = 0; i < 4; ++i) { - // Ensure the attachment points to the same tile. - if (!color_attachments[i]) { - continue; - } - auto& color_info = color_attachments[i]->key; - auto& desired_color_info = desired_config.color[i]; - if (color_info.tile_offset != desired_color_info.edram_base || - color_info.edram_format != - static_cast<uint16_t>(desired_color_info.format)) { - return false; - } - } - // Ensure depth attachment is correct.
- if (depth_stencil_attachment && - (depth_stencil_attachment->key.tile_offset != - desired_config.depth_stencil.edram_base || - depth_stencil_attachment->key.edram_format != - static_cast<uint16_t>(desired_config.depth_stencil.format))) { - return false; - } - return true; -} - -CachedRenderPass::CachedRenderPass(const ui::vulkan::VulkanProvider& provider, - const RenderConfiguration& desired_config) - : provider_(provider) { - std::memcpy(&config, &desired_config, sizeof(config)); -} - -CachedRenderPass::~CachedRenderPass() { - for (auto framebuffer : cached_framebuffers) { - delete framebuffer; - } - cached_framebuffers.clear(); - - if (handle != VK_NULL_HANDLE) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkDestroyRenderPass(device, handle, nullptr); - } -} - -VkResult CachedRenderPass::Initialize() { - VkSampleCountFlagBits sample_count; - if (cvars::vulkan_native_msaa) { - switch (config.surface_msaa) { - case xenos::MsaaSamples::k1X: - sample_count = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - sample_count = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - sample_count = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(config.surface_msaa); - break; - } - } else { - sample_count = VK_SAMPLE_COUNT_1_BIT; - } - - // Initialize all attachments to default unused. - // As we set layout(location=RT) in shaders we must always provide 4. - VkAttachmentDescription attachments[5]; - for (int i = 0; i < 4; ++i) { - attachments[i].flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - attachments[i].format = VK_FORMAT_UNDEFINED; - attachments[i].samples = sample_count; - attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; - attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; - } - auto& depth_stencil_attachment = attachments[4]; - depth_stencil_attachment.flags = 0; - depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; - depth_stencil_attachment.samples = sample_count; - depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_stencil_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_stencil_attachment.initialLayout = VK_IMAGE_LAYOUT_GENERAL; - depth_stencil_attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL; - - // Configure attachments based on what's enabled. - VkAttachmentReference color_attachment_refs[4]; - for (int i = 0; i < 4; ++i) { - auto& color_config = config.color[i]; - // TODO(benvanik): see how loose we can be with these. - attachments[i].format = - ColorRenderTargetFormatToVkFormat(color_config.format); - auto& color_attachment_ref = color_attachment_refs[i]; - color_attachment_ref.attachment = i; - color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; - } - - // Configure depth. - VkAttachmentReference depth_stencil_attachment_ref; - depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; - - auto& depth_config = config.depth_stencil; - depth_stencil_attachment_ref.attachment = 4; - depth_stencil_attachment.format = - DepthRenderTargetFormatToVkFormat(depth_config.format); - - // Single subpass that writes to our attachments.
- // FIXME: "Multiple attachments that alias the same memory must not be used in - // a single subpass" - // TODO: Input attachment for depth/stencil reads? - VkSubpassDescription subpass_info; - subpass_info.flags = 0; - subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_info.inputAttachmentCount = 0; - subpass_info.pInputAttachments = nullptr; - subpass_info.colorAttachmentCount = 4; - subpass_info.pColorAttachments = color_attachment_refs; - subpass_info.pResolveAttachments = nullptr; - subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref; - subpass_info.preserveAttachmentCount = 0; - subpass_info.pPreserveAttachments = nullptr; - - // Create the render pass. - VkRenderPassCreateInfo render_pass_info; - std::memset(&render_pass_info, 0, sizeof(render_pass_info)); - render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - render_pass_info.pNext = nullptr; - render_pass_info.flags = 0; - render_pass_info.attachmentCount = 5; - render_pass_info.pAttachments = attachments; - render_pass_info.subpassCount = 1; - render_pass_info.pSubpasses = &subpass_info; - - // Add a dependency on external render passes -> us (MAY_ALIAS bit) - VkSubpassDependency dependencies[1]; - dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; - dependencies[0].dstSubpass = 0; - dependencies[0].srcStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - dependencies[0].dstStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - dependencies[0].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependencies[0].dependencyFlags = 0; - - render_pass_info.dependencyCount = 1; - render_pass_info.pDependencies = dependencies; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - return dfn.vkCreateRenderPass(device, &render_pass_info, nullptr, &handle); -} - -bool CachedRenderPass::IsCompatible( - const RenderConfiguration& desired_config) const { - if (config.surface_msaa != desired_config.surface_msaa && - cvars::vulkan_native_msaa) { - return false; - } - - for (int i = 0; i < 4; ++i) { - // TODO(benvanik): allow compatible vulkan formats. - if (config.color[i].format != desired_config.color[i].format) { - return false; - } - } - if (config.depth_stencil.format != desired_config.depth_stencil.format) { - return false; - } - return true; -} - -RenderCache::RenderCache(RegisterFile* register_file, - const ui::vulkan::VulkanProvider& provider) - : register_file_(register_file), provider_(provider) {} - -RenderCache::~RenderCache() { Shutdown(); } - -VkResult RenderCache::Initialize() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Create the buffer we'll bind to our memory. 
- VkBufferCreateInfo buffer_info; - buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_info.pNext = nullptr; - buffer_info.flags = 0; - buffer_info.size = kEdramBufferCapacity; - buffer_info.usage = - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_info.queueFamilyIndexCount = 0; - buffer_info.pQueueFamilyIndices = nullptr; - status = dfn.vkCreateBuffer(device, &buffer_info, nullptr, &edram_buffer_); - CheckResult(status, "vkCreateBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Query requirements for the buffer. - // It should be 1:1. - VkMemoryRequirements buffer_requirements; - dfn.vkGetBufferMemoryRequirements(device, edram_buffer_, - &buffer_requirements); - assert_true(buffer_requirements.size == kEdramBufferCapacity); - - // Allocate EDRAM memory. - // TODO(benvanik): do we need it host visible? - VkMemoryAllocateInfo buffer_allocate_info; - buffer_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - buffer_allocate_info.pNext = nullptr; - buffer_allocate_info.allocationSize = buffer_requirements.size; - buffer_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType( - provider_, buffer_requirements.memoryTypeBits, false); - if (buffer_allocate_info.memoryTypeIndex == UINT32_MAX) { - return VK_ERROR_INITIALIZATION_FAILED; - } - status = dfn.vkAllocateMemory(device, &buffer_allocate_info, nullptr, - &edram_memory_); - if (status != VK_SUCCESS) { - return status; - } - - // Bind buffer to map our entire memory. - status = dfn.vkBindBufferMemory(device, edram_buffer_, edram_memory_, 0); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - return status; - } - - if (status == VK_SUCCESS) { - // For debugging, upload a grid into the EDRAM buffer. - uint32_t* gpu_data = nullptr; - status = dfn.vkMapMemory(device, edram_memory_, 0, buffer_requirements.size, - 0, reinterpret_cast<void**>(&gpu_data)); - - if (status == VK_SUCCESS) { - for (int i = 0; i < kEdramBufferCapacity / 4; i++) { - gpu_data[i] = (i % 8) >= 4 ? 0xFF0000FF : 0xFFFFFFFF; - } - - dfn.vkUnmapMemory(device, edram_memory_); - } - } - - return VK_SUCCESS; -} - -void RenderCache::Shutdown() { - // TODO(benvanik): wait for idle. - - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - // Dispose all render passes (and their framebuffers). - for (auto render_pass : cached_render_passes_) { - delete render_pass; - } - cached_render_passes_.clear(); - - // Dispose all of our cached tile views. - for (auto tile_view : cached_tile_views_) { - delete tile_view; - } - cached_tile_views_.clear(); - - // Release underlying EDRAM memory.
- if (edram_buffer_) { - dfn.vkDestroyBuffer(device, edram_buffer_, nullptr); - edram_buffer_ = nullptr; - } - if (edram_memory_) { - dfn.vkFreeMemory(device, edram_memory_, nullptr); - edram_memory_ = nullptr; - } -} - -bool RenderCache::dirty() const { - auto& regs = *register_file_; - auto& cur_regs = shadow_registers_; - - bool dirty = false; - dirty |= cur_regs.rb_modecontrol.value != regs[XE_GPU_REG_RB_MODECONTROL].u32; - dirty |= - cur_regs.rb_surface_info.value != regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - dirty |= cur_regs.rb_color_info.value != regs[XE_GPU_REG_RB_COLOR_INFO].u32; - dirty |= cur_regs.rb_color1_info.value != regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - dirty |= cur_regs.rb_color2_info.value != regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - dirty |= cur_regs.rb_color3_info.value != regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - dirty |= cur_regs.rb_depth_info.value != regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - dirty |= cur_regs.pa_sc_window_scissor_tl != - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - dirty |= cur_regs.pa_sc_window_scissor_br != - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - return dirty; -} - -const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - assert_null(current_command_buffer_); - current_command_buffer_ = command_buffer; - - // Lookup or construct a render pass compatible with our current state. - auto config = &current_state_.config; - CachedRenderPass* render_pass = nullptr; - CachedFramebuffer* framebuffer = nullptr; - auto& regs = shadow_registers_; - bool dirty = false; - dirty |= - SetShadowRegister(&regs.rb_modecontrol.value, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(&regs.rb_surface_info.value, - XE_GPU_REG_RB_SURFACE_INFO); - dirty |= - SetShadowRegister(&regs.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); - dirty |= - SetShadowRegister(&regs.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= - SetShadowRegister(&regs.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= - SetShadowRegister(&regs.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= - SetShadowRegister(&regs.rb_depth_info.value, XE_GPU_REG_RB_DEPTH_INFO); - dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - if (!dirty && current_state_.render_pass) { - // No registers have changed so we can reuse the previous render pass - - // just begin with what we had. - render_pass = current_state_.render_pass; - framebuffer = current_state_.framebuffer; - } else { - // Re-parse configuration. - if (!ParseConfiguration(config)) { - return nullptr; - } - - // Lookup or generate a new render pass and framebuffer for the new state. - if (!ConfigureRenderPass(command_buffer, config, &render_pass, - &framebuffer)) { - return nullptr; - } - - current_state_.render_pass = render_pass; - current_state_.render_pass_handle = render_pass->handle; - current_state_.framebuffer = framebuffer; - current_state_.framebuffer_handle = framebuffer->handle; - - // TODO(DrChat): Determine if we actually need an EDRAM buffer.
- /* - // Depth - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target && current_state_.config.depth_stencil.used) { - UpdateTileView(command_buffer, depth_target, true); - } - - // Color - for (int i = 0; i < 4; i++) { - auto target = current_state_.framebuffer->color_attachments[i]; - if (!target || !current_state_.config.color[i].used) { - continue; - } - - UpdateTileView(command_buffer, target, true); - } - */ - } - if (!render_pass) { - return nullptr; - } - - // Setup render pass in command buffer. - // This is meant to preserve previous contents as we may be called - // repeatedly. - VkRenderPassBeginInfo render_pass_begin_info; - render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass->handle; - render_pass_begin_info.framebuffer = framebuffer->handle; - - // Render into the entire buffer (or at least tell the API we are doing - // this). In theory it'd be better to clip this to the scissor region, but - // the docs warn anything but the full framebuffer may be slow. - render_pass_begin_info.renderArea.offset.x = 0; - render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; - render_pass_begin_info.renderArea.extent.height = config->surface_height_px; - - if (config->surface_msaa == xenos::MsaaSamples::k2X) { - render_pass_begin_info.renderArea.extent.height = - std::min(config->surface_height_px * 2, 2560u); - } else if (config->surface_msaa == xenos::MsaaSamples::k4X) { - render_pass_begin_info.renderArea.extent.width *= 2; - render_pass_begin_info.renderArea.extent.height = - std::min(config->surface_height_px * 2, 2560u); - } - - // Configure clear color, if clearing. - // TODO(benvanik): enable clearing here during resolve? - render_pass_begin_info.clearValueCount = 0; - render_pass_begin_info.pClearValues = nullptr; - - // Begin the render pass. - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, - VK_SUBPASS_CONTENTS_INLINE); - - return &current_state_; -} - -bool RenderCache::ParseConfiguration(RenderConfiguration* config) { - auto& regs = shadow_registers_; - - // RB_MODECONTROL - // Rough mode control (color, color+depth, etc). - config->mode_control = regs.rb_modecontrol.edram_mode; - - // RB_SURFACE_INFO - // https://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - config->surface_pitch_px = regs.rb_surface_info.surface_pitch; - config->surface_msaa = regs.rb_surface_info.msaa_samples; - - // TODO(benvanik): verify min/max so we don't go out of bounds. - // TODO(benvanik): has to be a good way to get height. - // Guess the height from the scissor height. - // It's wildly inaccurate, but I've never seen it be bigger than the - // EDRAM tiling. - /* - uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; - uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; - config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); - */ - - // TODO(DrChat): Find an accurate way to get the surface height. Until we do, - // we're going to hardcode it to 2560, as that's the absolute maximum. - config->surface_height_px = 2560; - - // Color attachment configuration.
- if (config->mode_control == ModeControl::kColorDepth) { - reg::RB_COLOR_INFO color_info[4] = { - regs.rb_color_info, - regs.rb_color1_info, - regs.rb_color2_info, - regs.rb_color3_info, - }; - for (int i = 0; i < 4; ++i) { - config->color[i].edram_base = color_info[i].color_base; - config->color[i].format = GetBaseRTFormat(color_info[i].color_format); - } - } else { - for (int i = 0; i < 4; ++i) { - config->color[i].edram_base = 0; - config->color[i].format = xenos::ColorRenderTargetFormat::k_8_8_8_8; - config->color[i].used = false; - } - } - - // Depth/stencil attachment configuration. - if (config->mode_control == ModeControl::kColorDepth || - config->mode_control == ModeControl::kDepth) { - config->depth_stencil.edram_base = regs.rb_depth_info.depth_base; - config->depth_stencil.format = regs.rb_depth_info.depth_format; - } else { - config->depth_stencil.edram_base = 0; - config->depth_stencil.format = xenos::DepthRenderTargetFormat::kD24S8; - config->depth_stencil.used = false; - } - - return true; -} - -bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, - RenderConfiguration* config, - CachedRenderPass** out_render_pass, - CachedFramebuffer** out_framebuffer) { - *out_render_pass = nullptr; - *out_framebuffer = nullptr; - - // TODO(benvanik): better lookup. - // Attempt to find the render pass in our cache. - CachedRenderPass* render_pass = nullptr; - for (auto cached_render_pass : cached_render_passes_) { - if (cached_render_pass->IsCompatible(*config)) { - // Found a match. - render_pass = cached_render_pass; - break; - } - } - - // If no render pass was found in the cache create a new one. - if (!render_pass) { - render_pass = new CachedRenderPass(provider_, *config); - VkResult status = render_pass->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create render pass", __func__); - delete render_pass; - return false; - } - - cached_render_passes_.push_back(render_pass); - } - - // TODO(benvanik): better lookup. - // Attempt to find the framebuffer in the render pass cache. - CachedFramebuffer* framebuffer = nullptr; - for (auto cached_framebuffer : render_pass->cached_framebuffers) { - if (cached_framebuffer->IsCompatible(*config)) { - // Found a match. - framebuffer = cached_framebuffer; - break; - } - } - - // If no framebuffer was found in the cache create a new one. - if (!framebuffer) { - uint32_t tile_width = - config->surface_msaa == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = - config->surface_msaa != xenos::MsaaSamples::k1X ? 
8 : 16; - - CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, - nullptr}; - for (int i = 0; i < 4; ++i) { - TileViewKey color_key; - color_key.tile_offset = config->color[i].edram_base; - color_key.tile_width = - xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - // color_key.tile_height = - // xe::round_up(config->surface_height_px, tile_height) / tile_height; - color_key.tile_height = 160; - color_key.color_or_depth = 1; - color_key.msaa_samples = - 0; // static_cast<uint16_t>(config->surface_msaa); - color_key.edram_format = static_cast<uint16_t>(config->color[i].format); - target_color_attachments[i] = - FindOrCreateTileView(command_buffer, color_key); - if (!target_color_attachments[i]) { - XELOGE("Failed to get tile view for color attachment"); - return false; - } - } - - TileViewKey depth_stencil_key; - depth_stencil_key.tile_offset = config->depth_stencil.edram_base; - depth_stencil_key.tile_width = - xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - // depth_stencil_key.tile_height = - // xe::round_up(config->surface_height_px, tile_height) / tile_height; - depth_stencil_key.tile_height = 160; - depth_stencil_key.color_or_depth = 0; - depth_stencil_key.msaa_samples = - 0; // static_cast<uint16_t>(config->surface_msaa); - depth_stencil_key.edram_format = - static_cast<uint16_t>(config->depth_stencil.format); - auto target_depth_stencil_attachment = - FindOrCreateTileView(command_buffer, depth_stencil_key); - if (!target_depth_stencil_attachment) { - XELOGE("Failed to get tile view for depth/stencil attachment"); - return false; - } - - uint32_t surface_pitch_px = config->surface_msaa != xenos::MsaaSamples::k4X - ? config->surface_pitch_px - : config->surface_pitch_px * 2; - uint32_t surface_height_px = config->surface_msaa == xenos::MsaaSamples::k1X - ? config->surface_height_px - : config->surface_height_px * 2; - surface_pitch_px = std::min(surface_pitch_px, 2560u); - surface_height_px = std::min(surface_height_px, 2560u); - framebuffer = new CachedFramebuffer( - provider_, render_pass->handle, surface_pitch_px, surface_height_px, - target_color_attachments, target_depth_stencil_attachment); - VkResult status = framebuffer->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create framebuffer", __func__); - delete framebuffer; - return false; - } - - render_pass->cached_framebuffers.push_back(framebuffer); - } - - *out_render_pass = render_pass; - *out_framebuffer = framebuffer; - return true; -} - -CachedTileView* RenderCache::FindTileView(uint32_t base, uint32_t pitch, - xenos::MsaaSamples samples, - bool color_or_depth, - uint32_t format) { - uint32_t tile_width = samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = samples != xenos::MsaaSamples::k1X ? 8 : 16; - - if (color_or_depth) { - // Adjust similar formats for easier matching. - format = static_cast<uint32_t>( - GetBaseRTFormat(static_cast<xenos::ColorRenderTargetFormat>(format))); - } - - TileViewKey key; - key.tile_offset = base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - key.tile_height = 160; - key.color_or_depth = color_or_depth ? 1 : 0; - key.msaa_samples = 0; - key.edram_format = static_cast<uint16_t>(format); - auto view = FindTileView(key); - if (view) { - return view; - } - - return nullptr; -} - -CachedTileView* RenderCache::FindOrCreateTileView( - VkCommandBuffer command_buffer, const TileViewKey& view_key) { - auto tile_view = FindTileView(view_key); - if (tile_view) { - return tile_view; - } - - // Create a new tile and add to the cache.
- tile_view = new CachedTileView(provider_, edram_memory_, view_key); - VkResult status = tile_view->Initialize(command_buffer); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create tile view", __func__); - - delete tile_view; - return nullptr; - } - - cached_tile_views_.push_back(tile_view); - return tile_view; -} - -void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, - CachedTileView* view, bool load, - bool insert_barrier) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - - uint32_t tile_width = - view->key.msaa_samples == uint16_t(xenos::MsaaSamples::k4X) ? 40 : 80; - uint32_t tile_height = - view->key.msaa_samples != uint16_t(xenos::MsaaSamples::k1X) ? 8 : 16; - - if (insert_barrier) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - if (load) { - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - } else { - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - } - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = edram_buffer_; - barrier.offset = view->key.tile_offset * 5120; - barrier.size = view->key.tile_width * tile_width * view->key.tile_height * - tile_height * view->key.color_or_depth - ? 4 - : 1; - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, - 1, &barrier, 0, nullptr); - } - - // TODO(DrChat): Stencil copies. - VkBufferImageCopy region; - region.bufferOffset = view->key.tile_offset * 5120; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource = {0, 0, 0, 1}; - region.imageSubresource.aspectMask = view->key.color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT; - region.imageOffset = {0, 0, 0}; - region.imageExtent = {view->key.tile_width * tile_width, - view->key.tile_height * tile_height, 1}; - if (load) { - dfn.vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image, - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); - } else { - dfn.vkCmdCopyImageToBuffer(command_buffer, view->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, - ®ion); - } -} - -CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const { - // Check the cache. - // TODO(benvanik): better lookup. - for (auto tile_view : cached_tile_views_) { - if (tile_view->IsEqual(view_key)) { - return tile_view; - } - } - - return nullptr; -} - -void RenderCache::EndRenderPass() { - assert_not_null(current_command_buffer_); - - // End the render pass. - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdEndRenderPass(current_command_buffer_); - - // Copy all render targets back into our EDRAM buffer. - // Don't bother waiting on this command to complete, as next render pass may - // reuse previous framebuffer attachments. If they need this, they will wait. - // TODO: Should we bother re-tiling the images on copy back? - // - // FIXME: There's a case where we may have a really big render target (as we - // can't get the correct height atm) and we may end up overwriting the valid - // contents of another render target by mistake! Need to reorder copy commands - // to avoid this. - - // TODO(DrChat): Determine if we actually need an EDRAM buffer. 
- /* - std::vector<CachedTileView*> cached_views; - - // Depth - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target && current_state_.config.depth_stencil.used) { - cached_views.push_back(depth_target); - } - - // Color - for (int i = 0; i < 4; i++) { - auto target = current_state_.framebuffer->color_attachments[i]; - if (!target || !current_state_.config.color[i].used) { - continue; - } - - cached_views.push_back(target); - } - - std::sort( - cached_views.begin(), cached_views.end(), - [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; }); - - for (auto view : cached_views) { - UpdateTileView(current_command_buffer_, view, false, false); - } - */ - - current_command_buffer_ = nullptr; -} - -void RenderCache::ClearCache() { - // TODO(benvanik): caching. -} - -void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, - uint32_t edram_base, VkImage image, - VkImageLayout image_layout, - bool color_or_depth, VkOffset3D offset, - VkExtent3D extents) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - - // Transition the texture into a transfer destination layout. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_barrier.oldLayout = image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - image_barrier.image = image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, - 0, nullptr, 1, &image_barrier); - } - - VkBufferMemoryBarrier buffer_barrier; - buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.buffer = edram_buffer_; - buffer_barrier.offset = edram_base * 5120; - // TODO: Calculate this accurately (need texel size) - buffer_barrier.size = extents.width * extents.height * 4; - - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &buffer_barrier, 0, nullptr); - - // Issue the copy command. - // TODO(DrChat): Stencil copies. - VkBufferImageCopy region; - region.bufferOffset = edram_base * 5120; - region.bufferImageHeight = 0; - region.bufferRowLength = 0; - region.imageOffset = offset; - region.imageExtent = extents; - region.imageSubresource = {0, 0, 0, 1}; - region.imageSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - dfn.vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, - 1, &region); - - // Transition the image back into its previous layout.
- if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = 0; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, - 0, nullptr, 1, &image_barrier); - } -} - -void RenderCache::BlitToImage(VkCommandBuffer command_buffer, - uint32_t edram_base, uint32_t pitch, - uint32_t height, xenos::MsaaSamples num_samples, - VkImage image, VkImageLayout image_layout, - bool color_or_depth, uint32_t format, - VkFilter filter, VkOffset3D offset, - VkExtent3D extents) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - - if (color_or_depth) { - // Adjust similar formats for easier matching. - format = static_cast<uint32_t>( - GetBaseRTFormat(static_cast<xenos::ColorRenderTargetFormat>(format))); - } - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ? 8 : 16; - - // Grab a tile view that represents the source image. - TileViewKey key; - key.color_or_depth = color_or_depth ? 1 : 0; - key.msaa_samples = 0; // static_cast<uint16_t>(num_samples); - key.edram_format = format; - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - // Update the view with the latest contents. - // UpdateTileView(command_buffer, tile_view, true, true); - - // Put a barrier on the tile view. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = - color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.image = tile_view->image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - - // If we overflow we'll lose the device here. - // assert_true(extents.width <= key.tile_width * tile_width); - // assert_true(extents.height <= key.tile_height * tile_height); - - // Now issue the blit to the destination. - if (tile_view->sample_count == VK_SAMPLE_COUNT_1_BIT) { - VkImageBlit image_blit; - image_blit.srcSubresource = {0, 0, 0, 1}; - image_blit.srcSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_blit.srcOffsets[0] = {0, 0, offset.z}; - image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), - int32_t(extents.depth)}; - - image_blit.dstSubresource = {0, 0, 0, 1}; - image_blit.dstSubresource.aspectMask = - color_or_depth ?
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_blit.dstOffsets[0] = offset; - image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width), - offset.y + int32_t(extents.height), - offset.z + int32_t(extents.depth)}; - dfn.vkCmdBlitImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, image, image_layout, 1, - &image_blit, filter); - } else { - VkImageResolve image_resolve; - image_resolve.srcSubresource = {0, 0, 0, 1}; - image_resolve.srcSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_resolve.srcOffset = {0, 0, 0}; - - image_resolve.dstSubresource = {0, 0, 0, 1}; - image_resolve.dstSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_resolve.dstOffset = offset; - - image_resolve.extent = extents; - dfn.vkCmdResolveImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, image, image_layout, 1, - &image_resolve); - } - - // Add another barrier on the tile view. - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = - color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); -} - -void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::ColorRenderTargetFormat format, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, - float* color) { - // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just - // need to detect this and calculate a value. - - // Adjust similar formats for easier matching. - format = GetBaseRTFormat(static_cast<xenos::ColorRenderTargetFormat>(format)); - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ? 8 : 16; - - // Grab a tile view (as we need to clear an image first) - TileViewKey key; - key.color_or_depth = 1; - key.msaa_samples = 0; // static_cast<uint16_t>(num_samples); - key.edram_format = static_cast<uint16_t>(format); - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - VkClearColorValue clear_value; - std::memcpy(clear_value.float32, color, sizeof(float) * 4); - - // Issue a clear command - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdClearColorImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); - - // Copy image back into EDRAM buffer - // UpdateTileView(command_buffer, tile_view, false, false); -} - -void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::DepthRenderTargetFormat format, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, - float depth, uint32_t stencil) { - // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just - // need to detect this and calculate a value. - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ?
8 : 16; - - // Grab a tile view (as we need to clear an image first) - TileViewKey key; - key.color_or_depth = 0; - key.msaa_samples = 0; // static_cast<uint16_t>(num_samples); - key.edram_format = static_cast<uint16_t>(format); - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - VkImageSubresourceRange range = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1, - }; - VkClearDepthStencilValue clear_value; - clear_value.depth = depth; - clear_value.stencil = stencil; - - // Issue a clear command - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdClearDepthStencilImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, - &range); - - // Copy image back into EDRAM buffer - // UpdateTileView(command_buffer, tile_view, false, false); -} - -void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - dfn.vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity, - value); -} - -bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h deleted file mode 100644 index f73fa39e5..000000000 --- a/src/xenia/gpu/vulkan/render_cache.h +++ /dev/null @@ -1,406 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_ -#define XENIA_GPU_VULKAN_RENDER_CACHE_H_ - -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/registers.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/vulkan_shader.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -// TODO(benvanik): make public API? -class CachedTileView; -class CachedFramebuffer; -class CachedRenderPass; - -// Uniquely identifies EDRAM tiles. -struct TileViewKey { - // Offset into EDRAM in 5120b tiles. - uint16_t tile_offset; - // Tile width of the view in base 80x16 tiles. - uint16_t tile_width; - // Tile height of the view in base 80x16 tiles. - uint16_t tile_height; - // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. - uint16_t color_or_depth : 1; - // Surface MSAA samples - uint16_t msaa_samples : 2; - // Either ColorRenderTargetFormat or DepthRenderTargetFormat. - uint16_t edram_format : 13; -}; -static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); - -// Cached view representing EDRAM memory. -// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible -// formats? -class CachedTileView { - public: - // Key identifying the view in the cache.
- TileViewKey key; - // Image - VkImage image = nullptr; - // Simple view on the image matching the format. - VkImageView image_view = nullptr; - // Image layout - VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED; - // Memory buffer - VkDeviceMemory memory = nullptr; - // Image sample count - VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT; - - // (if a depth view) Image view of depth aspect - VkImageView image_view_depth = nullptr; - // (if a depth view) Image view of stencil aspect - VkImageView image_view_stencil = nullptr; - - CachedTileView(const ui::vulkan::VulkanProvider& provider, - VkDeviceMemory edram_memory, TileViewKey view_key); - ~CachedTileView(); - - VkResult Initialize(VkCommandBuffer command_buffer); - - bool IsEqual(const TileViewKey& other_key) const { - auto a = reinterpret_cast<const uint64_t*>(&key); - auto b = reinterpret_cast<const uint64_t*>(&other_key); - return *a == *b; - } - - bool operator<(const CachedTileView& other) const { - return key.tile_offset < other.key.tile_offset; - } - - VkExtent2D GetSize() const { - return {key.tile_width * 80u, key.tile_height * 16u}; - } - - private: - const ui::vulkan::VulkanProvider& provider_; -}; - -// Parsed render configuration from the current render state. -struct RenderConfiguration { - // Render mode (color+depth, depth-only, etc). - xenos::ModeControl mode_control; - // Target surface pitch multiplied by MSAA, in pixels. - uint32_t surface_pitch_px; - // ESTIMATED target surface height multiplied by MSAA, in pixels. - uint32_t surface_height_px; - // Surface MSAA setting. - xenos::MsaaSamples surface_msaa; - // Color attachments for the 4 render targets. - struct { - bool used; - uint32_t edram_base; - xenos::ColorRenderTargetFormat format; - } color[4]; - // Depth/stencil attachment. - struct { - bool used; - uint32_t edram_base; - xenos::DepthRenderTargetFormat format; - } depth_stencil; -}; - -// Current render state based on the register-specified configuration. -struct RenderState { - // Parsed configuration. - RenderConfiguration config; - // Render pass (to be used with pipelines/etc). - CachedRenderPass* render_pass = nullptr; - VkRenderPass render_pass_handle = nullptr; - // Target framebuffer bound to the render pass. - CachedFramebuffer* framebuffer = nullptr; - VkFramebuffer framebuffer_handle = nullptr; - - bool color_attachment_written[4] = {false}; - bool depth_attachment_written = false; -}; - -// Manages the virtualized EDRAM and the render target cache. -// -// On the 360 the render target is an opaque block of memory in EDRAM that's -// only accessible via resolves. We use this to our advantage to simulate -// something like it as best we can by having a shared backing memory with -// a multitude of views for each tile location in EDRAM. -// -// This allows us to have the same base address write to the same memory -// regardless of framebuffer format. Resolving then uses whatever format the -// resolve requests straight from the backing memory. -// -// EDRAM is a beast and we only approximate it as best we can. Basically, -// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px. -// +-----+-----+-----+--- -// |tile0|tile1|tile2|... 2048 times -// +-----+-----+-----+--- -// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile -// offset 256, 256*5120=1310720b into the buffer. All rendering operations are -// aligned to tiles so trying to draw at 256px wide will have a real width of -// 320px by rounding up to the next tile.
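The tile math in the comment above is easy to sanity-check. Here is a minimal standalone sketch (not part of the diff; the constant and helper names are hypothetical) reproducing the 0x100 -> 1310720b offset and the 256px -> 320px pitch rounding:

```cpp
#include <cstdint>

// EDRAM geometry as described above: 2048 tiles of 5120 bytes, each tile
// covering 80x16 pixels at 1X MSAA.
constexpr uint32_t kEdramTileBytes = 5120;
constexpr uint32_t kEdramTileWidthPx = 80;

// Tile offsets address EDRAM in whole tiles: base 0x100 is tile 256.
constexpr uint32_t TileOffsetToByteOffset(uint32_t tile_offset) {
  return tile_offset * kEdramTileBytes;
}

// Rendering is tile-aligned, so a pitch rounds up to the next 80px tile.
constexpr uint32_t RoundPitchToTilePx(uint32_t pitch_px) {
  return ((pitch_px + kEdramTileWidthPx - 1) / kEdramTileWidthPx) *
         kEdramTileWidthPx;
}

static_assert(TileOffsetToByteOffset(0x100) == 1310720,
              "base 0x100 = tile 256 = 256*5120b");
static_assert(RoundPitchToTilePx(256) == 320,
              "a 256px-wide draw really occupies 320px");
```

This is the same rounding the deleted render_cache.cc performs with `xe::round_up(pitch, tile_width) / tile_width` when building a TileViewKey.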
-// -// MSAA and other settings will modify the exact pixel sizes, like 4X makes -// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still -// all 5120b. As we try to emulate this we adjust our viewport when rendering to -// stretch pixels as needed. -// -// It appears that games also take advantage of MSAA stretching tiles when doing -// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then -// later draw to that view with 1X pitch/height and 1X MSAA. -// -// The good news is that games cannot read EDRAM directly but must use a copy -// operation to get the data out. That gives us a chance to do whatever we -// need to (re-tile, etc) only when requested. -// -// To approximate the tiled EDRAM layout we use a single large chunk of memory. -// From this memory we create many VkImages (and VkImageViews) of various -// formats and dimensions as requested by the game. These are used as -// attachments during rendering and as sources during copies. They are also -// heavily aliased - lots of images will reference the same locations in the -// underlying EDRAM buffer. The only requirement is that there are no hazards -// with specific tiles (reading/writing the same tile through different images) -// and otherwise it should be ok *fingers crossed*. -// -// One complication is the copy/resolve process itself: we need to give back -// the data asked for in the format desired and where it goes is arbitrary -// (any address in physical memory). If the game is good we get resolves of -// EDRAM into fixed base addresses with scissored regions. If the game is bad -// we are broken. -// -// Resolves from EDRAM result in tiled textures - that's texture tiles, not -// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to -// then tile the images as we wrote them out. For now, we just attempt to -// get the (X, Y) in linear space and do that. This really comes into play -// when multiple resolves write to the same texture or memory aliased by -// multiple textures - which is common due to predicated tiling. The examples -// below demonstrate what this looks like, but the important thing is that -// we are aware of partial textures and overlapping regions. -// -// TODO(benvanik): what, if any, barriers do we need? any transitions? -// -// Example with multiple render targets: -// Two color targets of 256x256px tightly packed in EDRAM: -// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256 -// starts at tile 0, buffer offset 0 -// contains 64 tiles (320/80)*(256/16) -// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256 -// starts at tile 64 (after color target 0), buffer offset 327680b -// contains 64 tiles -// In EDRAM each set of 64 tiles is contiguous: -// +------+------+ +------+------+------+ -// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |... -// +------+------+ +------+------+------+ -// To render into these, we setup two VkImages: -// image 0: bound to buffer offset 0, 320x256x4=327680b -// image 1: bound to buffer offset 327680b, 320x256x4=327680b -// So when we render to them: -// +------+-+ scissored to 256x256, actually 320x256 -// | . | | <- . appears at some untiled offset in the buffer, but -// | | | consistent if aliased with the same format -// +------+-+ -// In theory, this gives us proper aliasing in most cases. 
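The multiple-render-targets example above can be checked numerically as well. A small illustrative sketch (not from the Xenia codebase; the struct and function names are made up) computing the byte offsets that let the two 256x256 color targets alias one backing allocation back to back:

```cpp
#include <cstdint>

// One EDRAM-backed view, as in the example: base in 5120b tiles, pitch
// already rounded up to 80px tiles.
struct EdramView {
  uint32_t edram_base;  // in 5120b tiles
  uint32_t pitch_px;    // tile-aligned pitch (320 for a 256px surface)
  uint32_t height_px;
};

constexpr uint32_t ByteOffset(const EdramView& v) {
  return v.edram_base * 5120;
}
constexpr uint32_t BackingBytes(const EdramView& v) {
  return v.pitch_px * v.height_px * 4;  // 32bpp color
}

constexpr EdramView color_target0{0x0, 320, 256};
constexpr EdramView color_target1{0x40, 320, 256};

// Color target 1 starts exactly where color target 0's 64 tiles end, so the
// two VkImages alias the shared buffer back to back, matching the diagram
// in the comment above.
static_assert(ByteOffset(color_target1) == 327680, "64 tiles * 5120b");
static_assert(BackingBytes(color_target0) == 327680, "320*256*4b");
```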
-// -// Example with horizontal predicated tiling: -// Trying to render 1024x576 @4X MSAA, splitting into two regions -// horizontally: -// +----------+ -// | 1024x288 | -// +----------+ -// | 1024x288 | -// +----------+ -// EDRAM configured for 1056x288px with tile size 2112x576px (4X MSAA): -// color target 0: base 0x0, pitch 1080, 26x36 tiles -// First render (top): -// window offset 0,0 -// scissor 0,0, 1024x288 -// First resolve (top): -// RB_COPY_DEST_BASE 0x1F45D000 -// RB_COPY_DEST_PITCH pitch=1024, height=576 -// vertices: 0,0, 1024,0, 1024,288 -// Second render (bottom): -// window offset 0,-288 -// scissor 0,288, 1024x288 -// Second resolve (bottom): -// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b) -// RB_COPY_DEST_PITCH pitch=1024, height=576 -// (exactly 1024x288*4b after first resolve) -// vertices: 0,288, 1024,288, 1024,576 -// Resolving here is easy as the textures are contiguous in memory. We can -// snoop in the first resolve with the dest height to know the total size, -// and in the second resolve see that it overlaps and place it in the -// existing target. -// -// Example with vertical predicated tiling: -// Trying to render 1280x720 @2X MSAA, splitting into two regions -// vertically: -// +-----+-----+ -// | 640 | 640 | -// | x | x | -// | 720 | 720 | -// +-----+-----+ -// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA): -// color target 0: base 0x0, pitch 640, 8x92 tiles -// First render (left): -// window offset 0,0 -// scissor 0,0, 640x720 -// First resolve (left): -// RB_COPY_DEST_BASE 0x1BC6D000 -// RB_COPY_DEST_PITCH pitch=1280, height=720 -// vertices: 0,0, 640,0, 640,720 -// Second render (right): -// window offset -640,0 -// scissor 640,0, 640x720 -// Second resolve (right): -// RB_COPY_DEST_BASE 0x1BC81000 (+81920b) -// RB_COPY_DEST_PITCH pitch=1280, height=720 -// vertices: 640,0, 1280,0, 1280,720 -// Resolving here is much more difficult as resolves are tiled and the right -// half of the texture is 81920b away: -// 81920/4bpp=20480px, /32 (texture tile size)=640px -// We know the texture size with the first resolve and with the second we -// must check for overlap then compute the offset (in both X and Y). -class RenderCache { - public: - RenderCache(RegisterFile* register_file, - const ui::vulkan::VulkanProvider& provider); - ~RenderCache(); - - VkResult Initialize(); - void Shutdown(); - - // Call this to determine if you should start a new render pass or continue - // with an already open pass. - bool dirty() const; - - CachedTileView* FindTileView(uint32_t base, uint32_t pitch, - xenos::MsaaSamples samples, bool color_or_depth, - uint32_t format); - - // Begins a render pass targeting the state-specified framebuffer formats. - // The command buffer will be transitioned into the render pass phase. - const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); - - // Ends the current render pass. - // The command buffer will be transitioned out of the render pass phase. - void EndRenderPass(); - - // Clears all cached content. - void ClearCache(); - - // Queues commands to copy EDRAM contents into an image. - // The command buffer must not be inside of a render pass when calling this. - void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, - VkImage image, VkImageLayout image_layout, - bool color_or_depth, VkOffset3D offset, - VkExtent3D extents); - - // Queues commands to blit EDRAM contents into an image.
- // The command buffer must not be inside of a render pass when calling this. - void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, VkImage image, - VkImageLayout image_layout, bool color_or_depth, - uint32_t format, VkFilter filter, VkOffset3D offset, - VkExtent3D extents); - - // Queues commands to clear EDRAM contents with a solid color. - // The command buffer must not be inside of a render pass when calling this. - void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, - xenos::ColorRenderTargetFormat format, uint32_t pitch, - uint32_t height, xenos::MsaaSamples num_samples, - float* color); - // Queues commands to clear EDRAM contents with depth/stencil values. - // The command buffer must not be inside of a render pass when calling this. - void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::DepthRenderTargetFormat format, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, float depth, - uint32_t stencil); - // Queues commands to fill EDRAM contents with a constant value. - // The command buffer must not be inside of a render pass when calling this. - void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value); - - private: - // Parses the current state into a configuration object. - bool ParseConfiguration(RenderConfiguration* config); - - // Finds a tile view. Returns nullptr if none found matching the key. - CachedTileView* FindTileView(const TileViewKey& view_key) const; - - // Gets or creates a tile view with the given parameters. - CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer, - const TileViewKey& view_key); - - void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view, - bool load, bool insert_barrier = true); - - // Gets or creates a render pass and frame buffer for the given configuration. - // This attempts to reuse as much as possible across render passes and - // framebuffers. - bool ConfigureRenderPass(VkCommandBuffer command_buffer, - RenderConfiguration* config, - CachedRenderPass** out_render_pass, - CachedFramebuffer** out_framebuffer); - - RegisterFile* register_file_ = nullptr; - const ui::vulkan::VulkanProvider& provider_; - - // Entire 10MiB of EDRAM. - VkDeviceMemory edram_memory_ = nullptr; - // Buffer overlayed 1:1 with edram_memory_ to allow raw access. - VkBuffer edram_buffer_ = nullptr; - - // Cache of VkImage and VkImageView's for all of our EDRAM tilings. - // TODO(benvanik): non-linear lookup? Should only be a small number of these. - std::vector<CachedTileView*> cached_tile_views_; - - // Cache of render passes based on formats. - std::vector<CachedRenderPass*> cached_render_passes_; - - // Shadows of the registers that impact the render pass we choose. - // If the registers don't change between passes we can quickly reuse the - // previous one.
- struct ShadowRegisters { - reg::RB_MODECONTROL rb_modecontrol; - reg::RB_SURFACE_INFO rb_surface_info; - reg::RB_COLOR_INFO rb_color_info; - reg::RB_COLOR_INFO rb_color1_info; - reg::RB_COLOR_INFO rb_color2_info; - reg::RB_COLOR_INFO rb_color3_info; - reg::RB_DEPTH_INFO rb_depth_info; - uint32_t pa_sc_window_scissor_tl; - uint32_t pa_sc_window_scissor_br; - - ShadowRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } shadow_registers_; - bool SetShadowRegister(uint32_t* dest, uint32_t register_name); - - // Configuration used for the current/previous Begin/End, representing the - // current shadow register state. - RenderState current_state_; - - // Only valid during a BeginRenderPass/EndRenderPass block. - VkCommandBuffer current_command_buffer_ = nullptr; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/.clang-format b/src/xenia/gpu/vulkan/shaders/bytecode/.clang-format deleted file mode 100644 index 9d159247d..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -DisableFormat: true -SortIncludes: false diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/dummy_ps.h b/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/dummy_ps.h deleted file mode 100644 index 4c6e77c40..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/dummy_ps.h +++ /dev/null @@ -1,52 +0,0 @@ -// Generated with `xb buildshaders`. -#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 16104 -; Schema: 0 - OpCapability Shader - OpCapability Sampled1D - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %5663 "main" %3302 %4841 - OpExecutionMode %5663 OriginUpperLeft - OpDecorate %3302 Location 0 - OpDecorate %4841 Location 0 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 - %uint = OpTypeInt 32 0 - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %3302 = OpVariable %_ptr_Input__arr_v4float_uint_16 Input - %uint_4 = OpConstant %uint 4 -%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 -%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 - %4841 = OpVariable %_ptr_Output__arr_v4float_uint_4 Output - %5663 = OpFunction %void None %1282 - %16103 = OpLabel - OpReturn - OpFunctionEnd -#endif - -const uint32_t dummy_ps[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00003EE8, 0x00000000, 0x00020011, - 0x00000001, 0x00020011, 0x0000002B, 0x0006000B, 0x00000001, 0x4C534C47, - 0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001, - 0x0007000F, 0x00000004, 0x0000161F, 0x6E69616D, 0x00000000, 0x00000CE6, - 0x000012E9, 0x00030010, 0x0000161F, 0x00000007, 0x00040047, 0x00000CE6, - 0x0000001E, 0x00000000, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000, - 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016, - 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, - 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, - 0x00000A3A, 0x00000010, 0x0004001C, 0x0000056F, 0x0000001D, 0x00000A3A, - 0x00040020, 0x000007EC, 0x00000001, 0x0000056F, 0x0004003B, 0x000007EC, - 0x00000CE6, 0x00000001, 0x0004002B, 0x0000000B, 0x00000A16, 0x00000004, - 0x0004001C, 0x000005C3, 
0x0000001D, 0x00000A16, 0x00040020, 0x00000840, - 0x00000003, 0x000005C3, 0x0004003B, 0x00000840, 0x000012E9, 0x00000003, - 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, - 0x00003EE7, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/line_quad_list_gs.h b/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/line_quad_list_gs.h deleted file mode 100644 index 1ccc3b88e..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/line_quad_list_gs.h +++ /dev/null @@ -1,193 +0,0 @@ -// Generated with `xb buildshaders`. -#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 23916 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736 - OpExecutionMode %5663 InputLinesAdjacency - OpExecutionMode %5663 Invocations 1 - OpExecutionMode %5663 OutputLineStrip - OpExecutionMode %5663 OutputVertices 5 - OpMemberDecorate %_struct_1032 0 BuiltIn Position - OpMemberDecorate %_struct_1032 1 BuiltIn PointSize - OpDecorate %_struct_1032 Block - OpMemberDecorate %_struct_1033 0 BuiltIn Position - OpMemberDecorate %_struct_1033 1 BuiltIn PointSize - OpDecorate %_struct_1033 Block - OpDecorate %5430 Location 0 - OpDecorate %3302 Location 0 - OpDecorate %4044 Location 16 - OpDecorate %4656 Location 17 - OpDecorate %3736 Location 16 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_struct_1032 = OpTypeStruct %v4float %float -%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032 - %4930 = OpVariable %_ptr_Output__struct_1032 Output - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 -%_struct_1033 = OpTypeStruct %v4float %float - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4 -%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4 - %5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input -%_ptr_Input_v4float = OpTypePointer Input %v4float -%_ptr_Output_v4float = OpTypePointer Output %v4float - %int_1 = OpConstant %int 1 -%_ptr_Input_float = OpTypePointer Input %float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 - %5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4 -%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4 - %3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %int_2 = OpConstant %int 2 - %int_3 = OpConstant %int 3 - %v2float = OpTypeVector %float 2 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 -%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4 - %4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input -%_arr_float_uint_4 = OpTypeArray %float %uint_4 -%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 - %4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float - %3736 = OpVariable %_ptr_Output_v2float Output - %5663 = OpFunction %void None %1282 - 
%23915 = OpLabel - %7129 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0 - %15646 = OpLoad %v4float %7129 - %19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %19981 %15646 - %19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1 - %7391 = OpLoad %float %19905 - %19982 = OpAccessChain %_ptr_Output_float %4930 %int_1 - OpStore %19982 %7391 - %19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0 - %10874 = OpLoad %_arr_v4float_uint_16 %19848 - OpStore %5430 %10874 - OpEmitVertex - %22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 - %11398 = OpLoad %v4float %22812 - OpStore %19981 %11398 - %16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1 - %7967 = OpLoad %float %16622 - OpStore %19982 %7967 - %16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1 - %10875 = OpLoad %_arr_v4float_uint_16 %16623 - OpStore %5430 %10875 - OpEmitVertex - %22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 - %11399 = OpLoad %v4float %22813 - OpStore %19981 %11399 - %16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1 - %7968 = OpLoad %float %16624 - OpStore %19982 %7968 - %16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2 - %10876 = OpLoad %_arr_v4float_uint_16 %16625 - OpStore %5430 %10876 - OpEmitVertex - %22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_3 %int_0 - %11400 = OpLoad %v4float %22814 - OpStore %19981 %11400 - %16626 = OpAccessChain %_ptr_Input_float %5305 %int_3 %int_1 - %7969 = OpLoad %float %16626 - OpStore %19982 %7969 - %16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_3 - %10877 = OpLoad %_arr_v4float_uint_16 %16627 - OpStore %5430 %10877 - OpEmitVertex - OpStore %19981 %15646 - OpStore %19982 %7391 - OpStore %5430 %10874 - OpEmitVertex - OpEndPrimitive - OpReturn - OpFunctionEnd -#endif - -const uint32_t line_quad_list_gs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00005D6C, 0x00000000, 0x00020011, - 0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47, - 0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001, - 0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342, - 0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98, - 0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000, - 0x00000001, 0x00030010, 0x0000161F, 0x0000001C, 0x00040010, 0x0000161F, - 0x0000001A, 0x00000005, 0x00050048, 0x00000408, 0x00000000, 0x0000000B, - 0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001, - 0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000, - 0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B, - 0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536, - 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000, - 0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230, - 0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010, - 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016, - 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, - 0x0004001E, 0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685, - 0x00000003, 0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003, - 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004002B, 0x0000000C, - 0x00000A0B, 0x00000000, 0x0004001E, 0x00000409, 0x0000001D, 0x0000000D, - 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, - 0x00000A16, 0x00000004, 
0x0004001C, 0x0000032E, 0x00000409, 0x00000A16, - 0x00040020, 0x000005AB, 0x00000001, 0x0000032E, 0x0004003B, 0x000005AB, - 0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001, 0x0000001D, - 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x0004002B, 0x0000000C, - 0x00000A0E, 0x00000001, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D, - 0x00040020, 0x0000028B, 0x00000003, 0x0000000D, 0x0004002B, 0x0000000B, - 0x00000A3A, 0x00000010, 0x0004001C, 0x00000473, 0x0000001D, 0x00000A3A, - 0x00040020, 0x000006F0, 0x00000003, 0x00000473, 0x0004003B, 0x000006F0, - 0x00001536, 0x00000003, 0x0004001C, 0x00000973, 0x00000473, 0x00000A16, - 0x00040020, 0x0000002D, 0x00000001, 0x00000973, 0x0004003B, 0x0000002D, - 0x00000CE6, 0x00000001, 0x00040020, 0x000006F1, 0x00000001, 0x00000473, - 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000C, - 0x00000A14, 0x00000003, 0x00040017, 0x00000013, 0x0000000D, 0x00000002, - 0x0004001C, 0x000002A2, 0x00000013, 0x00000A16, 0x00040020, 0x0000051F, - 0x00000001, 0x000002A2, 0x0004003B, 0x0000051F, 0x00000FCC, 0x00000001, - 0x0004001C, 0x00000248, 0x0000000D, 0x00000A16, 0x00040020, 0x000004C5, - 0x00000001, 0x00000248, 0x0004003B, 0x000004C5, 0x00001230, 0x00000001, - 0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290, - 0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, - 0x00000502, 0x000200F8, 0x00005D6B, 0x00060041, 0x0000029A, 0x00001BD9, - 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00003D1E, - 0x00001BD9, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342, 0x00000A0B, - 0x0003003E, 0x00004E0D, 0x00003D1E, 0x00060041, 0x0000028A, 0x00004DC1, - 0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CDF, - 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E, - 0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000006F1, 0x00004D88, - 0x00000CE6, 0x00000A0B, 0x0004003D, 0x00000473, 0x00002A7A, 0x00004D88, - 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041, 0x0000029A, - 0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D, - 0x00002C86, 0x0000591C, 0x0003003E, 0x00004E0D, 0x00002C86, 0x00060041, - 0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D, - 0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0E, 0x00001F1F, - 0x00050041, 0x000006F1, 0x000040EF, 0x00000CE6, 0x00000A0E, 0x0004003D, - 0x00000473, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536, 0x00002A7B, - 0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9, 0x00000A11, - 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D, 0x0003003E, - 0x00004E0D, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0, 0x000014B9, - 0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20, 0x000040F0, - 0x0003003E, 0x00004E0E, 0x00001F20, 0x00050041, 0x000006F1, 0x000040F1, - 0x00000CE6, 0x00000A11, 0x0004003D, 0x00000473, 0x00002A7C, 0x000040F1, - 0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x00060041, 0x0000029A, - 0x0000591E, 0x000014B9, 0x00000A14, 0x00000A0B, 0x0004003D, 0x0000001D, - 0x00002C88, 0x0000591E, 0x0003003E, 0x00004E0D, 0x00002C88, 0x00060041, - 0x0000028A, 0x000040F2, 0x000014B9, 0x00000A14, 0x00000A0E, 0x0004003D, - 0x0000000D, 0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21, - 0x00050041, 0x000006F1, 0x000040F3, 0x00000CE6, 0x00000A14, 0x0004003D, - 0x00000473, 0x00002A7D, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7D, - 0x000100DA, 0x0003003E, 0x00004E0D, 0x00003D1E, 0x0003003E, 0x00004E0E, - 0x00001CDF, 0x0003003E, 
0x00001536, 0x00002A7A, 0x000100DA, 0x000100DB, - 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/point_list_gs.h b/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/point_list_gs.h deleted file mode 100644 index 4ca4fcd80..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/point_list_gs.h +++ /dev/null @@ -1,244 +0,0 @@ -// Generated with `xb buildshaders`. -#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24916 -; Schema: 0 - OpCapability Geometry - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %5663 "main" %5305 %4065 %4930 %5430 %3302 %5753 %5479 - OpExecutionMode %5663 InputPoints - OpExecutionMode %5663 Invocations 1 - OpExecutionMode %5663 OutputTriangleStrip - OpExecutionMode %5663 OutputVertices 4 - OpMemberDecorate %_struct_1017 0 BuiltIn Position - OpDecorate %_struct_1017 Block - OpMemberDecorate %_struct_1287 0 Offset 0 - OpMemberDecorate %_struct_1287 1 Offset 16 - OpMemberDecorate %_struct_1287 2 Offset 32 - OpMemberDecorate %_struct_1287 3 Offset 48 - OpMemberDecorate %_struct_1287 4 Offset 64 - OpDecorate %_struct_1287 Block - OpDecorate %4065 Location 17 - OpMemberDecorate %_struct_1018 0 BuiltIn Position - OpDecorate %_struct_1018 Block - OpDecorate %5430 Location 0 - OpDecorate %3302 Location 0 - OpDecorate %5753 Location 16 - OpDecorate %5479 Location 16 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_struct_1017 = OpTypeStruct %v4float - %uint = OpTypeInt 32 0 - %uint_1 = OpConstant %uint 1 -%_arr__struct_1017_uint_1 = OpTypeArray %_struct_1017 %uint_1 -%_ptr_Input__arr__struct_1017_uint_1 = OpTypePointer Input %_arr__struct_1017_uint_1 - %5305 = OpVariable %_ptr_Input__arr__struct_1017_uint_1 Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 -%_ptr_Input_v4float = OpTypePointer Input %v4float - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float -%_struct_1287 = OpTypeStruct %v4float %v4float %v4float %v4float %uint -%_ptr_PushConstant__struct_1287 = OpTypePointer PushConstant %_struct_1287 - %3463 = OpVariable %_ptr_PushConstant__struct_1287 PushConstant - %int_2 = OpConstant %int 2 -%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float -%_arr_float_uint_1 = OpTypeArray %float %uint_1 -%_ptr_Input__arr_float_uint_1 = OpTypePointer Input %_arr_float_uint_1 - %4065 = OpVariable %_ptr_Input__arr_float_uint_1 Input -%_ptr_Input_float = OpTypePointer Input %float - %float_0 = OpConstant %float 0 - %bool = OpTypeBool - %int_4 = OpConstant %int 4 -%_struct_1018 = OpTypeStruct %v4float -%_ptr_Output__struct_1018 = OpTypePointer Output %_struct_1018 - %4930 = OpVariable %_ptr_Output__struct_1018 Output - %uint_4 = OpConstant %uint 4 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 - %float_n1 = OpConstant %float -1 - %float_1 = OpConstant %float 1 - %73 = OpConstantComposite %v2float %float_n1 %float_1 - %768 = OpConstantComposite %v2float %float_1 %float_1 - %74 = OpConstantComposite %v2float %float_n1 %float_n1 - %769 = OpConstantComposite %v2float %float_1 %float_n1 - %2941 = OpConstantComposite %_arr_v2float_uint_4 %73 %768 %74 %769 -%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = 
OpTypePointer Output %_arr_v4float_uint_16 - %5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_1 = OpTypeArray %_arr_v4float_uint_16 %uint_1 -%_ptr_Input__arr__arr_v4float_uint_16_uint_1 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_1 - %3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_1 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 -%_ptr_Output_v2float = OpTypePointer Output %v2float - %5753 = OpVariable %_ptr_Output_v2float Output - %1823 = OpConstantComposite %v2float %float_0 %float_0 - %int_1 = OpConstant %int 1 -%_arr_v2float_uint_1 = OpTypeArray %v2float %uint_1 -%_ptr_Input__arr_v2float_uint_1 = OpTypePointer Input %_arr_v2float_uint_1 - %5479 = OpVariable %_ptr_Input__arr_v2float_uint_1 Input - %5663 = OpFunction %void None %1282 - %24915 = OpLabel - %18491 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function - %5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function - %22270 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0 - %8181 = OpLoad %v4float %22270 - %20420 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_2 - %20062 = OpLoad %v4float %20420 - %19110 = OpVectorShuffle %v2float %20062 %20062 0 1 - %7988 = OpAccessChain %_ptr_Input_float %4065 %int_0 - %13069 = OpLoad %float %7988 - %23515 = OpFOrdGreaterThan %bool %13069 %float_0 - OpSelectionMerge %16839 None - OpBranchConditional %23515 %13106 %16839 - %13106 = OpLabel - %18836 = OpCompositeConstruct %v2float %13069 %13069 - OpBranch %16839 - %16839 = OpLabel - %19748 = OpPhi %v2float %19110 %24915 %18836 %13106 - %24067 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0 - %15439 = OpLoad %v4float %24067 - %10399 = OpVectorShuffle %v2float %15439 %15439 2 3 - %24282 = OpFDiv %v2float %19748 %10399 - OpBranch %6318 - %6318 = OpLabel - %22958 = OpPhi %int %int_0 %16839 %11651 %12148 - %24788 = OpSLessThan %bool %22958 %int_4 - OpLoopMerge %12265 %12148 None - OpBranchConditional %24788 %12148 %12265 - %12148 = OpLabel - %17761 = OpVectorShuffle %v2float %8181 %8181 0 1 - OpStore %18491 %2941 - %19574 = OpAccessChain %_ptr_Function_v2float %18491 %22958 - %15971 = OpLoad %v2float %19574 - %17243 = OpFMul %v2float %15971 %24282 - %16594 = OpFAdd %v2float %17761 %17243 - %10618 = OpCompositeExtract %float %16594 0 - %14087 = OpCompositeExtract %float %16594 1 - %7641 = OpCompositeExtract %float %8181 2 - %7529 = OpCompositeExtract %float %8181 3 - %18260 = OpCompositeConstruct %v4float %10618 %14087 %7641 %7529 - %8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %8483 %18260 - %19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0 - %7910 = OpLoad %_arr_v4float_uint_16 %19848 - OpStore %5430 %7910 - OpStore %5238 %2941 - %13290 = OpAccessChain %_ptr_Function_v2float %5238 %22958 - %19207 = OpLoad %v2float %13290 - %8973 = OpExtInst %v2float %1 FMax %19207 %1823 - OpStore %5753 %8973 - OpEmitVertex - %11651 = OpIAdd %int %22958 %int_1 - OpBranch %6318 - %12265 = OpLabel - OpEndPrimitive - OpReturn - OpFunctionEnd -#endif - -const uint32_t point_list_gs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00006154, 0x00000000, 0x00020011, - 0x00000002, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x000C000F, 0x00000003, - 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, 0x00000FE1, 0x00001342, - 0x00001536, 0x00000CE6, 0x00001679, 0x00001567, 0x00030010, 0x0000161F, - 0x00000013, 0x00040010, 0x0000161F, 0x00000000, 
0x00000001, 0x00030010, - 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F, 0x0000001A, 0x00000004, - 0x00050048, 0x000003F9, 0x00000000, 0x0000000B, 0x00000000, 0x00030047, - 0x000003F9, 0x00000002, 0x00050048, 0x00000507, 0x00000000, 0x00000023, - 0x00000000, 0x00050048, 0x00000507, 0x00000001, 0x00000023, 0x00000010, - 0x00050048, 0x00000507, 0x00000002, 0x00000023, 0x00000020, 0x00050048, - 0x00000507, 0x00000003, 0x00000023, 0x00000030, 0x00050048, 0x00000507, - 0x00000004, 0x00000023, 0x00000040, 0x00030047, 0x00000507, 0x00000002, - 0x00040047, 0x00000FE1, 0x0000001E, 0x00000011, 0x00050048, 0x000003FA, - 0x00000000, 0x0000000B, 0x00000000, 0x00030047, 0x000003FA, 0x00000002, - 0x00040047, 0x00001536, 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, - 0x0000001E, 0x00000000, 0x00040047, 0x00001679, 0x0000001E, 0x00000010, - 0x00040047, 0x00001567, 0x0000001E, 0x00000010, 0x00020013, 0x00000008, - 0x00030021, 0x00000502, 0x00000008, 0x00030016, 0x0000000D, 0x00000020, - 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0003001E, 0x000003F9, - 0x0000001D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, - 0x0000000B, 0x00000A0D, 0x00000001, 0x0004001C, 0x0000023D, 0x000003F9, - 0x00000A0D, 0x00040020, 0x000004BA, 0x00000001, 0x0000023D, 0x0004003B, - 0x000004BA, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C, 0x00000020, - 0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, - 0x0000029A, 0x00000001, 0x0000001D, 0x00040017, 0x00000013, 0x0000000D, - 0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013, 0x0007001E, - 0x00000507, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000000B, - 0x00040020, 0x00000784, 0x00000009, 0x00000507, 0x0004003B, 0x00000784, - 0x00000D87, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, - 0x00040020, 0x0000029B, 0x00000009, 0x0000001D, 0x0004001C, 0x00000239, - 0x0000000D, 0x00000A0D, 0x00040020, 0x000004B6, 0x00000001, 0x00000239, - 0x0004003B, 0x000004B6, 0x00000FE1, 0x00000001, 0x00040020, 0x0000028A, - 0x00000001, 0x0000000D, 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, - 0x00020014, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A17, 0x00000004, - 0x0003001E, 0x000003FA, 0x0000001D, 0x00040020, 0x00000676, 0x00000003, - 0x000003FA, 0x0004003B, 0x00000676, 0x00001342, 0x00000003, 0x0004002B, - 0x0000000B, 0x00000A16, 0x00000004, 0x0004001C, 0x000004D3, 0x00000013, - 0x00000A16, 0x0004002B, 0x0000000D, 0x00000341, 0xBF800000, 0x0004002B, - 0x0000000D, 0x0000008A, 0x3F800000, 0x0005002C, 0x00000013, 0x00000049, - 0x00000341, 0x0000008A, 0x0005002C, 0x00000013, 0x00000300, 0x0000008A, - 0x0000008A, 0x0005002C, 0x00000013, 0x0000004A, 0x00000341, 0x00000341, - 0x0005002C, 0x00000013, 0x00000301, 0x0000008A, 0x00000341, 0x0007002C, - 0x000004D3, 0x00000B7D, 0x00000049, 0x00000300, 0x0000004A, 0x00000301, - 0x00040020, 0x00000750, 0x00000007, 0x000004D3, 0x00040020, 0x0000029C, - 0x00000003, 0x0000001D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, - 0x0004001C, 0x00000989, 0x0000001D, 0x00000A3A, 0x00040020, 0x00000043, - 0x00000003, 0x00000989, 0x0004003B, 0x00000043, 0x00001536, 0x00000003, - 0x0004001C, 0x00000A2E, 0x00000989, 0x00000A0D, 0x00040020, 0x000000E8, - 0x00000001, 0x00000A2E, 0x0004003B, 0x000000E8, 0x00000CE6, 0x00000001, - 0x00040020, 0x00000044, 0x00000001, 0x00000989, 0x00040020, 0x00000291, - 0x00000003, 0x00000013, 0x0004003B, 0x00000291, 0x00001679, 0x00000003, - 0x0005002C, 0x00000013, 0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B, - 0x0000000C, 0x00000A0E, 0x00000001, 0x0004001C, 
0x00000281, 0x00000013, - 0x00000A0D, 0x00040020, 0x000004FE, 0x00000001, 0x00000281, 0x0004003B, - 0x000004FE, 0x00001567, 0x00000001, 0x00050036, 0x00000008, 0x0000161F, - 0x00000000, 0x00000502, 0x000200F8, 0x00006153, 0x0004003B, 0x00000750, - 0x0000483B, 0x00000007, 0x0004003B, 0x00000750, 0x00001476, 0x00000007, - 0x00060041, 0x0000029A, 0x000056FE, 0x000014B9, 0x00000A0B, 0x00000A0B, - 0x0004003D, 0x0000001D, 0x00001FF5, 0x000056FE, 0x00050041, 0x0000029B, - 0x00004FC4, 0x00000D87, 0x00000A11, 0x0004003D, 0x0000001D, 0x00004E5E, - 0x00004FC4, 0x0007004F, 0x00000013, 0x00004AA6, 0x00004E5E, 0x00004E5E, - 0x00000000, 0x00000001, 0x00050041, 0x0000028A, 0x00001F34, 0x00000FE1, - 0x00000A0B, 0x0004003D, 0x0000000D, 0x0000330D, 0x00001F34, 0x000500BA, - 0x00000009, 0x00005BDB, 0x0000330D, 0x00000A0C, 0x000300F7, 0x000041C7, - 0x00000000, 0x000400FA, 0x00005BDB, 0x00003332, 0x000041C7, 0x000200F8, - 0x00003332, 0x00050050, 0x00000013, 0x00004994, 0x0000330D, 0x0000330D, - 0x000200F9, 0x000041C7, 0x000200F8, 0x000041C7, 0x000700F5, 0x00000013, - 0x00004D24, 0x00004AA6, 0x00006153, 0x00004994, 0x00003332, 0x00050041, - 0x0000029B, 0x00005E03, 0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D, - 0x00003C4F, 0x00005E03, 0x0007004F, 0x00000013, 0x0000289F, 0x00003C4F, - 0x00003C4F, 0x00000002, 0x00000003, 0x00050088, 0x00000013, 0x00005EDA, - 0x00004D24, 0x0000289F, 0x000200F9, 0x000018AE, 0x000200F8, 0x000018AE, - 0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000041C7, 0x00002D83, - 0x00002F74, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17, - 0x000400F6, 0x00002FE9, 0x00002F74, 0x00000000, 0x000400FA, 0x000060D4, - 0x00002F74, 0x00002FE9, 0x000200F8, 0x00002F74, 0x0007004F, 0x00000013, - 0x00004561, 0x00001FF5, 0x00001FF5, 0x00000000, 0x00000001, 0x0003003E, - 0x0000483B, 0x00000B7D, 0x00050041, 0x00000290, 0x00004C76, 0x0000483B, - 0x000059AE, 0x0004003D, 0x00000013, 0x00003E63, 0x00004C76, 0x00050085, - 0x00000013, 0x0000435B, 0x00003E63, 0x00005EDA, 0x00050081, 0x00000013, - 0x000040D2, 0x00004561, 0x0000435B, 0x00050051, 0x0000000D, 0x0000297A, - 0x000040D2, 0x00000000, 0x00050051, 0x0000000D, 0x00003707, 0x000040D2, - 0x00000001, 0x00050051, 0x0000000D, 0x00001DD9, 0x00001FF5, 0x00000002, - 0x00050051, 0x0000000D, 0x00001D69, 0x00001FF5, 0x00000003, 0x00070050, - 0x0000001D, 0x00004754, 0x0000297A, 0x00003707, 0x00001DD9, 0x00001D69, - 0x00050041, 0x0000029C, 0x00002123, 0x00001342, 0x00000A0B, 0x0003003E, - 0x00002123, 0x00004754, 0x00050041, 0x00000044, 0x00004D88, 0x00000CE6, - 0x00000A0B, 0x0004003D, 0x00000989, 0x00001EE6, 0x00004D88, 0x0003003E, - 0x00001536, 0x00001EE6, 0x0003003E, 0x00001476, 0x00000B7D, 0x00050041, - 0x00000290, 0x000033EA, 0x00001476, 0x000059AE, 0x0004003D, 0x00000013, - 0x00004B07, 0x000033EA, 0x0007000C, 0x00000013, 0x0000230D, 0x00000001, - 0x00000028, 0x00004B07, 0x0000071F, 0x0003003E, 0x00001679, 0x0000230D, - 0x000100DA, 0x00050080, 0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E, - 0x000200F9, 0x000018AE, 0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD, - 0x00010038, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/quad_list_gs.h b/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/quad_list_gs.h deleted file mode 100644 index 8691cd5e9..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/quad_list_gs.h +++ /dev/null @@ -1,170 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24789 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736 - OpExecutionMode %5663 InputLinesAdjacency - OpExecutionMode %5663 Invocations 1 - OpExecutionMode %5663 OutputTriangleStrip - OpExecutionMode %5663 OutputVertices 4 - OpMemberDecorate %_struct_1032 0 BuiltIn Position - OpMemberDecorate %_struct_1032 1 BuiltIn PointSize - OpDecorate %_struct_1032 Block - OpMemberDecorate %_struct_1033 0 BuiltIn Position - OpMemberDecorate %_struct_1033 1 BuiltIn PointSize - OpDecorate %_struct_1033 Block - OpDecorate %5430 Location 0 - OpDecorate %3302 Location 0 - OpDecorate %4044 Location 16 - OpDecorate %4656 Location 17 - OpDecorate %3736 Location 16 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %int = OpTypeInt 32 1 -%_ptr_Function_int = OpTypePointer Function %int - %int_0 = OpConstant %int 0 - %int_4 = OpConstant %int 4 - %bool = OpTypeBool - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr_int_uint_4 = OpTypeArray %int %uint_4 - %int_1 = OpConstant %int 1 - %int_3 = OpConstant %int 3 - %int_2 = OpConstant %int 2 - %566 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_3 %int_2 -%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_struct_1032 = OpTypeStruct %v4float %float -%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032 - %4930 = OpVariable %_ptr_Output__struct_1032 Output -%_struct_1033 = OpTypeStruct %v4float %float -%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4 -%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4 - %5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input -%_ptr_Input_v4float = OpTypePointer Input %v4float -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Input_float = OpTypePointer Input %float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 - %5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4 -%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4 - %3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %v2float = OpTypeVector %float 2 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 -%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4 - %4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input -%_arr_float_uint_4 = OpTypeArray %float %uint_4 -%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 - %4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float - %3736 = OpVariable %_ptr_Output_v2float Output - %5663 = OpFunction %void None %1282 - %9454 = OpLabel - %5238 = OpVariable %_ptr_Function__arr_int_uint_4 Function - OpBranch %18173 - %18173 = OpLabel - %22958 = OpPhi %int %int_0 %9454 %11651 %15146 - %24788 = OpSLessThan %bool %22958 %int_4 - OpLoopMerge %12265 %15146 None - OpBranchConditional %24788 %15146 %12265 - %15146 = OpLabel - 
OpStore %5238 %566 - %22512 = OpAccessChain %_ptr_Function_int %5238 %22958 - %7372 = OpLoad %int %22512 - %20154 = OpAccessChain %_ptr_Input_v4float %5305 %7372 %int_0 - %22427 = OpLoad %v4float %20154 - %19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %19981 %22427 - %19905 = OpAccessChain %_ptr_Input_float %5305 %7372 %int_1 - %7391 = OpLoad %float %19905 - %19982 = OpAccessChain %_ptr_Output_float %4930 %int_1 - OpStore %19982 %7391 - %19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %7372 - %10874 = OpLoad %_arr_v4float_uint_16 %19848 - OpStore %5430 %10874 - OpEmitVertex - %11651 = OpIAdd %int %22958 %int_1 - OpBranch %18173 - %12265 = OpLabel - OpEndPrimitive - OpReturn - OpFunctionEnd -#endif - -const uint32_t quad_list_gs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x000060D5, 0x00000000, 0x00020011, - 0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47, - 0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001, - 0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342, - 0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98, - 0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000, - 0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F, - 0x0000001A, 0x00000004, 0x00050048, 0x00000408, 0x00000000, 0x0000000B, - 0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001, - 0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000, - 0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B, - 0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536, - 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000, - 0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230, - 0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010, - 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00040015, - 0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000007, - 0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0004002B, - 0x0000000C, 0x00000A17, 0x00000004, 0x00020014, 0x00000009, 0x00040015, - 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A16, - 0x00000004, 0x0004001C, 0x00000251, 0x0000000C, 0x00000A16, 0x0004002B, - 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A14, - 0x00000003, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0007002C, - 0x00000251, 0x00000236, 0x00000A0B, 0x00000A0E, 0x00000A14, 0x00000A11, - 0x00040020, 0x000004CE, 0x00000007, 0x00000251, 0x00030016, 0x0000000D, - 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E, - 0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003, - 0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x0004001E, - 0x00000409, 0x0000001D, 0x0000000D, 0x0004001C, 0x000003A8, 0x00000409, - 0x00000A16, 0x00040020, 0x00000625, 0x00000001, 0x000003A8, 0x0004003B, - 0x00000625, 0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001, - 0x0000001D, 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x00040020, - 0x0000028A, 0x00000001, 0x0000000D, 0x00040020, 0x0000028B, 0x00000003, - 0x0000000D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, - 0x00000656, 0x0000001D, 0x00000A3A, 0x00040020, 0x000008D3, 0x00000003, - 0x00000656, 0x0004003B, 0x000008D3, 0x00001536, 0x00000003, 0x0004001C, - 0x00000503, 0x00000656, 0x00000A16, 0x00040020, 0x0000077F, 0x00000001, - 0x00000503, 0x0004003B, 0x0000077F, 
0x00000CE6, 0x00000001, 0x00040020, - 0x000008D4, 0x00000001, 0x00000656, 0x00040017, 0x00000013, 0x0000000D, - 0x00000002, 0x0004001C, 0x000002E4, 0x00000013, 0x00000A16, 0x00040020, - 0x00000561, 0x00000001, 0x000002E4, 0x0004003B, 0x00000561, 0x00000FCC, - 0x00000001, 0x0004001C, 0x00000266, 0x0000000D, 0x00000A16, 0x00040020, - 0x000004E3, 0x00000001, 0x00000266, 0x0004003B, 0x000004E3, 0x00001230, - 0x00000001, 0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, - 0x00000290, 0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, - 0x00000000, 0x00000502, 0x000200F8, 0x000024EE, 0x0004003B, 0x000004CE, - 0x00001476, 0x00000007, 0x000200F9, 0x000046FD, 0x000200F8, 0x000046FD, - 0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000024EE, 0x00002D83, - 0x00003B2A, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17, - 0x000400F6, 0x00002FE9, 0x00003B2A, 0x00000000, 0x000400FA, 0x000060D4, - 0x00003B2A, 0x00002FE9, 0x000200F8, 0x00003B2A, 0x0003003E, 0x00001476, - 0x00000236, 0x00050041, 0x00000289, 0x000057F0, 0x00001476, 0x000059AE, - 0x0004003D, 0x0000000C, 0x00001CCC, 0x000057F0, 0x00060041, 0x0000029A, - 0x00004EBA, 0x000014B9, 0x00001CCC, 0x00000A0B, 0x0004003D, 0x0000001D, - 0x0000579B, 0x00004EBA, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342, - 0x00000A0B, 0x0003003E, 0x00004E0D, 0x0000579B, 0x00060041, 0x0000028A, - 0x00004DC1, 0x000014B9, 0x00001CCC, 0x00000A0E, 0x0004003D, 0x0000000D, - 0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, - 0x00000A0E, 0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000008D4, - 0x00004D88, 0x00000CE6, 0x00001CCC, 0x0004003D, 0x00000656, 0x00002A7A, - 0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00050080, - 0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E, 0x000200F9, 0x000046FD, - 0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/rect_list_gs.h b/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/rect_list_gs.h deleted file mode 100644 index 328f253dd..000000000 --- a/src/xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/rect_list_gs.h +++ /dev/null @@ -1,430 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24790 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %5663 "main" %5305 %4930 %5430 %3302 %4044 %4656 %3736 - OpExecutionMode %5663 Triangles - OpExecutionMode %5663 Invocations 1 - OpExecutionMode %5663 OutputTriangleStrip - OpExecutionMode %5663 OutputVertices 6 - OpMemberDecorate %_struct_1032 0 BuiltIn Position - OpMemberDecorate %_struct_1032 1 BuiltIn PointSize - OpDecorate %_struct_1032 Block - OpMemberDecorate %_struct_1033 0 BuiltIn Position - OpMemberDecorate %_struct_1033 1 BuiltIn PointSize - OpDecorate %_struct_1033 Block - OpDecorate %5430 Location 0 - OpDecorate %3302 Location 0 - OpDecorate %4044 Location 16 - OpDecorate %4656 Location 17 - OpDecorate %3736 Location 16 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 - %bool = OpTypeBool - %v2bool = OpTypeVector %bool 2 - %v4float = OpTypeVector %float 4 -%_struct_1032 = OpTypeStruct %v4float %float - %uint = OpTypeInt 32 0 - %uint_3 = OpConstant %uint 3 -%_arr__struct_1032_uint_3 = OpTypeArray %_struct_1032 %uint_3 -%_ptr_Input__arr__struct_1032_uint_3 = OpTypePointer Input %_arr__struct_1032_uint_3 - %5305 = OpVariable %_ptr_Input__arr__struct_1032_uint_3 Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 - %int_2 = OpConstant %int 2 - %uint_0 = OpConstant %uint 0 -%_ptr_Input_float = OpTypePointer Input %float - %int_1 = OpConstant %int 1 - %uint_1 = OpConstant %uint 1 -%float_0_00100000005 = OpConstant %float 0.00100000005 -%_ptr_Input_v4float = OpTypePointer Input %v4float -%_struct_1033 = OpTypeStruct %v4float %float -%_ptr_Output__struct_1033 = OpTypePointer Output %_struct_1033 - %4930 = OpVariable %_ptr_Output__struct_1033 Output -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 - %5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3 -%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3 - %3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %int_16 = OpConstant %int 16 -%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 -%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 - %4044 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input -%_arr_float_uint_3 = OpTypeArray %float %uint_3 -%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 - %4656 = OpVariable %_ptr_Input__arr_float_uint_3 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float - %3736 = OpVariable %_ptr_Output_v2float Output - %1759 = OpConstantComposite %v2float %float_0_00100000005 %float_0_00100000005 - %5663 = OpFunction %void None %1282 - %23915 = OpLabel - %7129 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_0 - %15627 = OpLoad %float %7129 - %20439 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_1 - %19889 = OpLoad %float %20439 - %10917 = OpCompositeConstruct %v2float %15627 %19889 - %24777 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0 - %7883 = 
OpLoad %v4float %24777 - %6765 = OpVectorShuffle %v2float %7883 %7883 0 1 - %15739 = OpFSub %v2float %6765 %10917 - %7757 = OpExtInst %v2float %1 FAbs %15739 - %19021 = OpFOrdLessThanEqual %v2bool %7757 %1759 - %15711 = OpAll %bool %19021 - %11402 = OpLogicalNot %bool %15711 - OpSelectionMerge %13286 None - OpBranchConditional %11402 %12129 %13286 - %12129 = OpLabel - %18210 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_0 - %15628 = OpLoad %float %18210 - %20440 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_1 - %21143 = OpLoad %float %20440 - %17643 = OpCompositeConstruct %v2float %15628 %21143 - %15490 = OpFSub %v2float %6765 %17643 - %24406 = OpExtInst %v2float %1 FAbs %15490 - %20560 = OpFOrdLessThanEqual %v2bool %24406 %1759 - %20788 = OpAll %bool %20560 - OpBranch %13286 - %13286 = OpLabel - %10924 = OpPhi %bool %15711 %23915 %20788 %12129 - OpSelectionMerge %23648 None - OpBranchConditional %10924 %12148 %9186 - %12148 = OpLabel - %18037 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %18037 %7883 - %19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1 - %7391 = OpLoad %float %19905 - %19981 = OpAccessChain %_ptr_Output_float %4930 %int_1 - OpStore %19981 %7391 - %19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0 - %10874 = OpLoad %_arr_v4float_uint_16 %19848 - OpStore %5430 %10874 - OpEmitVertex - %22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 - %11398 = OpLoad %v4float %22812 - OpStore %18037 %11398 - %16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1 - %7967 = OpLoad %float %16622 - OpStore %19981 %7967 - %16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1 - %10875 = OpLoad %_arr_v4float_uint_16 %16623 - OpStore %5430 %10875 - OpEmitVertex - %22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 - %11399 = OpLoad %v4float %22813 - OpStore %18037 %11399 - %16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1 - %7968 = OpLoad %float %16624 - OpStore %19981 %7968 - %16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2 - %10876 = OpLoad %_arr_v4float_uint_16 %16625 - OpStore %5430 %10876 - OpEmitVertex - OpEndPrimitive - OpStore %18037 %11399 - OpStore %19981 %7968 - OpStore %5430 %10876 - OpEmitVertex - OpStore %18037 %11398 - OpStore %19981 %7967 - OpStore %5430 %10875 - OpEmitVertex - %8851 = OpFNegate %v2float %6765 - %13757 = OpVectorShuffle %v2float %11398 %11398 0 1 - %21457 = OpFAdd %v2float %8851 %13757 - %7434 = OpVectorShuffle %v2float %11399 %11399 0 1 - %21812 = OpFAdd %v2float %21457 %7434 - %18423 = OpCompositeExtract %float %21812 0 - %14087 = OpCompositeExtract %float %21812 1 - %7641 = OpCompositeExtract %float %11399 2 - %7472 = OpCompositeExtract %float %11399 3 - %18779 = OpCompositeConstruct %v4float %18423 %14087 %7641 %7472 - OpStore %18037 %18779 - OpStore %19981 %7968 - OpBranch %17364 - %17364 = OpLabel - %22958 = OpPhi %int %int_0 %12148 %21301 %14551 - %24788 = OpSLessThan %bool %22958 %int_16 - OpLoopMerge %11792 %14551 None - OpBranchConditional %24788 %14551 %11792 - %14551 = OpLabel - %19388 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22958 - %24048 = OpLoad %v4float %19388 - %19880 = OpFNegate %v4float %24048 - %6667 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22958 - %6828 = OpLoad %v4float %6667 - %22565 = OpFAdd %v4float %19880 %6828 - %18783 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22958 - %21055 = OpLoad %v4float %18783 - %22584 = OpFAdd %v4float %22565 %21055 - %18591 = OpAccessChain 
%_ptr_Output_v4float %5430 %22958 - OpStore %18591 %22584 - %21301 = OpIAdd %int %22958 %int_1 - OpBranch %17364 - %11792 = OpLabel - OpEmitVertex - OpEndPrimitive - OpBranch %23648 - %9186 = OpLabel - %20459 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %20459 %7883 - %19906 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1 - %7392 = OpLoad %float %19906 - %19982 = OpAccessChain %_ptr_Output_float %4930 %int_1 - OpStore %19982 %7392 - %19849 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0 - %10877 = OpLoad %_arr_v4float_uint_16 %19849 - OpStore %5430 %10877 - OpEmitVertex - %22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 - %11400 = OpLoad %v4float %22814 - OpStore %20459 %11400 - %16626 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1 - %7969 = OpLoad %float %16626 - OpStore %19982 %7969 - %16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1 - %10878 = OpLoad %_arr_v4float_uint_16 %16627 - OpStore %5430 %10878 - OpEmitVertex - %22815 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 - %11401 = OpLoad %v4float %22815 - OpStore %20459 %11401 - %16628 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1 - %7970 = OpLoad %float %16628 - OpStore %19982 %7970 - %16629 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2 - %10879 = OpLoad %_arr_v4float_uint_16 %16629 - OpStore %5430 %10879 - OpEmitVertex - OpEndPrimitive - OpStore %20459 %7883 - OpStore %19982 %7392 - OpStore %5430 %10877 - OpEmitVertex - OpStore %20459 %11401 - OpStore %19982 %7970 - OpStore %5430 %10879 - OpEmitVertex - %12391 = OpVectorShuffle %v2float %11400 %11400 0 1 - %21222 = OpFNegate %v2float %12391 - %8335 = OpFAdd %v2float %6765 %21222 - %13861 = OpVectorShuffle %v2float %11401 %11401 0 1 - %21813 = OpFAdd %v2float %8335 %13861 - %18424 = OpCompositeExtract %float %21813 0 - %14088 = OpCompositeExtract %float %21813 1 - %7642 = OpCompositeExtract %float %11401 2 - %7473 = OpCompositeExtract %float %11401 3 - %18780 = OpCompositeConstruct %v4float %18424 %14088 %7642 %7473 - OpStore %20459 %18780 - OpStore %19982 %7970 - OpBranch %17365 - %17365 = OpLabel - %22959 = OpPhi %int %int_0 %9186 %21302 %14552 - %24789 = OpSLessThan %bool %22959 %int_16 - OpLoopMerge %11793 %14552 None - OpBranchConditional %24789 %14552 %11793 - %14552 = OpLabel - %18211 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22959 - %15629 = OpLoad %v4float %18211 - %21332 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22959 - %12974 = OpLoad %v4float %21332 - %8884 = OpFNegate %v4float %12974 - %7862 = OpFAdd %v4float %15629 %8884 - %14199 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22959 - %21056 = OpLoad %v4float %14199 - %22585 = OpFAdd %v4float %7862 %21056 - %18592 = OpAccessChain %_ptr_Output_v4float %5430 %22959 - OpStore %18592 %22585 - %21302 = OpIAdd %int %22959 %int_1 - OpBranch %17365 - %11793 = OpLabel - OpEmitVertex - OpEndPrimitive - OpBranch %23648 - %23648 = OpLabel - OpReturn - OpFunctionEnd -#endif - -const uint32_t rect_list_gs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x000060D6, 0x00000000, 0x00020011, - 0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47, - 0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001, - 0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, - 0x00001342, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98, - 0x00030010, 0x0000161F, 0x00000016, 0x00040010, 0x0000161F, 0x00000000, - 0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 
0x0000161F, - 0x0000001A, 0x00000006, 0x00050048, 0x00000408, 0x00000000, 0x0000000B, - 0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001, - 0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000, - 0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B, - 0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536, - 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000, - 0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230, - 0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010, - 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016, - 0x0000000D, 0x00000020, 0x00040017, 0x00000013, 0x0000000D, 0x00000002, - 0x00020014, 0x00000009, 0x00040017, 0x0000000F, 0x00000009, 0x00000002, - 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E, 0x00000408, - 0x0000001D, 0x0000000D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, - 0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, 0x0004001C, 0x0000085F, - 0x00000408, 0x00000A13, 0x00040020, 0x00000ADC, 0x00000001, 0x0000085F, - 0x0004003B, 0x00000ADC, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C, - 0x00000020, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, - 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000B, - 0x00000A0A, 0x00000000, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D, - 0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000B, - 0x00000A0D, 0x00000001, 0x0004002B, 0x0000000D, 0x00000030, 0x3A83126F, - 0x00040020, 0x0000029A, 0x00000001, 0x0000001D, 0x0004001E, 0x00000409, - 0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003, 0x00000409, - 0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x00040020, 0x0000029B, - 0x00000003, 0x0000001D, 0x00040020, 0x0000028B, 0x00000003, 0x0000000D, - 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, 0x000008F6, - 0x0000001D, 0x00000A3A, 0x00040020, 0x00000B73, 0x00000003, 0x000008F6, - 0x0004003B, 0x00000B73, 0x00001536, 0x00000003, 0x0004001C, 0x0000084A, - 0x000008F6, 0x00000A13, 0x00040020, 0x00000AC7, 0x00000001, 0x0000084A, - 0x0004003B, 0x00000AC7, 0x00000CE6, 0x00000001, 0x00040020, 0x00000B74, - 0x00000001, 0x000008F6, 0x0004002B, 0x0000000C, 0x00000A3B, 0x00000010, - 0x0004001C, 0x00000352, 0x00000013, 0x00000A13, 0x00040020, 0x000005CF, - 0x00000001, 0x00000352, 0x0004003B, 0x000005CF, 0x00000FCC, 0x00000001, - 0x0004001C, 0x00000298, 0x0000000D, 0x00000A13, 0x00040020, 0x00000515, - 0x00000001, 0x00000298, 0x0004003B, 0x00000515, 0x00001230, 0x00000001, - 0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290, - 0x00000E98, 0x00000003, 0x0005002C, 0x00000013, 0x000006DF, 0x00000030, - 0x00000030, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, - 0x000200F8, 0x00005D6B, 0x00070041, 0x0000028A, 0x00001BD9, 0x000014B9, - 0x00000A11, 0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0B, - 0x00001BD9, 0x00070041, 0x0000028A, 0x00004FD7, 0x000014B9, 0x00000A0E, - 0x00000A0B, 0x00000A0D, 0x0004003D, 0x0000000D, 0x00004DB1, 0x00004FD7, - 0x00050050, 0x00000013, 0x00002AA5, 0x00003D0B, 0x00004DB1, 0x00060041, - 0x0000029A, 0x000060C9, 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, - 0x0000001D, 0x00001ECB, 0x000060C9, 0x0007004F, 0x00000013, 0x00001A6D, - 0x00001ECB, 0x00001ECB, 0x00000000, 0x00000001, 0x00050083, 0x00000013, - 0x00003D7B, 0x00001A6D, 0x00002AA5, 0x0006000C, 0x00000013, 0x00001E4D, - 0x00000001, 0x00000004, 0x00003D7B, 0x000500BC, 0x0000000F, 
0x00004A4D, - 0x00001E4D, 0x000006DF, 0x0004009B, 0x00000009, 0x00003D5F, 0x00004A4D, - 0x000400A8, 0x00000009, 0x00002C8A, 0x00003D5F, 0x000300F7, 0x000033E6, - 0x00000000, 0x000400FA, 0x00002C8A, 0x00002F61, 0x000033E6, 0x000200F8, - 0x00002F61, 0x00070041, 0x0000028A, 0x00004722, 0x000014B9, 0x00000A0E, - 0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0C, 0x00004722, - 0x00070041, 0x0000028A, 0x00004FD8, 0x000014B9, 0x00000A11, 0x00000A0B, - 0x00000A0D, 0x0004003D, 0x0000000D, 0x00005297, 0x00004FD8, 0x00050050, - 0x00000013, 0x000044EB, 0x00003D0C, 0x00005297, 0x00050083, 0x00000013, - 0x00003C82, 0x00001A6D, 0x000044EB, 0x0006000C, 0x00000013, 0x00005F56, - 0x00000001, 0x00000004, 0x00003C82, 0x000500BC, 0x0000000F, 0x00005050, - 0x00005F56, 0x000006DF, 0x0004009B, 0x00000009, 0x00005134, 0x00005050, - 0x000200F9, 0x000033E6, 0x000200F8, 0x000033E6, 0x000700F5, 0x00000009, - 0x00002AAC, 0x00003D5F, 0x00005D6B, 0x00005134, 0x00002F61, 0x000300F7, - 0x00005C60, 0x00000000, 0x000400FA, 0x00002AAC, 0x00002F74, 0x000023E2, - 0x000200F8, 0x00002F74, 0x00050041, 0x0000029B, 0x00004675, 0x00001342, - 0x00000A0B, 0x0003003E, 0x00004675, 0x00001ECB, 0x00060041, 0x0000028A, - 0x00004DC1, 0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, - 0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0D, 0x00001342, - 0x00000A0E, 0x0003003E, 0x00004E0D, 0x00001CDF, 0x00050041, 0x00000B74, - 0x00004D88, 0x00000CE6, 0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7A, - 0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041, - 0x0000029A, 0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, - 0x0000001D, 0x00002C86, 0x0000591C, 0x0003003E, 0x00004675, 0x00002C86, - 0x00060041, 0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E, - 0x0004003D, 0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0D, - 0x00001F1F, 0x00050041, 0x00000B74, 0x000040EF, 0x00000CE6, 0x00000A0E, - 0x0004003D, 0x000008F6, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536, - 0x00002A7B, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9, - 0x00000A11, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D, - 0x0003003E, 0x00004675, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0, - 0x000014B9, 0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20, - 0x000040F0, 0x0003003E, 0x00004E0D, 0x00001F20, 0x00050041, 0x00000B74, - 0x000040F1, 0x00000CE6, 0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7C, - 0x000040F1, 0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x000100DB, - 0x0003003E, 0x00004675, 0x00002C87, 0x0003003E, 0x00004E0D, 0x00001F20, - 0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x0003003E, 0x00004675, - 0x00002C86, 0x0003003E, 0x00004E0D, 0x00001F1F, 0x0003003E, 0x00001536, - 0x00002A7B, 0x000100DA, 0x0004007F, 0x00000013, 0x00002293, 0x00001A6D, - 0x0007004F, 0x00000013, 0x000035BD, 0x00002C86, 0x00002C86, 0x00000000, - 0x00000001, 0x00050081, 0x00000013, 0x000053D1, 0x00002293, 0x000035BD, - 0x0007004F, 0x00000013, 0x00001D0A, 0x00002C87, 0x00002C87, 0x00000000, - 0x00000001, 0x00050081, 0x00000013, 0x00005534, 0x000053D1, 0x00001D0A, - 0x00050051, 0x0000000D, 0x000047F7, 0x00005534, 0x00000000, 0x00050051, - 0x0000000D, 0x00003707, 0x00005534, 0x00000001, 0x00050051, 0x0000000D, - 0x00001DD9, 0x00002C87, 0x00000002, 0x00050051, 0x0000000D, 0x00001D30, - 0x00002C87, 0x00000003, 0x00070050, 0x0000001D, 0x0000495B, 0x000047F7, - 0x00003707, 0x00001DD9, 0x00001D30, 0x0003003E, 0x00004675, 0x0000495B, - 0x0003003E, 0x00004E0D, 0x00001F20, 0x000200F9, 0x000043D4, 
0x000200F8, - 0x000043D4, 0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x00002F74, - 0x00005335, 0x000038D7, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, - 0x00000A3B, 0x000400F6, 0x00002E10, 0x000038D7, 0x00000000, 0x000400FA, - 0x000060D4, 0x000038D7, 0x00002E10, 0x000200F8, 0x000038D7, 0x00060041, - 0x0000029A, 0x00004BBC, 0x00000CE6, 0x00000A0B, 0x000059AE, 0x0004003D, - 0x0000001D, 0x00005DF0, 0x00004BBC, 0x0004007F, 0x0000001D, 0x00004DA8, - 0x00005DF0, 0x00060041, 0x0000029A, 0x00001A0B, 0x00000CE6, 0x00000A0E, - 0x000059AE, 0x0004003D, 0x0000001D, 0x00001AAC, 0x00001A0B, 0x00050081, - 0x0000001D, 0x00005825, 0x00004DA8, 0x00001AAC, 0x00060041, 0x0000029A, - 0x0000495F, 0x00000CE6, 0x00000A11, 0x000059AE, 0x0004003D, 0x0000001D, - 0x0000523F, 0x0000495F, 0x00050081, 0x0000001D, 0x00005838, 0x00005825, - 0x0000523F, 0x00050041, 0x0000029B, 0x0000489F, 0x00001536, 0x000059AE, - 0x0003003E, 0x0000489F, 0x00005838, 0x00050080, 0x0000000C, 0x00005335, - 0x000059AE, 0x00000A0E, 0x000200F9, 0x000043D4, 0x000200F8, 0x00002E10, - 0x000100DA, 0x000100DB, 0x000200F9, 0x00005C60, 0x000200F8, 0x000023E2, - 0x00050041, 0x0000029B, 0x00004FEB, 0x00001342, 0x00000A0B, 0x0003003E, - 0x00004FEB, 0x00001ECB, 0x00060041, 0x0000028A, 0x00004DC2, 0x000014B9, - 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CE0, 0x00004DC2, - 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E, 0x0003003E, - 0x00004E0E, 0x00001CE0, 0x00050041, 0x00000B74, 0x00004D89, 0x00000CE6, - 0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7D, 0x00004D89, 0x0003003E, - 0x00001536, 0x00002A7D, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591E, - 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C88, - 0x0000591E, 0x0003003E, 0x00004FEB, 0x00002C88, 0x00060041, 0x0000028A, - 0x000040F2, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D, 0x0000000D, - 0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21, 0x00050041, - 0x00000B74, 0x000040F3, 0x00000CE6, 0x00000A0E, 0x0004003D, 0x000008F6, - 0x00002A7E, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7E, 0x000100DA, - 0x00060041, 0x0000029A, 0x0000591F, 0x000014B9, 0x00000A11, 0x00000A0B, - 0x0004003D, 0x0000001D, 0x00002C89, 0x0000591F, 0x0003003E, 0x00004FEB, - 0x00002C89, 0x00060041, 0x0000028A, 0x000040F4, 0x000014B9, 0x00000A11, - 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F22, 0x000040F4, 0x0003003E, - 0x00004E0E, 0x00001F22, 0x00050041, 0x00000B74, 0x000040F5, 0x00000CE6, - 0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7F, 0x000040F5, 0x0003003E, - 0x00001536, 0x00002A7F, 0x000100DA, 0x000100DB, 0x0003003E, 0x00004FEB, - 0x00001ECB, 0x0003003E, 0x00004E0E, 0x00001CE0, 0x0003003E, 0x00001536, - 0x00002A7D, 0x000100DA, 0x0003003E, 0x00004FEB, 0x00002C89, 0x0003003E, - 0x00004E0E, 0x00001F22, 0x0003003E, 0x00001536, 0x00002A7F, 0x000100DA, - 0x0007004F, 0x00000013, 0x00003067, 0x00002C88, 0x00002C88, 0x00000000, - 0x00000001, 0x0004007F, 0x00000013, 0x000052E6, 0x00003067, 0x00050081, - 0x00000013, 0x0000208F, 0x00001A6D, 0x000052E6, 0x0007004F, 0x00000013, - 0x00003625, 0x00002C89, 0x00002C89, 0x00000000, 0x00000001, 0x00050081, - 0x00000013, 0x00005535, 0x0000208F, 0x00003625, 0x00050051, 0x0000000D, - 0x000047F8, 0x00005535, 0x00000000, 0x00050051, 0x0000000D, 0x00003708, - 0x00005535, 0x00000001, 0x00050051, 0x0000000D, 0x00001DDA, 0x00002C89, - 0x00000002, 0x00050051, 0x0000000D, 0x00001D31, 0x00002C89, 0x00000003, - 0x00070050, 0x0000001D, 0x0000495C, 0x000047F8, 0x00003708, 0x00001DDA, - 0x00001D31, 0x0003003E, 0x00004FEB, 0x0000495C, 0x0003003E, 
0x00004E0E, - 0x00001F22, 0x000200F9, 0x000043D5, 0x000200F8, 0x000043D5, 0x000700F5, - 0x0000000C, 0x000059AF, 0x00000A0B, 0x000023E2, 0x00005336, 0x000038D8, - 0x000500B1, 0x00000009, 0x000060D5, 0x000059AF, 0x00000A3B, 0x000400F6, - 0x00002E11, 0x000038D8, 0x00000000, 0x000400FA, 0x000060D5, 0x000038D8, - 0x00002E11, 0x000200F8, 0x000038D8, 0x00060041, 0x0000029A, 0x00004723, - 0x00000CE6, 0x00000A0B, 0x000059AF, 0x0004003D, 0x0000001D, 0x00003D0D, - 0x00004723, 0x00060041, 0x0000029A, 0x00005354, 0x00000CE6, 0x00000A0E, - 0x000059AF, 0x0004003D, 0x0000001D, 0x000032AE, 0x00005354, 0x0004007F, - 0x0000001D, 0x000022B4, 0x000032AE, 0x00050081, 0x0000001D, 0x00001EB6, - 0x00003D0D, 0x000022B4, 0x00060041, 0x0000029A, 0x00003777, 0x00000CE6, - 0x00000A11, 0x000059AF, 0x0004003D, 0x0000001D, 0x00005240, 0x00003777, - 0x00050081, 0x0000001D, 0x00005839, 0x00001EB6, 0x00005240, 0x00050041, - 0x0000029B, 0x000048A0, 0x00001536, 0x000059AF, 0x0003003E, 0x000048A0, - 0x00005839, 0x00050080, 0x0000000C, 0x00005336, 0x000059AF, 0x00000A0E, - 0x000200F9, 0x000043D5, 0x000200F8, 0x00002E11, 0x000100DA, 0x000100DB, - 0x000200F9, 0x00005C60, 0x000200F8, 0x00005C60, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/vulkan/shaders/dummy.ps.glsl b/src/xenia/gpu/vulkan/shaders/dummy.ps.glsl deleted file mode 100644 index 325576f0f..000000000 --- a/src/xenia/gpu/vulkan/shaders/dummy.ps.glsl +++ /dev/null @@ -1,35 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -layout(set = 0, binding = 1) uniform consts_type { - vec4 float_consts[512]; - uint loop_consts[32]; - uint bool_consts[8]; -} consts; - -layout(push_constant) uniform push_consts_type { - vec4 window_scale; - vec4 vtx_fmt; - vec4 point_size; - vec4 alpha_test; - uint ps_param_gen; -} push_constants; - -layout(set = 1, binding = 0) uniform sampler1D textures1D[32]; -layout(set = 1, binding = 1) uniform sampler2D textures2D[32]; -layout(set = 1, binding = 2) uniform sampler3D textures3D[32]; -layout(set = 1, binding = 3) uniform samplerCube textures4D[32]; - -layout(location = 0) in vec4 in_interpolators[16]; -layout(location = 0) out vec4 oC[4]; - -void main() { - // This shader does absolutely nothing! - return; -} diff --git a/src/xenia/gpu/vulkan/shaders/line_quad_list.gs.glsl b/src/xenia/gpu/vulkan/shaders/line_quad_list.gs.glsl deleted file mode 100644 index 7f8863853..000000000 --- a/src/xenia/gpu/vulkan/shaders/line_quad_list.gs.glsl +++ /dev/null @@ -1,53 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. 
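The NOTE above marks the workflow being removed: GLSL sources were compiled offline with `xenia-build genspirv` and the resulting SPIR-V words were checked in as C arrays such as rect_list_gs above. The replacement flow seen later in this diff includes headers generated with `xb buildshaders` instead. As an illustrative sketch (not part of this change, using hypothetical names), an embedded word array becomes usable through vkCreateShaderModule, which takes the code size in bytes and a pointer to the words; Xenia itself calls this through its loaded device function table rather than the static entry point assumed here:

#include <cstddef>
#include <cstdint>
#include <vulkan/vulkan.h>

VkShaderModule CreateModuleFromEmbeddedWords(VkDevice device,
                                             const uint32_t* words,
                                             size_t word_count) {
  VkShaderModuleCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  create_info.codeSize = word_count * sizeof(uint32_t);  // bytes, not words
  create_info.pCode = words;
  VkShaderModule shader_module = VK_NULL_HANDLE;
  if (vkCreateShaderModule(device, &create_info, nullptr, &shader_module) !=
      VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return shader_module;
}

For an array like rect_list_gs, word_count would be sizeof(rect_list_gs) / sizeof(uint32_t).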
- -#version 450 core -#extension all : warn -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(lines_adjacency) in; -layout(line_strip, max_vertices = 5) out; -void main() { - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = gl_in[3].gl_Position; - gl_PointSize = gl_in[3].gl_PointSize; - out_interpolators = in_interpolators[3]; - EmitVertex(); - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/point_list.gs.glsl b/src/xenia/gpu/vulkan/shaders/point_list.gs.glsl deleted file mode 100644 index 52b29581e..000000000 --- a/src/xenia/gpu/vulkan/shaders/point_list.gs.glsl +++ /dev/null @@ -1,63 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -layout(push_constant) uniform push_consts_type { - vec4 window_scale; - vec4 vtx_fmt; - vec4 point_size; - vec4 alpha_test; - uint ps_param_gen; -} push_constants; - -in gl_PerVertex { - vec4 gl_Position; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 16) in vec2 in_point_coord_unused[]; -layout(location = 17) in float point_size[]; - -layout(location = 0) out vec4 out_interpolators[16]; -layout(location = 16) out vec2 point_coord; - -// TODO(benvanik): clamp to min/max. -// TODO(benvanik): figure out how to see which interpolator gets adjusted. - -layout(points) in; -layout(triangle_strip, max_vertices = 4) out; - -void main() { - const vec2 offsets[4] = { - vec2(-1.0, 1.0), - vec2( 1.0, 1.0), - vec2(-1.0, -1.0), - vec2( 1.0, -1.0), - }; - vec4 pos = gl_in[0].gl_Position; - vec2 window_scaled_psize = push_constants.point_size.xy; - // Shader header writes -1.0f to pointSize by default, so any positive value - // means that it was overwritten by the translated vertex shader. 
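The sentinel convention described in the comment above can be summarized as a standalone sketch (not part of this change; the Float2 type and the function are hypothetical): a positive per-vertex point size overrides the per-draw constant from the push constants, and the result is divided by window_scale.zw exactly as the shader code below does.

struct Float2 {
  float x, y;
};

// -1.0f means "not written by the translated vertex shader".
Float2 EffectivePointSize(float vertex_point_size, Float2 constant_point_size,
                          Float2 window_scale_zw) {
  Float2 size = constant_point_size;  // push_constants.point_size.xy
  if (vertex_point_size > 0.0f) {
    // The guest vertex shader wrote an explicit point size.
    size.x = vertex_point_size;
    size.y = vertex_point_size;
  }
  // Scale into the units the shader below expands the quad in.
  size.x /= window_scale_zw.x;
  size.y /= window_scale_zw.y;
  return size;
}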
- if (point_size[0] > 0.0f) { - window_scaled_psize = vec2(point_size[0]); - } - window_scaled_psize /= push_constants.window_scale.zw; - for (int i = 0; i < 4; ++i) { - gl_Position = vec4(pos.xy + (offsets[i] * window_scaled_psize), pos.zw); - out_interpolators = in_interpolators[0]; - point_coord = max(offsets[i], vec2(0.0f)); - EmitVertex(); - } - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/quad_list.gs.glsl b/src/xenia/gpu/vulkan/shaders/quad_list.gs.glsl deleted file mode 100644 index b340b55da..000000000 --- a/src/xenia/gpu/vulkan/shaders/quad_list.gs.glsl +++ /dev/null @@ -1,42 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(lines_adjacency) in; -layout(triangle_strip, max_vertices = 4) out; -void main() { - const int order[4] = { 0, 1, 3, 2 }; - for (int i = 0; i < 4; ++i) { - int input_index = order[i]; - gl_Position = gl_in[input_index].gl_Position; - gl_PointSize = gl_in[input_index].gl_PointSize; - out_interpolators = in_interpolators[input_index]; - EmitVertex(); - } - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/rect_list.gs.glsl b/src/xenia/gpu/vulkan/shaders/rect_list.gs.glsl deleted file mode 100644 index 515e1b576..000000000 --- a/src/xenia/gpu/vulkan/shaders/rect_list.gs.glsl +++ /dev/null @@ -1,124 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(triangles) in; -layout(triangle_strip, max_vertices = 6) out; - -bool equalsEpsilon(vec2 left, vec2 right, float epsilon) { - return all(lessThanEqual(abs(left - right), vec2(epsilon))); -} - -void main() { - // Most games use a left-aligned form. 
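The epsilon comparison below detects, within a 0.001 tolerance, which of the three supplied vertices is diagonally opposite the rectangle's missing fourth corner; that corner is then synthesized with the parallelogram rule, and the same rule is applied to each of the 16 interpolators so vertex attributes extrapolate consistently with the position. A standalone sketch of the rule (not part of this change; the Vec2 type is hypothetical):

struct Vec2 {
  float x, y;
};

// v_shared0 and v_shared1 are the two corners adjacent to the missing one;
// v_opposite is the corner diagonally opposite it. The missing corner mirrors
// v_opposite across the midpoint of the diagonal between the shared corners:
// v3 = v_shared0 + v_shared1 - v_opposite.
Vec2 MissingRectCorner(Vec2 v_opposite, Vec2 v_shared0, Vec2 v_shared1) {
  return {v_shared0.x + v_shared1.x - v_opposite.x,
          v_shared0.y + v_shared1.y - v_opposite.y};
}

In the first branch below the opposite corner is vertex 0 (v3 = v1 + v2 - v0); in the else branch it is vertex 1 (v3 = v0 + v2 - v1).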
- if (equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[2].gl_Position.x, gl_in[1].gl_Position.y), 0.001) || - equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[1].gl_Position.x, gl_in[2].gl_Position.y), 0.001)) { - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | // | 2: -1, 1 - // | - | 3: [ 1, 1 ] - // 2 ----- [3] - // - // 0 ------ 2 0: -1,-1 - // | - | 1: -1, 1 - // | // | 2: 1,-1 - // | - | 3: [ 1, 1 ] - // 1 ------[3] - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - EndPrimitive(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = vec4((-gl_in[0].gl_Position.xy) + - gl_in[1].gl_Position.xy + - gl_in[2].gl_Position.xy, - gl_in[2].gl_Position.zw); - gl_PointSize = gl_in[2].gl_PointSize; - for (int i = 0; i < 16; ++i) { - out_interpolators[i] = (-in_interpolators[0][i]) + - in_interpolators[1][i] + - in_interpolators[2][i]; - } - EmitVertex(); - EndPrimitive(); - } else { - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | \\ | 2: 1, 1 - // | - | 3: [-1, 1 ] - // [3] ----- 2 - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - EndPrimitive(); - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = vec4( gl_in[0].gl_Position.xy + - (-gl_in[1].gl_Position.xy) + - gl_in[2].gl_Position.xy, - gl_in[2].gl_Position.zw); - gl_PointSize = gl_in[2].gl_PointSize; - for (int i = 0; i < 16; ++i) { - out_interpolators[i] = in_interpolators[0][i] + - (-in_interpolators[1][i]) + - in_interpolators[2][i]; - } - EmitVertex(); - EndPrimitive(); - } -} diff --git a/src/xenia/gpu/vulkan/texture_config.cc b/src/xenia/gpu/vulkan/texture_config.cc deleted file mode 100644 index 60098322c..000000000 --- a/src/xenia/gpu/vulkan/texture_config.cc +++ /dev/null @@ -1,146 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/texture_config.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -#define COMP_SWIZ(r, g, b, a) \ - { \ - VK_COMPONENT_SWIZZLE_##r, VK_COMPONENT_SWIZZLE_##g, \ - VK_COMPONENT_SWIZZLE_##b, VK_COMPONENT_SWIZZLE_##a \ - } -#define VEC_SWIZ(x, y, z, w) \ - { \ - VECTOR_SWIZZLE_##x, VECTOR_SWIZZLE_##y, VECTOR_SWIZZLE_##z, \ - VECTOR_SWIZZLE_##w \ - } - -#define RGBA COMP_SWIZ(R, G, B, A) -#define ___R COMP_SWIZ(IDENTITY, IDENTITY, IDENTITY, R) -#define RRRR COMP_SWIZ(R, R, R, R) - -#define XYZW VEC_SWIZ(X, Y, Z, W) -#define YXWZ VEC_SWIZ(Y, X, W, Z) -#define ZYXW VEC_SWIZ(Z, Y, X, W) - -#define ___(format) \ - { VK_FORMAT_##format } -#define _c_(format, component_swizzle) \ - { VK_FORMAT_##format, component_swizzle, XYZW } -#define __v(format, vector_swizzle) \ - { VK_FORMAT_##format, RGBA, vector_swizzle } -#define _cv(format, component_swizzle, vector_swizzle) \ - { VK_FORMAT_##format, component_swizzle, vector_swizzle } - -// https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkFormat.html -const TextureConfig texture_configs[64] = { - /* k_1_REVERSE */ ___(UNDEFINED), - /* k_1 */ ___(UNDEFINED), - /* k_8 */ ___(R8_UNORM), - /* k_1_5_5_5 */ __v(A1R5G5B5_UNORM_PACK16, ZYXW), - /* k_5_6_5 */ __v(R5G6B5_UNORM_PACK16, ZYXW), - /* k_6_5_5 */ ___(UNDEFINED), - /* k_8_8_8_8 */ ___(R8G8B8A8_UNORM), - /* k_2_10_10_10 */ ___(A2R10G10B10_UNORM_PACK32), - /* k_8_A */ ___(R8_UNORM), - /* k_8_B */ ___(UNDEFINED), - /* k_8_8 */ ___(R8G8_UNORM), - /* k_Cr_Y1_Cb_Y0_REP */ ___(UNDEFINED), - /* k_Y1_Cr_Y0_Cb_REP */ ___(UNDEFINED), - /* k_16_16_EDRAM */ ___(UNDEFINED), - /* k_8_8_8_8_A */ ___(UNDEFINED), - /* k_4_4_4_4 */ __v(R4G4B4A4_UNORM_PACK16, YXWZ), - // TODO: Verify if these two are correct (I think not). - /* k_10_11_11 */ ___(B10G11R11_UFLOAT_PACK32), - /* k_11_11_10 */ ___(B10G11R11_UFLOAT_PACK32), - - /* k_DXT1 */ ___(BC1_RGBA_UNORM_BLOCK), - /* k_DXT2_3 */ ___(BC2_UNORM_BLOCK), - /* k_DXT4_5 */ ___(BC3_UNORM_BLOCK), - /* k_16_16_16_16_EDRAM */ ___(UNDEFINED), - - // TODO: D24 unsupported on AMD. - /* k_24_8 */ ___(D24_UNORM_S8_UINT), - /* k_24_8_FLOAT */ ___(D32_SFLOAT_S8_UINT), - /* k_16 */ ___(R16_UNORM), - /* k_16_16 */ ___(R16G16_UNORM), - /* k_16_16_16_16 */ ___(R16G16B16A16_UNORM), - /* k_16_EXPAND */ ___(R16_SFLOAT), - /* k_16_16_EXPAND */ ___(R16G16_SFLOAT), - /* k_16_16_16_16_EXPAND */ ___(R16G16B16A16_SFLOAT), - /* k_16_FLOAT */ ___(R16_SFLOAT), - /* k_16_16_FLOAT */ ___(R16G16_SFLOAT), - /* k_16_16_16_16_FLOAT */ ___(R16G16B16A16_SFLOAT), - - // ! These are UNORM formats, not SINT. - /* k_32 */ ___(R32_SINT), - /* k_32_32 */ ___(R32G32_SINT), - /* k_32_32_32_32 */ ___(R32G32B32A32_SINT), - /* k_32_FLOAT */ ___(R32_SFLOAT), - /* k_32_32_FLOAT */ ___(R32G32_SFLOAT), - /* k_32_32_32_32_FLOAT */ ___(R32G32B32A32_SFLOAT), - /* k_32_AS_8 */ ___(UNDEFINED), - /* k_32_AS_8_8 */ ___(UNDEFINED), - /* k_16_MPEG */ ___(UNDEFINED), - /* k_16_16_MPEG */ ___(UNDEFINED), - /* k_8_INTERLACED */ ___(UNDEFINED), - /* k_32_AS_8_INTERLACED */ ___(UNDEFINED), - /* k_32_AS_8_8_INTERLACED */ ___(UNDEFINED), - /* k_16_INTERLACED */ ___(UNDEFINED), - /* k_16_MPEG_INTERLACED */ ___(UNDEFINED), - /* k_16_16_MPEG_INTERLACED */ ___(UNDEFINED), - - // https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf - /* k_DXN */ ___(BC5_UNORM_BLOCK), // ? 
- - /* k_8_8_8_8_AS_16_16_16_16 */ ___(R8G8B8A8_UNORM), - /* k_DXT1_AS_16_16_16_16 */ ___(BC1_RGBA_UNORM_BLOCK), - /* k_DXT2_3_AS_16_16_16_16 */ ___(BC2_UNORM_BLOCK), - /* k_DXT4_5_AS_16_16_16_16 */ ___(BC3_UNORM_BLOCK), - - /* k_2_10_10_10_AS_16_16_16_16 */ ___(A2R10G10B10_UNORM_PACK32), - - // TODO: Verify if these two are correct (I think not). - /* k_10_11_11_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ? - /* k_11_11_10_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ? - /* k_32_32_32_FLOAT */ ___(R32G32B32_SFLOAT), - /* k_DXT3A */ _c_(BC2_UNORM_BLOCK, ___R), - /* k_DXT5A */ _c_(BC4_UNORM_BLOCK, RRRR), // ATI1N - - // https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf - /* k_CTX1 */ ___(R8G8_UINT), - - /* k_DXT3A_AS_1_1_1_1 */ ___(UNDEFINED), - - /* k_8_8_8_8_GAMMA_EDRAM */ ___(UNDEFINED), - /* k_2_10_10_10_FLOAT_EDRAM */ ___(UNDEFINED), -}; - -#undef _cv -#undef __v -#undef _c_ -#undef ___ - -#undef ZYXW -#undef YXWZ -#undef XYZW - -#undef RRRR -#undef ___R -#undef RGBA - -#undef VEC_SWIZ -#undef COMP_SWIZ - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_config.h b/src/xenia/gpu/vulkan/texture_config.h deleted file mode 100644 index dad6f1aec..000000000 --- a/src/xenia/gpu/vulkan/texture_config.h +++ /dev/null @@ -1,50 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ -#define XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ - -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -typedef enum VectorSwizzle { - VECTOR_SWIZZLE_X = 0, - VECTOR_SWIZZLE_Y = 1, - VECTOR_SWIZZLE_Z = 2, - VECTOR_SWIZZLE_W = 3, -} VectorSwizzle; - -struct TextureConfig { - VkFormat host_format; - struct { - VkComponentSwizzle r = VK_COMPONENT_SWIZZLE_R; - VkComponentSwizzle g = VK_COMPONENT_SWIZZLE_G; - VkComponentSwizzle b = VK_COMPONENT_SWIZZLE_B; - VkComponentSwizzle a = VK_COMPONENT_SWIZZLE_A; - } component_swizzle; - struct { - VectorSwizzle x = VECTOR_SWIZZLE_X; - VectorSwizzle y = VECTOR_SWIZZLE_Y; - VectorSwizzle z = VECTOR_SWIZZLE_Z; - VectorSwizzle w = VECTOR_SWIZZLE_W; - } vector_swizzle; -}; - -extern const TextureConfig texture_configs[64]; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 2dcf09f0e..aa9f2e4ee 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -10,253 +10,1180 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include +#include +#include +#include +#include +#include +#include +#include "xenia/base/assert.h" +#include "xenia/base/byte_order.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/registers.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" -#include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_presenter.h" +#include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { -using namespace xe::literals; -using namespace xe::gpu::xenos; -using xe::ui::vulkan::util::CheckResult; - -constexpr size_t kDefaultBufferCacheCapacity = 256_MiB; +// Generated with `xb buildshaders`. +namespace shaders { +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_fxaa_luma_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_table_fxaa_luma_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_table_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_cw_vs.h" +} // namespace shaders +// No specific reason for 32768 descriptors, just the "too much" amount from +// Direct3D 12 PIX warnings. 2x descriptors for textures because of unsigned and +// signed bindings. VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) : CommandProcessor(graphics_system, kernel_state), - swap_submission_tracker_(GetVulkanProvider()) {} + deferred_command_buffer_(*this), + transient_descriptor_allocator_uniform_buffer_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768), + transient_descriptor_allocator_storage_buffer_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 32768, 32768), + transient_descriptor_allocator_sampled_image_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2 * 32768, 32768), + transient_descriptor_allocator_sampler_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_SAMPLER, 32768, 32768) {} VulkanCommandProcessor::~VulkanCommandProcessor() = default; -void VulkanCommandProcessor::RequestFrameTrace( - const std::filesystem::path& root_path) { - // Override traces if renderdoc is attached. 
- const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - if (provider.renderdoc_api().api_1_0_0()) { - trace_requested_ = true; - return; - } - - return CommandProcessor::RequestFrameTrace(root_path); -} - -void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, - uint32_t length) {} - -void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} - void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); cache_clear_requested_ = true; } +void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, + uint32_t length) { + shared_memory_->MemoryInvalidationCallback(base_ptr, length, true); + primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true); +} + +void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} + bool VulkanCommandProcessor::SetupContext() { if (!CommandProcessor::SetupContext()) { - XELOGE("Unable to initialize base command processor context"); + XELOGE("Failed to initialize base command processor context"); return false; } - ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - VkResult status = VK_SUCCESS; + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - // Setup a blitter. - blitter_ = std::make_unique(provider); - status = blitter_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize blitter"); - blitter_->Shutdown(); + // The unconditional inclusion of the vertex shader stage also covers the case + // of manual index / factor buffer fetch (the system constants and the shared + // memory are needed for that) in the tessellation vertex shader when + // fullDrawIndexUint32 is not supported. + guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT; + if (device_features.tessellationShader) { + guest_shader_pipeline_stages_ |= + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + guest_shader_vertex_stages_ |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + } + if (!device_features.vertexPipelineStoresAndAtomics) { + // For memory export from vertex shaders converted to compute shaders. + guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT; + } + + // 16384 is bigger than any single uniform buffer that Xenia needs, but is the + // minimum maxUniformBufferRange, thus the safe minimum amount. + VkDeviceSize uniform_buffer_alignment = std::max( + provider.device_properties().limits.minUniformBufferOffsetAlignment, + VkDeviceSize(1)); + uniform_buffer_pool_ = std::make_unique( + provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize, + size_t(16384)), + size_t(uniform_buffer_alignment))); + + // Descriptor set layouts. + VkShaderStageFlags guest_shader_stages = + guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT; + // Empty. 
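A note on the uniform buffer pool page size chosen above, before the descriptor set layouts are created: the page is the larger of the pool's default page size and the 16384-byte minimum maxUniformBufferRange that the Vulkan specification guarantees, rounded up to the device's minUniformBufferOffsetAlignment. A hypothetical standalone form of that computation (a sketch, assuming the alignment is a power of two, which the specification requires):

#include <algorithm>
#include <cstddef>

size_t AlignedUniformPoolPageSize(size_t default_page_size,
                                  size_t min_offset_alignment) {
  size_t alignment = std::max<size_t>(min_offset_alignment, 1);
  size_t size = std::max<size_t>(default_page_size, 16384);
  // Round up to the next multiple of the alignment (power of two).
  return (size + alignment - 1) & ~(alignment - 1);
}

For example, a 64 KiB default page with a 256-byte alignment is already aligned and stays 64 KiB.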
+ VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 0; + descriptor_set_layout_create_info.pBindings = nullptr; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_empty_) != VK_SUCCESS) { + XELOGE("Failed to create an empty Vulkan descriptor set layout"); + return false; + } + // Shared memory and EDRAM. + uint32_t shared_memory_binding_count_log2 = + SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2( + provider.device_properties().limits.maxStorageBufferRange); + uint32_t shared_memory_binding_count = UINT32_C(1) + << shared_memory_binding_count_log2; + VkDescriptorSetLayoutBinding + descriptor_set_layout_bindings_shared_memory_and_edram[1]; + descriptor_set_layout_bindings_shared_memory_and_edram[0].binding = 0; + descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorCount = + shared_memory_binding_count; + descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags = + guest_shader_stages; + descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers = + nullptr; + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. + descriptor_set_layout_create_info.bindingCount = uint32_t( + xe::countof(descriptor_set_layout_bindings_shared_memory_and_edram)); + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_bindings_shared_memory_and_edram; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for the shared memory " + "and the EDRAM"); + return false; + } + // Transient: uniform buffer for the guest vertex shader stages. + VkDescriptorSetLayoutBinding descriptor_set_layout_binding_transient; + descriptor_set_layout_binding_transient.binding = 0; + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.descriptorCount = 1; + descriptor_set_layout_binding_transient.stageFlags = + guest_shader_vertex_stages_; + descriptor_set_layout_binding_transient.pImmutableSamplers = nullptr; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = + &descriptor_set_layout_binding_transient; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferGuestVertex)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the guest vertex shader stages"); + return false; + } + // Transient: uniform buffer for fragment shaders. 
+ descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferFragment)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the fragment shader"); + return false; + } + // Transient: uniform buffer for the guest shader stages. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferGuestShader)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the guest shader stages"); + return false; + } + // Transient: system constants. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; + if (device_features.tessellationShader) { + descriptor_set_layout_binding_transient.stageFlags |= + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + } + if (device_features.geometryShader) { + descriptor_set_layout_binding_transient.stageFlags |= + VK_SHADER_STAGE_GEOMETRY_BIT; + } + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout :: + kUniformBufferSystemConstants)]) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for the system " + "constants uniform buffer"); + return false; + } + // Transient: uniform buffer for compute shaders. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferCompute)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the compute shader"); + return false; + } + // Transient: storage buffer for compute shaders. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kStorageBufferCompute)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a storage buffer " + "bound to the compute shader"); return false; } - // Setup fenced pools used for all our per-frame/per-draw resources. - command_buffer_pool_ = std::make_unique( - provider, provider.queue_family_graphics_compute()); - - // Initialize the state machine caches. 
- buffer_cache_ = std::make_unique( - register_file_, memory_, provider, kDefaultBufferCacheCapacity); - status = buffer_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize buffer cache"); - buffer_cache_->Shutdown(); + shared_memory_ = std::make_unique( + *this, *memory_, trace_writer_, guest_shader_pipeline_stages_); + if (!shared_memory_->Initialize()) { + XELOGE("Failed to initialize shared memory"); return false; } - texture_cache_ = std::make_unique( - memory_, register_file_, &trace_writer_, provider); - status = texture_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize texture cache"); - texture_cache_->Shutdown(); + primitive_processor_ = std::make_unique( + *register_file_, *memory_, trace_writer_, *shared_memory_, *this); + if (!primitive_processor_->Initialize()) { + XELOGE("Failed to initialize the geometric primitive processor"); return false; } - pipeline_cache_ = - std::make_unique(register_file_, provider); - status = pipeline_cache_->Initialize( - buffer_cache_->constant_descriptor_set_layout(), - texture_cache_->texture_descriptor_set_layout(), - buffer_cache_->vertex_descriptor_set_layout()); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize pipeline cache"); - pipeline_cache_->Shutdown(); + // Requires the transient descriptor set layouts. + // TODO(Triang3l): Get the actual draw resolution scale when the texture cache + // supports resolution scaling. + render_target_cache_ = std::make_unique( + *register_file_, *memory_, trace_writer_, 1, 1, *this); + if (!render_target_cache_->Initialize()) { + XELOGE("Failed to initialize the render target cache"); return false; } - render_cache_ = std::make_unique(register_file_, provider); - status = render_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize render cache"); - render_cache_->Shutdown(); + pipeline_cache_ = std::make_unique( + *this, *register_file_, *render_target_cache_, + guest_shader_vertex_stages_); + if (!pipeline_cache_->Initialize()) { + XELOGE("Failed to initialize the graphics pipeline cache"); return false; } + // Requires the transient descriptor set layouts. + // TODO(Triang3l): Actual draw resolution scale. + texture_cache_ = + VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this, + guest_shader_pipeline_stages_); + if (!texture_cache_) { + XELOGE("Failed to initialize the texture cache"); + return false; + } + + // Shared memory and EDRAM common bindings. + VkDescriptorPoolSize descriptor_pool_sizes[1]; + descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_pool_sizes[0].descriptorCount = shared_memory_binding_count; + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. 
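The storage-buffer descriptor count above is the number of bindings the 512 MiB shared memory buffer is split into. A sketch of the likely split (the actual logic lives in SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2 and may differ; 512 MiB is the size implied by the SharedMemory::kBufferSize / (128 << 20) array bound used below): halve the per-binding range until it fits in the device's maxStorageBufferRange.

#include <cstdint>

uint32_t SharedMemoryBindingCountLog2(uint32_t max_storage_buffer_range) {
  constexpr uint32_t kSharedMemorySize = UINT32_C(512) << 20;  // 512 MiB
  uint32_t count_log2 = 0;
  // Each of the 2^count_log2 bindings covers kSharedMemorySize >> count_log2
  // bytes; keep halving until that range is addressable in one binding.
  while ((kSharedMemorySize >> count_log2) > max_storage_buffer_range) {
    ++count_log2;
  }
  return count_log2;
}

Since the specification guarantees maxStorageBufferRange of at least 128 MiB, this yields at most 4 bindings, matching the VkDescriptorBufferInfo array bound used further below.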
+ VkDescriptorPoolCreateInfo descriptor_pool_create_info; + descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_create_info.pNext = nullptr; + descriptor_pool_create_info.flags = 0; + descriptor_pool_create_info.maxSets = 1; + descriptor_pool_create_info.poolSizeCount = 1; + descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes; + if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, + &shared_memory_and_edram_descriptor_pool_) != + VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan descriptor pool for shared memory and " + "EDRAM"); + return false; + } + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorPool = + shared_memory_and_edram_descriptor_pool_; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = + &descriptor_set_layout_shared_memory_and_edram_; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &shared_memory_and_edram_descriptor_set_) != + VK_SUCCESS) { + XELOGE( + "Failed to allocate the Vulkan descriptor set for shared memory and " + "EDRAM"); + return false; + } + VkDescriptorBufferInfo + shared_memory_descriptor_buffers_info[SharedMemory::kBufferSize / + (128 << 20)]; + uint32_t shared_memory_binding_range = + SharedMemory::kBufferSize >> shared_memory_binding_count_log2; + for (uint32_t i = 0; i < shared_memory_binding_count; ++i) { + VkDescriptorBufferInfo& shared_memory_descriptor_buffer_info = + shared_memory_descriptor_buffers_info[i]; + shared_memory_descriptor_buffer_info.buffer = shared_memory_->buffer(); + shared_memory_descriptor_buffer_info.offset = + shared_memory_binding_range * i; + shared_memory_descriptor_buffer_info.range = shared_memory_binding_range; + } + VkWriteDescriptorSet write_descriptor_sets[1]; + write_descriptor_sets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_sets[0].pNext = nullptr; + write_descriptor_sets[0].dstSet = shared_memory_and_edram_descriptor_set_; + write_descriptor_sets[0].dstBinding = 0; + write_descriptor_sets[0].dstArrayElement = 0; + write_descriptor_sets[0].descriptorCount = shared_memory_binding_count; + write_descriptor_sets[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_sets[0].pImageInfo = nullptr; + write_descriptor_sets[0].pBufferInfo = shared_memory_descriptor_buffers_info; + write_descriptor_sets[0].pTexelBufferView = nullptr; + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. + dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr); + + // Swap objects. + + // Gamma ramp, either device-local and host-visible at once, or separate + // device-local texel buffer and host-visible upload buffer. + gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX; + gamma_ramp_pwl_current_frame_ = UINT32_MAX; + // Try to create a device-local host-visible buffer first, to skip copying. 
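The memory-type preference described in the comment above, condensed into a hypothetical helper (a sketch only; Xenia's actual selection in the code that follows applies the same masks through xe::bit_scan_forward): the type must be allowed by the buffer's memory requirements and be both device-local and host-visible, and host-uncached types are preferred because the CPU only ever writes the gamma ramp.

#include <cstdint>

bool PickGammaRampMemoryType(uint32_t required_types,  // from requirements
                             uint32_t device_local_types,
                             uint32_t host_visible_types,
                             uint32_t host_cached_types,
                             uint32_t& type_index_out) {
  uint32_t suitable = required_types & device_local_types & host_visible_types;
  // First choice: suitable and not host-cached (write-only CPU access).
  uint32_t preferred = suitable & ~host_cached_types;
  uint32_t chosen = preferred ? preferred : suitable;
  if (!chosen) {
    return false;  // Fall back to a separate upload buffer and a GPU copy.
  }
  // Index of the lowest set bit, like xe::bit_scan_forward.
  type_index_out = 0;
  while (!(chosen & (uint32_t(1) << type_index_out))) {
    ++type_index_out;
  }
  return true;
}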
+ constexpr uint32_t kGammaRampSize256EntryTable = sizeof(uint32_t) * 256; + constexpr uint32_t kGammaRampSizePWL = sizeof(uint16_t) * 2 * 3 * 128; + constexpr uint32_t kGammaRampSize = + kGammaRampSize256EntryTable + kGammaRampSizePWL; + VkBufferCreateInfo gamma_ramp_host_visible_buffer_create_info; + gamma_ramp_host_visible_buffer_create_info.sType = + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + gamma_ramp_host_visible_buffer_create_info.pNext = nullptr; + gamma_ramp_host_visible_buffer_create_info.flags = 0; + gamma_ramp_host_visible_buffer_create_info.size = + kGammaRampSize * kMaxFramesInFlight; + gamma_ramp_host_visible_buffer_create_info.usage = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + gamma_ramp_host_visible_buffer_create_info.sharingMode = + VK_SHARING_MODE_EXCLUSIVE; + gamma_ramp_host_visible_buffer_create_info.queueFamilyIndexCount = 0; + gamma_ramp_host_visible_buffer_create_info.pQueueFamilyIndices = nullptr; + if (dfn.vkCreateBuffer(device, &gamma_ramp_host_visible_buffer_create_info, + nullptr, &gamma_ramp_buffer_) == VK_SUCCESS) { + bool use_gamma_ramp_host_visible_buffer = false; + VkMemoryRequirements gamma_ramp_host_visible_buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements( + device, gamma_ramp_buffer_, + &gamma_ramp_host_visible_buffer_memory_requirements); + uint32_t gamma_ramp_host_visible_buffer_memory_types = + gamma_ramp_host_visible_buffer_memory_requirements.memoryTypeBits & + (provider.memory_types_device_local() & + provider.memory_types_host_visible()); + VkMemoryAllocateInfo gamma_ramp_host_visible_buffer_memory_allocate_info; + // Prefer a host-uncached (because it's write-only) memory type, but try a + // host-cached host-visible device-local one as well. + if (xe::bit_scan_forward( + gamma_ramp_host_visible_buffer_memory_types & + ~provider.memory_types_host_cached(), + &(gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex)) || + xe::bit_scan_forward( + gamma_ramp_host_visible_buffer_memory_types, + &(gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex))) { + VkMemoryAllocateInfo* + gamma_ramp_host_visible_buffer_memory_allocate_info_last = + &gamma_ramp_host_visible_buffer_memory_allocate_info; + gamma_ramp_host_visible_buffer_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + gamma_ramp_host_visible_buffer_memory_allocate_info.pNext = nullptr; + gamma_ramp_host_visible_buffer_memory_allocate_info.allocationSize = + gamma_ramp_host_visible_buffer_memory_requirements.size; + VkMemoryDedicatedAllocateInfoKHR + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + gamma_ramp_host_visible_buffer_memory_allocate_info_last->pNext = + &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; + gamma_ramp_host_visible_buffer_memory_allocate_info_last = + reinterpret_cast( + &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info); + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.pNext = + nullptr; + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.image = + VK_NULL_HANDLE; + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.buffer = + gamma_ramp_buffer_; + } + if (dfn.vkAllocateMemory( + device, &gamma_ramp_host_visible_buffer_memory_allocate_info, + nullptr, &gamma_ramp_buffer_memory_) == VK_SUCCESS) { + if (dfn.vkBindBufferMemory(device, 
gamma_ramp_buffer_, + gamma_ramp_buffer_memory_, + 0) == VK_SUCCESS) { + if (dfn.vkMapMemory(device, gamma_ramp_buffer_memory_, 0, + VK_WHOLE_SIZE, 0, + &gamma_ramp_upload_mapping_) == VK_SUCCESS) { + use_gamma_ramp_host_visible_buffer = true; + gamma_ramp_upload_memory_size_ = + gamma_ramp_host_visible_buffer_memory_allocate_info + .allocationSize; + gamma_ramp_upload_memory_type_ = + gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex; + } + } + if (!use_gamma_ramp_host_visible_buffer) { + dfn.vkFreeMemory(device, gamma_ramp_buffer_memory_, nullptr); + gamma_ramp_buffer_memory_ = VK_NULL_HANDLE; + } + } + } + if (!use_gamma_ramp_host_visible_buffer) { + dfn.vkDestroyBuffer(device, gamma_ramp_buffer_, nullptr); + gamma_ramp_buffer_ = VK_NULL_HANDLE; + } + } + if (gamma_ramp_buffer_ == VK_NULL_HANDLE) { + // Create separate buffers for the shader and uploading. + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, kGammaRampSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, gamma_ramp_buffer_, + gamma_ramp_buffer_memory_)) { + XELOGE("Failed to create the gamma ramp buffer"); + return false; + } + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, kGammaRampSize * kMaxFramesInFlight, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + ui::vulkan::util::MemoryPurpose::kUpload, gamma_ramp_upload_buffer_, + gamma_ramp_upload_buffer_memory_, &gamma_ramp_upload_memory_type_, + &gamma_ramp_upload_memory_size_)) { + XELOGE("Failed to create the gamma ramp upload buffer"); + return false; + } + if (dfn.vkMapMemory(device, gamma_ramp_upload_buffer_memory_, 0, + VK_WHOLE_SIZE, 0, + &gamma_ramp_upload_mapping_) != VK_SUCCESS) { + XELOGE("Failed to map the gamma ramp upload buffer"); + return false; + } + } + + // Gamma ramp buffer views. + uint32_t gamma_ramp_frame_count = + gamma_ramp_upload_buffer_ == VK_NULL_HANDLE ? kMaxFramesInFlight : 1; + VkBufferViewCreateInfo gamma_ramp_buffer_view_create_info; + gamma_ramp_buffer_view_create_info.sType = + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + gamma_ramp_buffer_view_create_info.pNext = nullptr; + gamma_ramp_buffer_view_create_info.flags = 0; + gamma_ramp_buffer_view_create_info.buffer = gamma_ramp_buffer_; + // 256-entry table. + gamma_ramp_buffer_view_create_info.format = + VK_FORMAT_A2B10G10R10_UNORM_PACK32; + gamma_ramp_buffer_view_create_info.range = kGammaRampSize256EntryTable; + for (uint32_t i = 0; i < gamma_ramp_frame_count; ++i) { + gamma_ramp_buffer_view_create_info.offset = kGammaRampSize * i; + if (dfn.vkCreateBufferView(device, &gamma_ramp_buffer_view_create_info, + nullptr, &gamma_ramp_buffer_views_[i * 2]) != + VK_SUCCESS) { + XELOGE("Failed to create a 256-entry table gamma ramp buffer view"); + return false; + } + } + // Piecewise linear. + gamma_ramp_buffer_view_create_info.format = VK_FORMAT_R16G16_UINT; + gamma_ramp_buffer_view_create_info.range = kGammaRampSizePWL; + for (uint32_t i = 0; i < gamma_ramp_frame_count; ++i) { + gamma_ramp_buffer_view_create_info.offset = + kGammaRampSize * i + kGammaRampSize256EntryTable; + if (dfn.vkCreateBufferView(device, &gamma_ramp_buffer_view_create_info, + nullptr, &gamma_ramp_buffer_views_[i * 2 + 1]) != + VK_SUCCESS) { + XELOGE("Failed to create a PWL gamma ramp buffer view"); + return false; + } + } + + // Swap descriptor set layouts. 
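// (Before the descriptor set layouts: a worked recap of the gamma ramp buffer
// that the two views above slice per frame slot. This is an illustrative
// sketch of the byte layout implied by the constants in this function, not
// part of the change itself.)
//   256-entry table: 256 * sizeof(uint32_t) (A2B10G10R10_UNORM_PACK32) = 1024
//   PWL ramp: 128 entries * 3 channels * 2 * sizeof(uint16_t) (R16G16_UINT)
//             = 1536
//   per frame slot: 1024 + 1536 = 2560 bytes

#include <cstdint>

constexpr uint64_t kGammaRampTableBytes = sizeof(uint32_t) * 256;        // 1024
constexpr uint64_t kGammaRampPWLBytes = sizeof(uint16_t) * 2 * 3 * 128;  // 1536
constexpr uint64_t kGammaRampFrameBytes =
    kGammaRampTableBytes + kGammaRampPWLBytes;  // 2560

// Offset of the 256-entry table view for frame slot `frame`.
constexpr uint64_t GammaRampTableOffset(uint32_t frame) {
  return kGammaRampFrameBytes * frame;
}
// Offset of the PWL view for frame slot `frame` (right after the table).
constexpr uint64_t GammaRampPWLOffset(uint32_t frame) {
  return GammaRampTableOffset(frame) + kGammaRampTableBytes;
}
static_assert(GammaRampPWLOffset(1) == 2560 + 1024, "layout sanity check");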
+  VkDescriptorSetLayoutBinding swap_descriptor_set_layout_binding;
+  swap_descriptor_set_layout_binding.binding = 0;
+  swap_descriptor_set_layout_binding.descriptorCount = 1;
+  swap_descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+  swap_descriptor_set_layout_binding.pImmutableSamplers = nullptr;
+  VkDescriptorSetLayoutCreateInfo swap_descriptor_set_layout_create_info;
+  swap_descriptor_set_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  swap_descriptor_set_layout_create_info.pNext = nullptr;
+  swap_descriptor_set_layout_create_info.flags = 0;
+  swap_descriptor_set_layout_create_info.bindingCount = 1;
+  swap_descriptor_set_layout_create_info.pBindings =
+      &swap_descriptor_set_layout_binding;
+  swap_descriptor_set_layout_binding.descriptorType =
+      VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &swap_descriptor_set_layout_create_info, nullptr,
+          &swap_descriptor_set_layout_sampled_image_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create the presentation sampled image descriptor set "
+        "layout");
+    return false;
+  }
+  swap_descriptor_set_layout_binding.descriptorType =
+      VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &swap_descriptor_set_layout_create_info, nullptr,
+          &swap_descriptor_set_layout_uniform_texel_buffer_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create the presentation uniform texel buffer descriptor set "
+        "layout");
+    return false;
+  }
+
+  // Swap descriptor pool.
+  std::array<VkDescriptorPoolSize, 2> swap_descriptor_pool_sizes;
+  VkDescriptorPoolCreateInfo swap_descriptor_pool_create_info;
+  swap_descriptor_pool_create_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+  swap_descriptor_pool_create_info.pNext = nullptr;
+  swap_descriptor_pool_create_info.flags = 0;
+  swap_descriptor_pool_create_info.maxSets = 0;
+  swap_descriptor_pool_create_info.poolSizeCount = 0;
+  swap_descriptor_pool_create_info.pPoolSizes =
+      swap_descriptor_pool_sizes.data();
+  // TODO(Triang3l): FXAA combined image and sampler sources.
+  {
+    VkDescriptorPoolSize& swap_descriptor_pool_size_sampled_image =
+        swap_descriptor_pool_sizes[swap_descriptor_pool_create_info
+                                       .poolSizeCount++];
+    swap_descriptor_pool_size_sampled_image.type =
+        VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+    // Source images.
+    swap_descriptor_pool_size_sampled_image.descriptorCount =
+        kMaxFramesInFlight;
+    swap_descriptor_pool_create_info.maxSets += kMaxFramesInFlight;
+  }
+  // 256-entry table and PWL gamma ramps - one pair per frame if the gamma ramp
+  // buffer is host-visible, one pair total otherwise.
+  uint32_t gamma_ramp_buffer_view_count = 2 * gamma_ramp_frame_count;
+  {
+    VkDescriptorPoolSize& swap_descriptor_pool_size_uniform_texel_buffer =
+        swap_descriptor_pool_sizes[swap_descriptor_pool_create_info
+                                       .poolSizeCount++];
+    swap_descriptor_pool_size_uniform_texel_buffer.type =
+        VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+    swap_descriptor_pool_size_uniform_texel_buffer.descriptorCount =
+        gamma_ramp_buffer_view_count;
+    swap_descriptor_pool_create_info.maxSets += gamma_ramp_buffer_view_count;
+  }
+  if (dfn.vkCreateDescriptorPool(device, &swap_descriptor_pool_create_info,
+                                 nullptr,
+                                 &swap_descriptor_pool_) != VK_SUCCESS) {
+    XELOGE("Failed to create the presentation descriptor pool");
+    return false;
+  }
+
+  // Swap descriptor set allocation.
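// A worked example of the pool sizing above, before the sets are allocated
// from it (a sketch; the static_asserts assume kMaxFramesInFlight == 3, which
// is an assumption here, not something this hunk states). Every frame slot
// gets one sampled-image set for the source texture, and every gamma ramp
// frame slot gets two uniform texel buffer sets (table and PWL):

#include <cstdint>

constexpr uint32_t SwapDescriptorPoolMaxSets(uint32_t max_frames_in_flight,
                                             bool gamma_ramp_host_visible) {
  // Host-visible device-local ramp buffer: per-frame ramp copies are needed;
  // with a separate upload buffer, a single device-local copy is enough.
  uint32_t gamma_ramp_frame_count =
      gamma_ramp_host_visible ? max_frames_in_flight : 1;
  return max_frames_in_flight + 2 * gamma_ramp_frame_count;
}
static_assert(SwapDescriptorPoolMaxSets(3, true) == 9, "3 + 2 * 3");
static_assert(SwapDescriptorPoolMaxSets(3, false) == 5, "3 + 2 * 1");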
+  VkDescriptorSetAllocateInfo swap_descriptor_set_allocate_info;
+  swap_descriptor_set_allocate_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+  swap_descriptor_set_allocate_info.pNext = nullptr;
+  swap_descriptor_set_allocate_info.descriptorPool = swap_descriptor_pool_;
+  swap_descriptor_set_allocate_info.descriptorSetCount = 1;
+  swap_descriptor_set_allocate_info.pSetLayouts =
+      &swap_descriptor_set_layout_uniform_texel_buffer_;
+  for (uint32_t i = 0; i < gamma_ramp_buffer_view_count; ++i) {
+    if (dfn.vkAllocateDescriptorSets(device, &swap_descriptor_set_allocate_info,
+                                     &swap_descriptors_gamma_ramp_[i]) !=
+        VK_SUCCESS) {
+      XELOGE("Failed to allocate the gamma ramp descriptor sets");
+      return false;
+    }
+  }
+  swap_descriptor_set_allocate_info.pSetLayouts =
+      &swap_descriptor_set_layout_sampled_image_;
+  for (uint32_t i = 0; i < kMaxFramesInFlight; ++i) {
+    if (dfn.vkAllocateDescriptorSets(device, &swap_descriptor_set_allocate_info,
+                                     &swap_descriptors_source_[i]) !=
+        VK_SUCCESS) {
+      XELOGE(
+          "Failed to allocate the presentation source image descriptor sets");
+      return false;
+    }
+  }
+
+  // Gamma ramp descriptor sets.
+  VkWriteDescriptorSet gamma_ramp_write_descriptor_set;
+  gamma_ramp_write_descriptor_set.sType =
+      VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  gamma_ramp_write_descriptor_set.pNext = nullptr;
+  gamma_ramp_write_descriptor_set.dstBinding = 0;
+  gamma_ramp_write_descriptor_set.dstArrayElement = 0;
+  gamma_ramp_write_descriptor_set.descriptorCount = 1;
+  gamma_ramp_write_descriptor_set.descriptorType =
+      VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+  gamma_ramp_write_descriptor_set.pImageInfo = nullptr;
+  gamma_ramp_write_descriptor_set.pBufferInfo = nullptr;
+  for (uint32_t i = 0; i < gamma_ramp_buffer_view_count; ++i) {
+    gamma_ramp_write_descriptor_set.dstSet = swap_descriptors_gamma_ramp_[i];
+    gamma_ramp_write_descriptor_set.pTexelBufferView =
+        &gamma_ramp_buffer_views_[i];
+    dfn.vkUpdateDescriptorSets(device, 1, &gamma_ramp_write_descriptor_set, 0,
+                               nullptr);
+  }
+
+  // Gamma ramp application pipeline layout.
+  std::array<VkDescriptorSetLayout, kSwapApplyGammaDescriptorSetCount>
+      swap_apply_gamma_descriptor_set_layouts{};
+  swap_apply_gamma_descriptor_set_layouts[kSwapApplyGammaDescriptorSetRamp] =
+      swap_descriptor_set_layout_uniform_texel_buffer_;
+  swap_apply_gamma_descriptor_set_layouts[kSwapApplyGammaDescriptorSetSource] =
+      swap_descriptor_set_layout_sampled_image_;
+  VkPipelineLayoutCreateInfo swap_apply_gamma_pipeline_layout_create_info;
+  swap_apply_gamma_pipeline_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+  swap_apply_gamma_pipeline_layout_create_info.pNext = nullptr;
+  swap_apply_gamma_pipeline_layout_create_info.flags = 0;
+  swap_apply_gamma_pipeline_layout_create_info.setLayoutCount =
+      uint32_t(swap_apply_gamma_descriptor_set_layouts.size());
+  swap_apply_gamma_pipeline_layout_create_info.pSetLayouts =
+      swap_apply_gamma_descriptor_set_layouts.data();
+  swap_apply_gamma_pipeline_layout_create_info.pushConstantRangeCount = 0;
+  swap_apply_gamma_pipeline_layout_create_info.pPushConstantRanges = nullptr;
+  if (dfn.vkCreatePipelineLayout(
+          device, &swap_apply_gamma_pipeline_layout_create_info, nullptr,
+          &swap_apply_gamma_pipeline_layout_) != VK_SUCCESS) {
+    XELOGE("Failed to create the gamma ramp application pipeline layout");
+    return false;
+  }
+
+  // Gamma application render pass. Doesn't make assumptions about outer usage
+  // (explicit barriers must be used instead) for simplicity of use in different
+  // scenarios with different pipelines.
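// For reference, the dependency-filling loop in the render pass below expands
// to the following two symmetric dependencies (a sketch, not diff content).
// Both are limited to the color attachment output stage and color attachment
// writes, which is why any other outer usage still needs the explicit
// barriers mentioned above:

#include <vulkan/vulkan.h>

static const VkSubpassDependency kSwapApplyGammaDependenciesExpanded[2] = {
    // Entering the subpass: order this pass's writes after prior color
    // attachment writes to the same image.
    {VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_DEPENDENCY_BY_REGION_BIT},
    // Leaving the subpass: order later color attachment writes after this
    // pass's writes.
    {0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
     VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_DEPENDENCY_BY_REGION_BIT},
};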
+ VkAttachmentDescription swap_apply_gamma_render_pass_attachment; + swap_apply_gamma_render_pass_attachment.flags = 0; + swap_apply_gamma_render_pass_attachment.format = + ui::vulkan::VulkanPresenter::kGuestOutputFormat; + swap_apply_gamma_render_pass_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + swap_apply_gamma_render_pass_attachment.loadOp = + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.storeOp = + VK_ATTACHMENT_STORE_OP_STORE; + swap_apply_gamma_render_pass_attachment.stencilLoadOp = + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.stencilStoreOp = + VK_ATTACHMENT_STORE_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.initialLayout = + VK_IMAGE_LAYOUT_UNDEFINED; + swap_apply_gamma_render_pass_attachment.finalLayout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentReference swap_apply_gamma_render_pass_color_attachment; + swap_apply_gamma_render_pass_color_attachment.attachment = 0; + swap_apply_gamma_render_pass_color_attachment.layout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkSubpassDescription swap_apply_gamma_render_pass_subpass = {}; + swap_apply_gamma_render_pass_subpass.pipelineBindPoint = + VK_PIPELINE_BIND_POINT_GRAPHICS; + swap_apply_gamma_render_pass_subpass.colorAttachmentCount = 1; + swap_apply_gamma_render_pass_subpass.pColorAttachments = + &swap_apply_gamma_render_pass_color_attachment; + VkSubpassDependency swap_apply_gamma_render_pass_dependencies[2]; + for (uint32_t i = 0; i < 2; ++i) { + VkSubpassDependency& swap_apply_gamma_render_pass_dependency = + swap_apply_gamma_render_pass_dependencies[i]; + swap_apply_gamma_render_pass_dependency.srcSubpass = + i ? 0 : VK_SUBPASS_EXTERNAL; + swap_apply_gamma_render_pass_dependency.dstSubpass = + i ? VK_SUBPASS_EXTERNAL : 0; + swap_apply_gamma_render_pass_dependency.srcStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + swap_apply_gamma_render_pass_dependency.dstStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + swap_apply_gamma_render_pass_dependency.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + swap_apply_gamma_render_pass_dependency.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + swap_apply_gamma_render_pass_dependency.dependencyFlags = + VK_DEPENDENCY_BY_REGION_BIT; + } + VkRenderPassCreateInfo swap_apply_gamma_render_pass_create_info; + swap_apply_gamma_render_pass_create_info.sType = + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + swap_apply_gamma_render_pass_create_info.pNext = nullptr; + swap_apply_gamma_render_pass_create_info.flags = 0; + swap_apply_gamma_render_pass_create_info.attachmentCount = 1; + swap_apply_gamma_render_pass_create_info.pAttachments = + &swap_apply_gamma_render_pass_attachment; + swap_apply_gamma_render_pass_create_info.subpassCount = 1; + swap_apply_gamma_render_pass_create_info.pSubpasses = + &swap_apply_gamma_render_pass_subpass; + swap_apply_gamma_render_pass_create_info.dependencyCount = + uint32_t(xe::countof(swap_apply_gamma_render_pass_dependencies)); + swap_apply_gamma_render_pass_create_info.pDependencies = + swap_apply_gamma_render_pass_dependencies; + if (dfn.vkCreateRenderPass(device, &swap_apply_gamma_render_pass_create_info, + nullptr, + &swap_apply_gamma_render_pass_) != VK_SUCCESS) { + XELOGE("Failed to create the gamma ramp application render pass"); + return false; + } + + // Gamma ramp application pipeline. + // Using a graphics pipeline, not a compute one, because storage image support + // is optional for VK_FORMAT_A2B10G10R10_UNORM_PACK32. 
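// A minimal sketch of the support query behind the comment above - storage
// image usage is optional for the swap format in Vulkan, so a compute-based
// gamma path could not be relied on everywhere. This is a hypothetical
// standalone helper for illustration; it is not how this file queries format
// support:

#include <vulkan/vulkan.h>

static bool FormatSupportsStorageImage(VkPhysicalDevice physical_device,
                                       VkFormat format) {
  VkFormatProperties format_properties;
  vkGetPhysicalDeviceFormatProperties(physical_device, format,
                                      &format_properties);
  return (format_properties.optimalTilingFeatures &
          VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
}
// FormatSupportsStorageImage(gpu, VK_FORMAT_A2B10G10R10_UNORM_PACK32) may
// legitimately return false - hence the fullscreen graphics pipeline below.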
+
+  enum SwapApplyGammaPixelShader {
+    kSwapApplyGammaPixelShader256EntryTable,
+    kSwapApplyGammaPixelShaderPWL,
+
+    kSwapApplyGammaPixelShaderCount,
+  };
+  std::array<VkShaderModule, kSwapApplyGammaPixelShaderCount>
+      swap_apply_gamma_pixel_shaders{};
+  bool swap_apply_gamma_pixel_shaders_created =
+      (swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShader256EntryTable] =
+           ui::vulkan::util::CreateShaderModule(
+               provider, shaders::apply_gamma_table_ps,
+               sizeof(shaders::apply_gamma_table_ps))) != VK_NULL_HANDLE &&
+      (swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShaderPWL] =
+           ui::vulkan::util::CreateShaderModule(
+               provider, shaders::apply_gamma_pwl_ps,
+               sizeof(shaders::apply_gamma_pwl_ps))) != VK_NULL_HANDLE;
+  if (!swap_apply_gamma_pixel_shaders_created) {
+    XELOGE("Failed to create the gamma ramp application pixel shader modules");
+    for (VkShaderModule swap_apply_gamma_pixel_shader :
+         swap_apply_gamma_pixel_shaders) {
+      if (swap_apply_gamma_pixel_shader != VK_NULL_HANDLE) {
+        dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader,
+                                  nullptr);
+      }
+    }
+    return false;
+  }
+
+  VkPipelineShaderStageCreateInfo swap_apply_gamma_pipeline_stages[2];
+  swap_apply_gamma_pipeline_stages[0].sType =
+      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+  swap_apply_gamma_pipeline_stages[0].pNext = nullptr;
+  swap_apply_gamma_pipeline_stages[0].flags = 0;
+  swap_apply_gamma_pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
+  swap_apply_gamma_pipeline_stages[0].module =
+      ui::vulkan::util::CreateShaderModule(provider, shaders::fullscreen_cw_vs,
+                                           sizeof(shaders::fullscreen_cw_vs));
+  if (swap_apply_gamma_pipeline_stages[0].module == VK_NULL_HANDLE) {
+    XELOGE("Failed to create the gamma ramp application vertex shader module");
+    for (VkShaderModule swap_apply_gamma_pixel_shader :
+         swap_apply_gamma_pixel_shaders) {
+      assert_true(swap_apply_gamma_pixel_shader != VK_NULL_HANDLE);
+      dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader, nullptr);
+    }
+    return false;
+  }
+  swap_apply_gamma_pipeline_stages[0].pName = "main";
+  swap_apply_gamma_pipeline_stages[0].pSpecializationInfo = nullptr;
+  swap_apply_gamma_pipeline_stages[1].sType =
+      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+  swap_apply_gamma_pipeline_stages[1].pNext = nullptr;
+  swap_apply_gamma_pipeline_stages[1].flags = 0;
+  swap_apply_gamma_pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
+  // The fragment shader module will be specified later.
+ swap_apply_gamma_pipeline_stages[1].pName = "main"; + swap_apply_gamma_pipeline_stages[1].pSpecializationInfo = nullptr; + + VkPipelineVertexInputStateCreateInfo + swap_apply_gamma_pipeline_vertex_input_state = {}; + swap_apply_gamma_pipeline_vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + VkPipelineInputAssemblyStateCreateInfo + swap_apply_gamma_pipeline_input_assembly_state; + swap_apply_gamma_pipeline_input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_input_assembly_state.pNext = nullptr; + swap_apply_gamma_pipeline_input_assembly_state.flags = 0; + swap_apply_gamma_pipeline_input_assembly_state.topology = + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + swap_apply_gamma_pipeline_input_assembly_state.primitiveRestartEnable = + VK_FALSE; + + VkPipelineViewportStateCreateInfo swap_apply_gamma_pipeline_viewport_state; + swap_apply_gamma_pipeline_viewport_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_viewport_state.pNext = nullptr; + swap_apply_gamma_pipeline_viewport_state.flags = 0; + swap_apply_gamma_pipeline_viewport_state.viewportCount = 1; + swap_apply_gamma_pipeline_viewport_state.pViewports = nullptr; + swap_apply_gamma_pipeline_viewport_state.scissorCount = 1; + swap_apply_gamma_pipeline_viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo + swap_apply_gamma_pipeline_rasterization_state = {}; + swap_apply_gamma_pipeline_rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_rasterization_state.polygonMode = + VK_POLYGON_MODE_FILL; + swap_apply_gamma_pipeline_rasterization_state.cullMode = VK_CULL_MODE_NONE; + swap_apply_gamma_pipeline_rasterization_state.frontFace = + VK_FRONT_FACE_CLOCKWISE; + swap_apply_gamma_pipeline_rasterization_state.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo + swap_apply_gamma_pipeline_multisample_state = {}; + swap_apply_gamma_pipeline_multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_multisample_state.rasterizationSamples = + VK_SAMPLE_COUNT_1_BIT; + + VkPipelineColorBlendAttachmentState + swap_apply_gamma_pipeline_color_blend_attachment_state = {}; + swap_apply_gamma_pipeline_color_blend_attachment_state.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + VkPipelineColorBlendStateCreateInfo + swap_apply_gamma_pipeline_color_blend_state = {}; + swap_apply_gamma_pipeline_color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_color_blend_state.attachmentCount = 1; + swap_apply_gamma_pipeline_color_blend_state.pAttachments = + &swap_apply_gamma_pipeline_color_blend_attachment_state; + + static const VkDynamicState kSwapApplyGammaPipelineDynamicStates[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + VkPipelineDynamicStateCreateInfo swap_apply_gamma_pipeline_dynamic_state; + swap_apply_gamma_pipeline_dynamic_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + swap_apply_gamma_pipeline_dynamic_state.pNext = nullptr; + swap_apply_gamma_pipeline_dynamic_state.flags = 0; + swap_apply_gamma_pipeline_dynamic_state.dynamicStateCount = + uint32_t(xe::countof(kSwapApplyGammaPipelineDynamicStates)); + swap_apply_gamma_pipeline_dynamic_state.pDynamicStates = + 
kSwapApplyGammaPipelineDynamicStates; + + VkGraphicsPipelineCreateInfo swap_apply_gamma_pipeline_create_info; + swap_apply_gamma_pipeline_create_info.sType = + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + swap_apply_gamma_pipeline_create_info.pNext = nullptr; + swap_apply_gamma_pipeline_create_info.flags = 0; + swap_apply_gamma_pipeline_create_info.stageCount = + uint32_t(xe::countof(swap_apply_gamma_pipeline_stages)); + swap_apply_gamma_pipeline_create_info.pStages = + swap_apply_gamma_pipeline_stages; + swap_apply_gamma_pipeline_create_info.pVertexInputState = + &swap_apply_gamma_pipeline_vertex_input_state; + swap_apply_gamma_pipeline_create_info.pInputAssemblyState = + &swap_apply_gamma_pipeline_input_assembly_state; + swap_apply_gamma_pipeline_create_info.pTessellationState = nullptr; + swap_apply_gamma_pipeline_create_info.pViewportState = + &swap_apply_gamma_pipeline_viewport_state; + swap_apply_gamma_pipeline_create_info.pRasterizationState = + &swap_apply_gamma_pipeline_rasterization_state; + swap_apply_gamma_pipeline_create_info.pMultisampleState = + &swap_apply_gamma_pipeline_multisample_state; + swap_apply_gamma_pipeline_create_info.pDepthStencilState = nullptr; + swap_apply_gamma_pipeline_create_info.pColorBlendState = + &swap_apply_gamma_pipeline_color_blend_state; + swap_apply_gamma_pipeline_create_info.pDynamicState = + &swap_apply_gamma_pipeline_dynamic_state; + swap_apply_gamma_pipeline_create_info.layout = + swap_apply_gamma_pipeline_layout_; + swap_apply_gamma_pipeline_create_info.renderPass = + swap_apply_gamma_render_pass_; + swap_apply_gamma_pipeline_create_info.subpass = 0; + swap_apply_gamma_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + swap_apply_gamma_pipeline_create_info.basePipelineIndex = -1; + swap_apply_gamma_pipeline_stages[1].module = + swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShader256EntryTable]; + VkResult swap_apply_gamma_pipeline_256_entry_table_create_result = + dfn.vkCreateGraphicsPipelines( + device, VK_NULL_HANDLE, 1, &swap_apply_gamma_pipeline_create_info, + nullptr, &swap_apply_gamma_256_entry_table_pipeline_); + swap_apply_gamma_pipeline_stages[1].module = + swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShaderPWL]; + VkResult swap_apply_gamma_pipeline_pwl_create_result = + dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &swap_apply_gamma_pipeline_create_info, + nullptr, &swap_apply_gamma_pwl_pipeline_); + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pipeline_stages[0].module, + nullptr); + for (VkShaderModule swap_apply_gamma_pixel_shader : + swap_apply_gamma_pixel_shaders) { + assert_true(swap_apply_gamma_pixel_shader != VK_NULL_HANDLE); + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader, nullptr); + } + if (swap_apply_gamma_pipeline_256_entry_table_create_result != VK_SUCCESS || + swap_apply_gamma_pipeline_pwl_create_result != VK_SUCCESS) { + XELOGE("Failed to create the gamma ramp application pipelines"); + return false; + } + + // Just not to expose uninitialized memory. + std::memset(&system_constants_, 0, sizeof(system_constants_)); + return true; } void VulkanCommandProcessor::ShutdownContext() { - // TODO(benvanik): wait until idle. 
+ AwaitAllQueueOperationsCompletion(); const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - swap_submission_tracker_.Shutdown(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, - swap_framebuffer_); - swap_framebuffer_version_ = UINT64_MAX; + DestroyScratchBuffer(); + + for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, + swap_framebuffer.framebuffer); + } + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, + swap_apply_gamma_pwl_pipeline_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyPipeline, device, + swap_apply_gamma_256_entry_table_pipeline_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device, + swap_apply_gamma_render_pass_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + swap_apply_gamma_pipeline_layout_); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + swap_descriptor_pool_); + + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + swap_descriptor_set_layout_uniform_texel_buffer_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + swap_descriptor_set_layout_sampled_image_); + for (VkBufferView& gamma_ramp_buffer_view : gamma_ramp_buffer_views_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBufferView, device, + gamma_ramp_buffer_view); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + gamma_ramp_upload_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + gamma_ramp_upload_buffer_memory_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + gamma_ramp_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + gamma_ramp_buffer_memory_); + + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorPool, device, + shared_memory_and_edram_descriptor_pool_); - buffer_cache_.reset(); - pipeline_cache_.reset(); - render_cache_.reset(); texture_cache_.reset(); - blitter_.reset(); + pipeline_cache_.reset(); - // Free all pools. This must come after all of our caches clean up. 
- command_buffer_pool_.reset(); + render_target_cache_.reset(); + + primitive_processor_.reset(); + + shared_memory_.reset(); + + ClearTransientDescriptorPools(); + + for (const auto& pipeline_layout_pair : pipeline_layouts_) { + dfn.vkDestroyPipelineLayout( + device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); + } + pipeline_layouts_.clear(); + for (const auto& descriptor_set_layout_pair : + descriptor_set_layouts_textures_) { + dfn.vkDestroyDescriptorSetLayout(device, descriptor_set_layout_pair.second, + nullptr); + } + descriptor_set_layouts_textures_.clear(); + + for (VkDescriptorSetLayout& descriptor_set_layout_single_transient : + descriptor_set_layouts_single_transient_) { + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_single_transient); + } + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_shared_memory_and_edram_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, descriptor_set_layout_empty_); + + uniform_buffer_pool_.reset(); + + sparse_bind_wait_stage_mask_ = 0; + sparse_buffer_binds_.clear(); + sparse_memory_binds_.clear(); + + deferred_command_buffer_.Reset(); + for (const auto& command_buffer_pair : command_buffers_submitted_) { + dfn.vkDestroyCommandPool(device, command_buffer_pair.second.pool, nullptr); + } + command_buffers_submitted_.clear(); + for (const CommandBuffer& command_buffer : command_buffers_writable_) { + dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); + } + command_buffers_writable_.clear(); + + for (const auto& destroy_pair : destroy_framebuffers_) { + dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr); + } + destroy_framebuffers_.clear(); + for (const auto& destroy_pair : destroy_buffers_) { + dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr); + } + destroy_buffers_.clear(); + for (const auto& destroy_pair : destroy_memory_) { + dfn.vkFreeMemory(device, destroy_pair.second, nullptr); + } + destroy_memory_.clear(); + + std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_)); + frame_completed_ = 0; + frame_current_ = 1; + frame_open_ = false; + + for (const auto& semaphore : submissions_in_flight_semaphores_) { + dfn.vkDestroySemaphore(device, semaphore.second, nullptr); + } + submissions_in_flight_semaphores_.clear(); + for (VkFence& fence : submissions_in_flight_fences_) { + dfn.vkDestroyFence(device, fence, nullptr); + } + submissions_in_flight_fences_.clear(); + current_submission_wait_stage_masks_.clear(); + for (VkSemaphore semaphore : current_submission_wait_semaphores_) { + dfn.vkDestroySemaphore(device, semaphore, nullptr); + } + current_submission_wait_semaphores_.clear(); + submission_completed_ = 0; + submission_open_ = false; + + for (VkSemaphore semaphore : semaphores_free_) { + dfn.vkDestroySemaphore(device, semaphore, nullptr); + } + semaphores_free_.clear(); + for (VkFence fence : fences_free_) { + dfn.vkDestroyFence(device, fence, nullptr); + } + fences_free_.clear(); + + device_lost_ = false; CommandProcessor::ShutdownContext(); } -void VulkanCommandProcessor::MakeCoherent() { - RegisterFile* regs = register_file_; - auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; - - CommandProcessor::MakeCoherent(); - - // Make region coherent - if (status_host & 0x80000000ul) { - // TODO(benvanik): less-fine-grained clearing. 
- buffer_cache_->InvalidateCache(); - - if ((status_host & 0x01000000) != 0 && (status_host & 0x02000000) == 0) { - coher_base_vc_ = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - coher_size_vc_ = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; - } - } -} - void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { CommandProcessor::WriteRegister(index, value); if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_000_X; - offset /= 4 * 4; - offset ^= 0x3F; - - dirty_float_constants_ |= (1ull << offset); + if (frame_open_) { + uint32_t float_constant_index = + (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; + if (float_constant_index >= 256) { + float_constant_index -= 256; + if (current_float_constant_map_pixel_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(UINT32_C(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + } + } else { + if (current_float_constant_map_vertex_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(UINT32_C(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + } + } + } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && - index <= XE_GPU_REG_SHADER_CONSTANT_BOOL_224_255) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031; - offset ^= 0x7; - - dirty_bool_constants_ |= (1 << offset); - } else if (index >= XE_GPU_REG_SHADER_CONSTANT_LOOP_00 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_LOOP_00; - offset ^= 0x1F; - - dirty_loop_constants_ |= (1 << offset); - } -} - -void VulkanCommandProcessor::BeginFrame() { - assert_false(frame_open_); - - // TODO(benvanik): bigger batches. - // TODO(DrChat): Decouple setup buffer from current batch. - // Begin a new batch, and allocate and begin a command buffer and setup - // buffer. - current_batch_fence_ = command_buffer_pool_->BeginBatch(); - current_command_buffer_ = command_buffer_pool_->AcquireEntry(); - current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); - - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - - VkCommandBufferBeginInfo command_buffer_begin_info; - command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - command_buffer_begin_info.pNext = nullptr; - command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - command_buffer_begin_info.pInheritanceInfo = nullptr; - auto status = dfn.vkBeginCommandBuffer(current_command_buffer_, - &command_buffer_begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - - status = dfn.vkBeginCommandBuffer(current_setup_buffer_, - &command_buffer_begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - - // Flag renderdoc down to start a capture if requested. - // The capture will end when these commands are submitted to the queue. 
-  if ((cvars::vulkan_renderdoc_capture_all || trace_requested_) &&
-      !capturing_) {
-    const RENDERDOC_API_1_0_0* renderdoc_api =
-        provider.renderdoc_api().api_1_0_0();
-    if (renderdoc_api && !renderdoc_api->IsFrameCapturing()) {
-      capturing_ = true;
-      trace_requested_ = false;
-      renderdoc_api->StartFrameCapture(nullptr, nullptr);
+    current_graphics_descriptor_set_values_up_to_date_ &= ~(
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants);
+  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
+             index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
+    current_graphics_descriptor_set_values_up_to_date_ &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants);
+    if (texture_cache_) {
+      texture_cache_->TextureFetchConstantWritten(
+          (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6);
     }
   }
-
-  frame_open_ = true;
 }

-void VulkanCommandProcessor::EndFrame() {
-  if (current_render_state_) {
-    render_cache_->EndRenderPass();
-    current_render_state_ = nullptr;
+void VulkanCommandProcessor::SparseBindBuffer(
+    VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
+    VkPipelineStageFlags wait_stage_mask) {
+  if (!bind_count) {
+    return;
   }
+  SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back();
+  buffer_bind.buffer = buffer;
+  buffer_bind.bind_offset = sparse_memory_binds_.size();
+  buffer_bind.bind_count = bind_count;
+  sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count);
+  sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds,
+                              binds + bind_count);
+  sparse_bind_wait_stage_mask_ |= wait_stage_mask;
+}

-  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
-  VkResult status = VK_SUCCESS;
-  status = dfn.vkEndCommandBuffer(current_setup_buffer_);
-  CheckResult(status, "vkEndCommandBuffer");
-  status = dfn.vkEndCommandBuffer(current_command_buffer_);
-  CheckResult(status, "vkEndCommandBuffer");
+void VulkanCommandProcessor::OnGammaRamp256EntryTableValueWritten() {
+  gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX;
+}

-  current_command_buffer_ = nullptr;
-  current_setup_buffer_ = nullptr;
-  command_buffer_pool_->EndBatch();
-
-  frame_open_ = false;
+void VulkanCommandProcessor::OnGammaRampPWLValueWritten() {
+  gamma_ramp_pwl_current_frame_ = UINT32_MAX;
 }

 void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
@@ -269,1113 +1196,2885 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
     return;
   }

-  std::vector<VkCommandBuffer> submit_buffers;
-  if (frame_open_) {
-    // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up.
-    submit_buffers.push_back(current_setup_buffer_);
-    submit_buffers.push_back(current_command_buffer_);
+  // In case the swap command is the only one in the frame.
+  if (!BeginSubmission(true)) {
+    return;
   }

-  bool submitted = false;
-  auto& regs = *register_file_;
-  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0;
-  auto group =
-      reinterpret_cast<xe_gpu_fetch_group*>(&regs.values[r]);
-  TextureInfo texture_info;
-  if (!TextureInfo::Prepare(group->texture_fetch, &texture_info)) {
-    assert_always();
+  // Obtaining the actual front buffer size to pass to RefreshGuestOutput,
+  // resolution-scaled if it's a resolve destination, or unscaled otherwise.
+ uint32_t frontbuffer_width_scaled, frontbuffer_height_scaled; + xenos::TextureFormat frontbuffer_format; + VkImageView swap_texture_view = texture_cache_->RequestSwapTexture( + frontbuffer_width_scaled, frontbuffer_height_scaled, frontbuffer_format); + if (swap_texture_view == VK_NULL_HANDLE) { + return; } - auto texture = texture_cache_->Lookup(texture_info); - if (texture) { - presenter->RefreshGuestOutput( - frontbuffer_width, frontbuffer_height, 1280, 720, - [this, frontbuffer_width, frontbuffer_height, texture, &submit_buffers, - &submitted]( - ui::Presenter::GuestOutputRefreshContext& context) -> bool { - auto& vulkan_context = static_cast< - ui::vulkan::VulkanPresenter::VulkanGuestOutputRefreshContext&>( - context); - ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = - provider.dfn(); - VkDevice device = provider.device(); + presenter->RefreshGuestOutput( + frontbuffer_width_scaled, frontbuffer_height_scaled, 1280, 720, + [this, frontbuffer_width_scaled, frontbuffer_height_scaled, + frontbuffer_format, swap_texture_view]( + ui::Presenter::GuestOutputRefreshContext& context) -> bool { + // In case the swap command is the only one in the frame. + if (!BeginSubmission(true)) { + return false; + } - // Make sure the framebuffer is for the current guest output image. - if (swap_framebuffer_ != VK_NULL_HANDLE && - swap_framebuffer_version_ != vulkan_context.image_version()) { - swap_submission_tracker_.AwaitAllSubmissionsCompletion(); - dfn.vkDestroyFramebuffer(device, swap_framebuffer_, nullptr); - swap_framebuffer_ = VK_NULL_HANDLE; - } - if (swap_framebuffer_ == VK_NULL_HANDLE) { - VkRenderPass render_pass = blitter_->GetRenderPass( - ui::vulkan::VulkanPresenter::kGuestOutputFormat, true); - if (render_pass == VK_NULL_HANDLE) { - return false; + auto& vulkan_context = static_cast< + ui::vulkan::VulkanPresenter::VulkanGuestOutputRefreshContext&>( + context); + uint64_t guest_output_image_version = vulkan_context.image_version(); + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + uint32_t swap_frame_index = + uint32_t(frame_current_ % kMaxFramesInFlight); + + // This is according to D3D::InitializePresentationParameters from a + // game executable, which initializes the 256-entry table gamma ramp for + // 8_8_8_8 output and the PWL gamma ramp for 2_10_10_10. + // TODO(Triang3l): Choose between the table and PWL based on + // DC_LUTA_CONTROL, support both for all formats (and also different + // increments for PWL). + bool use_pwl_gamma_ramp = + frontbuffer_format == xenos::TextureFormat::k_2_10_10_10 || + frontbuffer_format == + xenos::TextureFormat::k_2_10_10_10_AS_16_16_16_16; + + // TODO(Triang3l): FXAA can result in more than 8 bits of precision. + context.SetIs8bpc(!use_pwl_gamma_ramp); + + // Update the gamma ramp if it's out of date. + uint32_t& gamma_ramp_frame_index_ref = + use_pwl_gamma_ramp ? gamma_ramp_pwl_current_frame_ + : gamma_ramp_256_entry_table_current_frame_; + if (gamma_ramp_frame_index_ref == UINT32_MAX) { + constexpr uint32_t kGammaRampSize256EntryTable = + sizeof(uint32_t) * 256; + constexpr uint32_t kGammaRampSizePWL = sizeof(uint16_t) * 2 * 3 * 128; + constexpr uint32_t kGammaRampSize = + kGammaRampSize256EntryTable + kGammaRampSizePWL; + uint32_t gamma_ramp_offset_in_frame = + use_pwl_gamma_ramp ? 
kGammaRampSize256EntryTable : 0;
+          uint32_t gamma_ramp_upload_offset =
+              kGammaRampSize * swap_frame_index + gamma_ramp_offset_in_frame;
+          uint32_t gamma_ramp_size = use_pwl_gamma_ramp
+                                         ? kGammaRampSizePWL
+                                         : kGammaRampSize256EntryTable;
+          void* gamma_ramp_frame_upload =
+              reinterpret_cast<uint8_t*>(gamma_ramp_upload_mapping_) +
+              gamma_ramp_upload_offset;
+          if (std::endian::native != std::endian::little &&
+              use_pwl_gamma_ramp) {
+            // R16G16 is R16 first (where the shader expects the base) and G16
+            // second (where the delta should be), but gamma_ramp_pwl_rgb() is
+            // an array of 32-bit DC_LUT_PWL_DATA registers - swap the 16-bit
+            // halves in each 32.
+            auto gamma_ramp_pwl_upload =
+                reinterpret_cast<reg::DC_LUT_PWL_DATA*>(
+                    gamma_ramp_frame_upload);
+            const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl = gamma_ramp_pwl_rgb();
+            for (size_t i = 0; i < 128 * 3; ++i) {
+              reg::DC_LUT_PWL_DATA& gamma_ramp_pwl_upload_entry =
+                  gamma_ramp_pwl_upload[i];
+              reg::DC_LUT_PWL_DATA gamma_ramp_pwl_entry = gamma_ramp_pwl[i];
+              gamma_ramp_pwl_upload_entry.base = gamma_ramp_pwl_entry.delta;
+              gamma_ramp_pwl_upload_entry.delta = gamma_ramp_pwl_entry.base;
+            }
-          VkImageView guest_output_image_view = vulkan_context.image_view();
-          VkFramebufferCreateInfo swap_framebuffer_create_info;
-          swap_framebuffer_create_info.sType =
-              VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
-          swap_framebuffer_create_info.pNext = nullptr;
-          swap_framebuffer_create_info.flags = 0;
-          swap_framebuffer_create_info.renderPass = render_pass;
-          swap_framebuffer_create_info.attachmentCount = 1;
-          swap_framebuffer_create_info.pAttachments =
-              &guest_output_image_view;
-          swap_framebuffer_create_info.width = frontbuffer_width;
-          swap_framebuffer_create_info.height = frontbuffer_height;
-          swap_framebuffer_create_info.layers = 1;
-          if (dfn.vkCreateFramebuffer(device, &swap_framebuffer_create_info,
-                                      nullptr,
-                                      &swap_framebuffer_) != VK_SUCCESS) {
-            XELOGE(
-                "Failed to create the Vulkan framebuffer for presentation");
-            return false;
-          }
-          swap_framebuffer_version_ = vulkan_context.image_version();
-        }
+          } else {
+            std::memcpy(
+                gamma_ramp_frame_upload,
+                use_pwl_gamma_ramp
+                    ? static_cast<const void*>(gamma_ramp_pwl_rgb())
+                    : static_cast<const void*>(gamma_ramp_256_entry_table()),
+                gamma_ramp_size);
+          }
+          bool gamma_ramp_has_upload_buffer =
+              gamma_ramp_upload_buffer_memory_ != VK_NULL_HANDLE;
+          ui::vulkan::util::FlushMappedMemoryRange(
+              provider,
+              gamma_ramp_has_upload_buffer ? gamma_ramp_upload_buffer_memory_
+                                           : gamma_ramp_buffer_memory_,
+              gamma_ramp_upload_memory_type_, gamma_ramp_upload_offset,
+              gamma_ramp_upload_memory_size_, gamma_ramp_size);
+          if (gamma_ramp_has_upload_buffer) {
+            // Copy from the host-visible buffer to the device-local one.
+ PushBufferMemoryBarrier( + gamma_ramp_buffer_, gamma_ramp_offset_in_frame, gamma_ramp_size, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, false); + SubmitBarriers(true); + VkBufferCopy gamma_ramp_buffer_copy; + gamma_ramp_buffer_copy.srcOffset = gamma_ramp_upload_offset; + gamma_ramp_buffer_copy.dstOffset = gamma_ramp_offset_in_frame; + gamma_ramp_buffer_copy.size = gamma_ramp_size; + deferred_command_buffer_.CmdVkCopyBuffer(gamma_ramp_upload_buffer_, + gamma_ramp_buffer_, 1, + &gamma_ramp_buffer_copy); + PushBufferMemoryBarrier( + gamma_ramp_buffer_, gamma_ramp_offset_in_frame, gamma_ramp_size, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false); + } + // The device-local, but not host-visible, gamma ramp buffer doesn't + // have per-frame sets of gamma ramps. + gamma_ramp_frame_index_ref = + gamma_ramp_has_upload_buffer ? 0 : swap_frame_index; + } + + // Make sure a framebuffer is available for the current guest output + // image version. + size_t swap_framebuffer_index = SIZE_MAX; + size_t swap_framebuffer_new_index = SIZE_MAX; + // Try to find the existing framebuffer for the current guest output + // image version, or an unused (without an existing framebuffer, or with + // one, but that has never actually been used dynamically) slot. + for (size_t i = 0; i < swap_framebuffers_.size(); ++i) { + const SwapFramebuffer& existing_swap_framebuffer = + swap_framebuffers_[i]; + if (existing_swap_framebuffer.framebuffer != VK_NULL_HANDLE && + existing_swap_framebuffer.version == guest_output_image_version) { + swap_framebuffer_index = i; + break; + } + if (existing_swap_framebuffer.framebuffer == VK_NULL_HANDLE || + !existing_swap_framebuffer.last_submission) { + swap_framebuffer_new_index = i; + } + } + if (swap_framebuffer_index == SIZE_MAX) { + if (swap_framebuffer_new_index == SIZE_MAX) { + // Replace the earliest used framebuffer. + swap_framebuffer_new_index = 0; + for (size_t i = 1; i < swap_framebuffers_.size(); ++i) { + if (swap_framebuffers_[i].last_submission < + swap_framebuffers_[swap_framebuffer_new_index] + .last_submission) { + swap_framebuffer_new_index = i; + } } - swap_framebuffer_version_ = vulkan_context.image_version(); } - - // Build a final command buffer that copies the game's frontbuffer - // texture into our backbuffer texture. - VkCommandBuffer copy_commands = nullptr; - bool opened_batch = !command_buffer_pool_->has_open_batch(); - if (!command_buffer_pool_->has_open_batch()) { - current_batch_fence_ = command_buffer_pool_->BeginBatch(); - } - copy_commands = command_buffer_pool_->AcquireEntry(); - - VkCommandBufferBeginInfo command_buffer_begin_info; - command_buffer_begin_info.sType = - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - command_buffer_begin_info.pNext = nullptr; - command_buffer_begin_info.flags = - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - command_buffer_begin_info.pInheritanceInfo = nullptr; - dfn.vkBeginCommandBuffer(copy_commands, &command_buffer_begin_info); - - texture->in_flight_fence = current_batch_fence_; - - // Insert a barrier so the GPU finishes writing to the image, and a - // barrier after the last presenter's usage of the guest output image. 
- VkPipelineStageFlags acquire_barrier_src_stages = 0; - VkPipelineStageFlags acquire_barrier_dst_stages = 0; - VkImageMemoryBarrier acquire_image_memory_barriers[2]; - uint32_t acquire_image_memory_barrier_count = 0; - { - acquire_barrier_src_stages |= - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT; - acquire_barrier_dst_stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - VkImageMemoryBarrier& acquire_image_memory_barrier = - acquire_image_memory_barriers - [acquire_image_memory_barrier_count++]; - acquire_image_memory_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - acquire_image_memory_barrier.pNext = nullptr; - acquire_image_memory_barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT; - acquire_image_memory_barrier.dstAccessMask = - VK_ACCESS_SHADER_READ_BIT; - acquire_image_memory_barrier.oldLayout = texture->image_layout; - acquire_image_memory_barrier.newLayout = texture->image_layout; - acquire_image_memory_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - acquire_image_memory_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - acquire_image_memory_barrier.image = texture->image; - acquire_image_memory_barrier.subresourceRange = - ui::vulkan::util::InitializeSubresourceRange(); - } - { - acquire_barrier_dst_stages |= - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - VkImageMemoryBarrier& acquire_image_memory_barrier = - acquire_image_memory_barriers - [acquire_image_memory_barrier_count++]; - acquire_image_memory_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - acquire_image_memory_barrier.pNext = nullptr; - acquire_image_memory_barrier.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - // Will be overwriting all the contents. - acquire_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - acquire_image_memory_barrier.newLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - acquire_image_memory_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - acquire_image_memory_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - acquire_image_memory_barrier.image = vulkan_context.image(); - acquire_image_memory_barrier.subresourceRange = - ui::vulkan::util::InitializeSubresourceRange(); - if (vulkan_context.image_ever_written_previously()) { - acquire_barrier_src_stages |= - ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask; - acquire_image_memory_barrier.srcAccessMask = - ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask; + swap_framebuffer_index = swap_framebuffer_new_index; + SwapFramebuffer& new_swap_framebuffer = + swap_framebuffers_[swap_framebuffer_new_index]; + if (new_swap_framebuffer.framebuffer != VK_NULL_HANDLE) { + if (submission_completed_ >= new_swap_framebuffer.last_submission) { + dfn.vkDestroyFramebuffer(device, new_swap_framebuffer.framebuffer, + nullptr); } else { - acquire_image_memory_barrier.srcAccessMask = 0; + destroy_framebuffers_.emplace_back( + new_swap_framebuffer.last_submission, + new_swap_framebuffer.framebuffer); } + new_swap_framebuffer.framebuffer = VK_NULL_HANDLE; } - assert_not_zero(acquire_barrier_src_stages); - assert_not_zero(acquire_barrier_dst_stages); - assert_not_zero(acquire_image_memory_barrier_count); - dfn.vkCmdPipelineBarrier(copy_commands, acquire_barrier_src_stages, - acquire_barrier_dst_stages, 0, 0, nullptr, 0, - nullptr, acquire_image_memory_barrier_count, - acquire_image_memory_barriers); - - // Part of the source image that we want to blit from. 
- VkRect2D src_rect = { - {0, 0}, - {texture->texture_info.width + 1, - texture->texture_info.height + 1}, - }; - VkRect2D dst_rect = {{0, 0}, {frontbuffer_width, frontbuffer_height}}; - - VkViewport viewport = { - 0.f, 0.f, float(frontbuffer_width), float(frontbuffer_height), - 0.f, 1.f}; - - VkRect2D scissor = {{0, 0}, {frontbuffer_width, frontbuffer_height}}; - - blitter_->BlitTexture2D( - copy_commands, current_batch_fence_, - texture_cache_->DemandView(texture, 0x688)->view, src_rect, - {texture->texture_info.width + 1, - texture->texture_info.height + 1}, - ui::vulkan::VulkanPresenter::kGuestOutputFormat, dst_rect, - {frontbuffer_width, frontbuffer_height}, swap_framebuffer_, - viewport, scissor, VK_FILTER_LINEAR, true, true); - - VkPipelineStageFlags release_barrier_src_stages = 0; - VkPipelineStageFlags release_barrier_dst_stages = 0; - VkImageMemoryBarrier release_image_memory_barriers[2]; - uint32_t release_image_memory_barrier_count = 0; - { - release_barrier_src_stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - release_barrier_dst_stages |= - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT; - VkImageMemoryBarrier& release_image_memory_barrier = - release_image_memory_barriers - [release_image_memory_barrier_count++]; - release_image_memory_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - release_image_memory_barrier.pNext = nullptr; - release_image_memory_barrier.srcAccessMask = - VK_ACCESS_SHADER_READ_BIT; - release_image_memory_barrier.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT; - release_image_memory_barrier.oldLayout = texture->image_layout; - release_image_memory_barrier.newLayout = texture->image_layout; - release_image_memory_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - release_image_memory_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - release_image_memory_barrier.image = texture->image; - release_image_memory_barrier.subresourceRange = - ui::vulkan::util::InitializeSubresourceRange(); - } - { - release_barrier_src_stages |= - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - release_barrier_dst_stages |= - ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask; - VkImageMemoryBarrier& release_image_memory_barrier = - release_image_memory_barriers - [release_image_memory_barrier_count++]; - release_image_memory_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - release_image_memory_barrier.pNext = nullptr; - release_image_memory_barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - release_image_memory_barrier.dstAccessMask = - ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask; - release_image_memory_barrier.oldLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - release_image_memory_barrier.newLayout = - ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout; - release_image_memory_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - release_image_memory_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - release_image_memory_barrier.image = vulkan_context.image(); - release_image_memory_barrier.subresourceRange = - ui::vulkan::util::InitializeSubresourceRange(); - } - assert_not_zero(release_barrier_src_stages); - assert_not_zero(release_barrier_dst_stages); - assert_not_zero(release_image_memory_barrier_count); - dfn.vkCmdPipelineBarrier(copy_commands, release_barrier_src_stages, - release_barrier_dst_stages, 0, 0, nullptr, 0, - nullptr, release_image_memory_barrier_count, - 
release_image_memory_barriers); - - dfn.vkEndCommandBuffer(copy_commands); - - // Need to submit all the commands before giving the image back to the - // presenter so it can submit its own commands for displaying it to - // the queue. - - if (frame_open_) { - EndFrame(); - } - - if (opened_batch) { - command_buffer_pool_->EndBatch(); - } - - submit_buffers.push_back(copy_commands); - - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = uint32_t(submit_buffers.size()); - submit_info.pCommandBuffers = submit_buffers.data(); - VkResult submit_result; - { - ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition( - provider.AcquireQueue(provider.queue_family_graphics_compute(), - 0)); - submit_result = dfn.vkQueueSubmit( - queue_acquisition.queue, 1, &submit_info, current_batch_fence_); - } - if (submit_result != VK_SUCCESS) { + VkImageView guest_output_image_view = vulkan_context.image_view(); + VkFramebufferCreateInfo swap_framebuffer_create_info; + swap_framebuffer_create_info.sType = + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + swap_framebuffer_create_info.pNext = nullptr; + swap_framebuffer_create_info.flags = 0; + swap_framebuffer_create_info.renderPass = + swap_apply_gamma_render_pass_; + swap_framebuffer_create_info.attachmentCount = 1; + swap_framebuffer_create_info.pAttachments = &guest_output_image_view; + swap_framebuffer_create_info.width = frontbuffer_width_scaled; + swap_framebuffer_create_info.height = frontbuffer_height_scaled; + swap_framebuffer_create_info.layers = 1; + if (dfn.vkCreateFramebuffer( + device, &swap_framebuffer_create_info, nullptr, + &new_swap_framebuffer.framebuffer) != VK_SUCCESS) { + XELOGE("Failed to create the Vulkan framebuffer for presentation"); return false; } - submitted = true; + new_swap_framebuffer.version = guest_output_image_version; + // The actual submission index will be set if the framebuffer is + // actually used, not dropped due to some error. + new_swap_framebuffer.last_submission = 0; + } - // Signal the fence for destroying objects depending on the guest - // output image. - { - ui::vulkan::VulkanSubmissionTracker::FenceAcquisition - fence_acqusition = - swap_submission_tracker_.AcquireFenceToAdvanceSubmission(); - ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition( - provider.AcquireQueue(provider.queue_family_graphics_compute(), - 0)); - if (dfn.vkQueueSubmit(queue_acquisition.queue, 0, nullptr, - fence_acqusition.fence()) != VK_SUCCESS) { - fence_acqusition.SubmissionSucceededSignalFailed(); - } - } + if (vulkan_context.image_ever_written_previously()) { + // Insert a barrier after the last presenter's usage of the guest + // output image. Will be overwriting all the contents, so oldLayout + // layout is UNDEFINED. The render pass will do the layout transition, + // but newLayout must not be UNDEFINED. + PushImageMemoryBarrier( + vulkan_context.image(), + ui::vulkan::util::InitializeSubresourceRange(), + ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } - return true; - }); + // End the current render pass before inserting barriers and starting a + // new one, and insert the barrier. 
+        SubmitBarriers(true);
+
+        SwapFramebuffer& swap_framebuffer =
+            swap_framebuffers_[swap_framebuffer_index];
+        swap_framebuffer.last_submission = GetCurrentSubmission();
+
+        VkRenderPassBeginInfo render_pass_begin_info;
+        render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+        render_pass_begin_info.pNext = nullptr;
+        render_pass_begin_info.renderPass = swap_apply_gamma_render_pass_;
+        render_pass_begin_info.framebuffer = swap_framebuffer.framebuffer;
+        render_pass_begin_info.renderArea.offset.x = 0;
+        render_pass_begin_info.renderArea.offset.y = 0;
+        render_pass_begin_info.renderArea.extent.width =
+            frontbuffer_width_scaled;
+        render_pass_begin_info.renderArea.extent.height =
+            frontbuffer_height_scaled;
+        render_pass_begin_info.clearValueCount = 0;
+        render_pass_begin_info.pClearValues = nullptr;
+        deferred_command_buffer_.CmdVkBeginRenderPass(
+            &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
+
+        VkViewport viewport;
+        viewport.x = 0.0f;
+        viewport.y = 0.0f;
+        viewport.width = float(frontbuffer_width_scaled);
+        viewport.height = float(frontbuffer_height_scaled);
+        viewport.minDepth = 0.0f;
+        viewport.maxDepth = 1.0f;
+        SetViewport(viewport);
+        VkRect2D scissor;
+        scissor.offset.x = 0;
+        scissor.offset.y = 0;
+        scissor.extent.width = frontbuffer_width_scaled;
+        scissor.extent.height = frontbuffer_height_scaled;
+        SetScissor(scissor);
+
+        BindExternalGraphicsPipeline(
+            use_pwl_gamma_ramp ? swap_apply_gamma_pwl_pipeline_
+                               : swap_apply_gamma_256_entry_table_pipeline_);
+
+        VkDescriptorSet swap_descriptor_source =
+            swap_descriptors_source_[swap_frame_index];
+        VkDescriptorImageInfo swap_descriptor_source_image_info;
+        swap_descriptor_source_image_info.sampler = VK_NULL_HANDLE;
+        swap_descriptor_source_image_info.imageView = swap_texture_view;
+        swap_descriptor_source_image_info.imageLayout =
+            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        VkWriteDescriptorSet swap_descriptor_source_write;
+        swap_descriptor_source_write.sType =
+            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        swap_descriptor_source_write.pNext = nullptr;
+        swap_descriptor_source_write.dstSet = swap_descriptor_source;
+        swap_descriptor_source_write.dstBinding = 0;
+        swap_descriptor_source_write.dstArrayElement = 0;
+        swap_descriptor_source_write.descriptorCount = 1;
+        swap_descriptor_source_write.descriptorType =
+            VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+        swap_descriptor_source_write.pImageInfo =
+            &swap_descriptor_source_image_info;
+        swap_descriptor_source_write.pBufferInfo = nullptr;
+        swap_descriptor_source_write.pTexelBufferView = nullptr;
+        dfn.vkUpdateDescriptorSets(device, 1, &swap_descriptor_source_write, 0,
+                                   nullptr);
+
+        std::array<VkDescriptorSet, kSwapApplyGammaDescriptorSetCount>
+            swap_descriptor_sets{};
+        swap_descriptor_sets[kSwapApplyGammaDescriptorSetRamp] =
+            swap_descriptors_gamma_ramp_[2 * gamma_ramp_frame_index_ref +
+                                         uint32_t(use_pwl_gamma_ramp)];
+        swap_descriptor_sets[kSwapApplyGammaDescriptorSetSource] =
+            swap_descriptor_source;
+        // TODO(Triang3l): Red / blue swap without imageViewFormatSwizzle.
+        deferred_command_buffer_.CmdVkBindDescriptorSets(
+            VK_PIPELINE_BIND_POINT_GRAPHICS, swap_apply_gamma_pipeline_layout_,
+            0, uint32_t(swap_descriptor_sets.size()),
+            swap_descriptor_sets.data(), 0, nullptr);
+
+        deferred_command_buffer_.CmdVkDraw(3, 1, 0, 0);
+
+        deferred_command_buffer_.CmdVkEndRenderPass();
+
+        // Insert the release barrier.
+ PushImageMemoryBarrier( + vulkan_context.image(), + ui::vulkan::util::InitializeSubresourceRange(), + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout); + + // Need to submit all the commands before giving the image back to the + // presenter so it can submit its own commands for displaying it to the + // queue, and also need to submit the release barrier. + EndSubmission(true); + return true; + }); + + // End the frame even if it did not present for any reason (the image + // refresher was not called), to prevent leaking per-frame resources. + EndSubmission(true); +} + +bool VulkanCommandProcessor::PushBufferMemoryBarrier( + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + uint32_t src_queue_family_index, uint32_t dst_queue_family_index, + bool skip_if_equal) { + if (skip_if_equal && src_stage_mask == dst_stage_mask && + src_access_mask == dst_access_mask && + src_queue_family_index == dst_queue_family_index) { + return false; } - ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + // Separate different barriers for overlapping buffer ranges into different + // pipeline barrier commands. + for (const VkBufferMemoryBarrier& other_buffer_memory_barrier : + pending_barriers_buffer_memory_barriers_) { + if (other_buffer_memory_barrier.buffer != buffer || + (size != VK_WHOLE_SIZE && + offset + size <= other_buffer_memory_barrier.offset) || + (other_buffer_memory_barrier.size != VK_WHOLE_SIZE && + other_buffer_memory_barrier.offset + + other_buffer_memory_barrier.size <= + offset)) { + continue; + } + if (other_buffer_memory_barrier.offset == offset && + other_buffer_memory_barrier.size == size && + other_buffer_memory_barrier.srcAccessMask == src_access_mask && + other_buffer_memory_barrier.dstAccessMask == dst_access_mask && + other_buffer_memory_barrier.srcQueueFamilyIndex == + src_queue_family_index && + other_buffer_memory_barrier.dstQueueFamilyIndex == + dst_queue_family_index) { + // The barrier is already pending.
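+ // An exact match can simply be merged - the two statements below only
+ // widen the pending barrier's source and destination stage masks.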
+ current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + return true; + } + SplitPendingBarrier(); + break; + } + + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + VkBufferMemoryBarrier& buffer_memory_barrier = + pending_barriers_buffer_memory_barriers_.emplace_back(); + buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_memory_barrier.pNext = nullptr; + buffer_memory_barrier.srcAccessMask = src_access_mask; + buffer_memory_barrier.dstAccessMask = dst_access_mask; + buffer_memory_barrier.srcQueueFamilyIndex = src_queue_family_index; + buffer_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index; + buffer_memory_barrier.buffer = buffer; + buffer_memory_barrier.offset = offset; + buffer_memory_barrier.size = size; + return true; +} + +bool VulkanCommandProcessor::PushImageMemoryBarrier( + VkImage image, const VkImageSubresourceRange& subresource_range, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkImageLayout old_layout, VkImageLayout new_layout, + uint32_t src_queue_family_index, uint32_t dst_queue_family_index, + bool skip_if_equal) { + if (skip_if_equal && src_stage_mask == dst_stage_mask && + src_access_mask == dst_access_mask && old_layout == new_layout && + src_queue_family_index == dst_queue_family_index) { + return false; + } + + // Separate different barriers for overlapping image subresource ranges into + // different pipeline barrier commands. + for (const VkImageMemoryBarrier& other_image_memory_barrier : + pending_barriers_image_memory_barriers_) { + if (other_image_memory_barrier.image != image || + !(other_image_memory_barrier.subresourceRange.aspectMask & + subresource_range.aspectMask) || + (subresource_range.levelCount != VK_REMAINING_MIP_LEVELS && + subresource_range.baseMipLevel + subresource_range.levelCount <= + other_image_memory_barrier.subresourceRange.baseMipLevel) || + (other_image_memory_barrier.subresourceRange.levelCount != + VK_REMAINING_MIP_LEVELS && + other_image_memory_barrier.subresourceRange.baseMipLevel + + other_image_memory_barrier.subresourceRange.levelCount <= + subresource_range.baseMipLevel) || + (subresource_range.layerCount != VK_REMAINING_ARRAY_LAYERS && + subresource_range.baseArrayLayer + subresource_range.layerCount <= + other_image_memory_barrier.subresourceRange.baseArrayLayer) || + (other_image_memory_barrier.subresourceRange.layerCount != + VK_REMAINING_ARRAY_LAYERS && + other_image_memory_barrier.subresourceRange.baseArrayLayer + + other_image_memory_barrier.subresourceRange.layerCount <= + subresource_range.baseArrayLayer)) { + continue; + } + if (other_image_memory_barrier.subresourceRange.aspectMask == + subresource_range.aspectMask && + other_image_memory_barrier.subresourceRange.baseMipLevel == + subresource_range.baseMipLevel && + other_image_memory_barrier.subresourceRange.levelCount == + subresource_range.levelCount && + other_image_memory_barrier.subresourceRange.baseArrayLayer == + subresource_range.baseArrayLayer && + other_image_memory_barrier.subresourceRange.layerCount == + subresource_range.layerCount && + other_image_memory_barrier.srcAccessMask == src_access_mask && + other_image_memory_barrier.dstAccessMask == dst_access_mask && + other_image_memory_barrier.oldLayout == old_layout && + other_image_memory_barrier.newLayout == new_layout && + 
other_image_memory_barrier.srcQueueFamilyIndex == + src_queue_family_index && + other_image_memory_barrier.dstQueueFamilyIndex == + dst_queue_family_index) { + // The barrier is already pending. + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + return true; + } + SplitPendingBarrier(); + break; + } + + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + VkImageMemoryBarrier& image_memory_barrier = + pending_barriers_image_memory_barriers_.emplace_back(); + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = src_access_mask; + image_memory_barrier.dstAccessMask = dst_access_mask; + image_memory_barrier.oldLayout = old_layout; + image_memory_barrier.newLayout = new_layout; + image_memory_barrier.srcQueueFamilyIndex = src_queue_family_index; + image_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index; + image_memory_barrier.image = image; + image_memory_barrier.subresourceRange = subresource_range; + return true; +} + +bool VulkanCommandProcessor::SubmitBarriers(bool force_end_render_pass) { + assert_true(submission_open_); + SplitPendingBarrier(); + if (pending_barriers_.empty()) { + if (force_end_render_pass) { + EndRenderPass(); + } + return false; + } + EndRenderPass(); + for (auto it = pending_barriers_.cbegin(); it != pending_barriers_.cend(); + ++it) { + auto it_next = std::next(it); + bool is_last = it_next == pending_barriers_.cend(); + // .data() + offset, not &[offset], for buffer and image barriers, because + // if there are no buffer or image memory barriers in the last pipeline + // barriers, the offsets may be equal to the sizes of the vectors. + deferred_command_buffer_.CmdVkPipelineBarrier( + it->src_stage_mask ? it->src_stage_mask + : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + it->dst_stage_mask ? it->dst_stage_mask + : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, 0, nullptr, + uint32_t((is_last ? pending_barriers_buffer_memory_barriers_.size() + : it_next->buffer_memory_barriers_offset) - + it->buffer_memory_barriers_offset), + pending_barriers_buffer_memory_barriers_.data() + + it->buffer_memory_barriers_offset, + uint32_t((is_last ? 
pending_barriers_image_memory_barriers_.size() + : it_next->image_memory_barriers_offset) - + it->image_memory_barriers_offset), + pending_barriers_image_memory_barriers_.data() + + it->image_memory_barriers_offset); + } + pending_barriers_.clear(); + pending_barriers_buffer_memory_barriers_.clear(); + pending_barriers_image_memory_barriers_.clear(); + current_pending_barrier_.buffer_memory_barriers_offset = 0; + current_pending_barrier_.image_memory_barriers_offset = 0; + return true; +} + +void VulkanCommandProcessor::SubmitBarriersAndEnterRenderTargetCacheRenderPass( + VkRenderPass render_pass, + const VulkanRenderTargetCache::Framebuffer* framebuffer) { + SubmitBarriers(false); + if (current_render_pass_ == render_pass && + current_framebuffer_ == framebuffer) { + return; + } + if (current_render_pass_ != VK_NULL_HANDLE) { + deferred_command_buffer_.CmdVkEndRenderPass(); + } + current_render_pass_ = render_pass; + current_framebuffer_ = framebuffer; + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.framebuffer = framebuffer->framebuffer; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + // TODO(Triang3l): Actual dirty width / height in the deferred command + // buffer. + render_pass_begin_info.renderArea.extent = framebuffer->host_extent; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + +void VulkanCommandProcessor::EndRenderPass() { + assert_true(submission_open_); + if (current_render_pass_ == VK_NULL_HANDLE) { + return; + } + deferred_command_buffer_.CmdVkEndRenderPass(); + current_render_pass_ = VK_NULL_HANDLE; + current_framebuffer_ = nullptr; +} + +VkDescriptorSet VulkanCommandProcessor::AllocateSingleTransientDescriptor( + SingleTransientDescriptorLayout transient_descriptor_layout) { + assert_true(frame_open_); + VkDescriptorSet descriptor_set; + std::vector<VkDescriptorSet>& transient_descriptors_free = + single_transient_descriptors_free_[size_t(transient_descriptor_layout)]; + if (!transient_descriptors_free.empty()) { + descriptor_set = transient_descriptors_free.back(); + transient_descriptors_free.pop_back(); + } else { + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::SingleTypeDescriptorSetAllocator& + transfer_descriptor_allocator = + transient_descriptor_layout == + SingleTransientDescriptorLayout::kStorageBufferCompute + ?
transient_descriptor_allocator_storage_buffer_ + : transient_descriptor_allocator_uniform_buffer_; + descriptor_set = transfer_descriptor_allocator.Allocate( + GetSingleTransientDescriptorLayout(transient_descriptor_layout), 1); + if (descriptor_set == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + } + UsedSingleTransientDescriptor used_descriptor; + used_descriptor.frame = frame_current_; + used_descriptor.layout = transient_descriptor_layout; + used_descriptor.set = descriptor_set; + single_transient_descriptors_used_.emplace_back(used_descriptor); + return descriptor_set; +} + +VkDescriptorSetLayout VulkanCommandProcessor::GetTextureDescriptorSetLayout( + bool is_samplers, bool is_vertex, size_t binding_count) { + if (!binding_count) { + return descriptor_set_layout_empty_; + } + + TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; + texture_descriptor_set_layout_key.is_samplers = uint32_t(is_samplers); + texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex); + texture_descriptor_set_layout_key.binding_count = uint32_t(binding_count); + auto it_existing = + descriptor_set_layouts_textures_.find(texture_descriptor_set_layout_key); + if (it_existing != descriptor_set_layouts_textures_.end()) { + return it_existing->second; + } + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - if (!submitted) { - // End the frame even if failed to refresh the guest output. - if (frame_open_) { - EndFrame(); - } - if (!submit_buffers.empty() || current_batch_fence_ != VK_NULL_HANDLE) { - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = uint32_t(submit_buffers.size()); - submit_info.pCommandBuffers = submit_buffers.data(); - VkResult submit_result; - { - ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition( - provider.AcquireQueue(provider.queue_family_graphics_compute(), 0)); - submit_result = dfn.vkQueueSubmit(queue_acquisition.queue, 1, - &submit_info, current_batch_fence_); - } - CheckResult(submit_result, "vkQueueSubmit"); - } + descriptor_set_layout_bindings_.clear(); + descriptor_set_layout_bindings_.reserve(binding_count); + VkDescriptorType descriptor_type = is_samplers + ? VK_DESCRIPTOR_TYPE_SAMPLER + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + VkShaderStageFlags stage_flags = + is_vertex ? 
guest_shader_vertex_stages_ : VK_SHADER_STAGE_FRAGMENT_BIT; + for (size_t i = 0; i < binding_count; ++i) { + VkDescriptorSetLayoutBinding& descriptor_set_layout_binding = + descriptor_set_layout_bindings_.emplace_back(); + descriptor_set_layout_binding.binding = uint32_t(i); + descriptor_set_layout_binding.descriptorType = descriptor_type; + descriptor_set_layout_binding.descriptorCount = 1; + descriptor_set_layout_binding.stageFlags = stage_flags; } - - if (current_batch_fence_ != VK_NULL_HANDLE) { - dfn.vkWaitForFences(device, 1, &current_batch_fence_, VK_TRUE, -1); + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = uint32_t(binding_count); + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_bindings_.data(); + VkDescriptorSetLayout texture_descriptor_set_layout; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &texture_descriptor_set_layout) != VK_SUCCESS) { + return VK_NULL_HANDLE; } - if (cache_clear_requested_) { - cache_clear_requested_ = false; + descriptor_set_layouts_textures_.emplace(texture_descriptor_set_layout_key, + texture_descriptor_set_layout); + return texture_descriptor_set_layout; +} - buffer_cache_->ClearCache(); - pipeline_cache_->ClearCache(); - render_cache_->ClearCache(); - texture_cache_->ClearCache(); - } - - // Scavenging. +const VulkanPipelineCache::PipelineLayoutProvider* +VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, + size_t sampler_count_pixel, + size_t texture_count_vertex, + size_t sampler_count_vertex) { + PipelineLayoutKey pipeline_layout_key; + pipeline_layout_key.texture_count_pixel = uint16_t(texture_count_pixel); + pipeline_layout_key.sampler_count_pixel = uint16_t(sampler_count_pixel); + pipeline_layout_key.texture_count_vertex = uint16_t(texture_count_vertex); + pipeline_layout_key.sampler_count_vertex = uint16_t(sampler_count_vertex); { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_i( - "gpu", - "xe::gpu::vulkan::VulkanCommandProcessor::PerformSwap Scavenging"); -#endif // FINE_GRAINED_DRAW_SCOPES - // Command buffers must be scavenged first to avoid a race condition. - // We don't want to reuse a batch when the caches haven't yet cleared old - // resources!
- command_buffer_pool_->Scavenge(); - - blitter_->Scavenge(); - texture_cache_->Scavenge(); - buffer_cache_->Scavenge(); + auto it = pipeline_layouts_.find(pipeline_layout_key); + if (it != pipeline_layouts_.end()) { + return &it->second; + } } - current_batch_fence_ = nullptr; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel = + GetTextureDescriptorSetLayout(false, false, texture_count_pixel); + if (descriptor_set_layout_textures_pixel == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} sampled images " + "for guest pixel shaders", + texture_count_pixel); + return nullptr; + } + VkDescriptorSetLayout descriptor_set_layout_samplers_pixel = + GetTextureDescriptorSetLayout(true, false, sampler_count_pixel); + if (descriptor_set_layout_samplers_pixel == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} samplers for " + "guest pixel shaders", + sampler_count_pixel); + return nullptr; + } + VkDescriptorSetLayout descriptor_set_layout_textures_vertex = + GetTextureDescriptorSetLayout(false, true, texture_count_vertex); + if (descriptor_set_layout_textures_vertex == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} sampled images " + "for guest vertex shaders", + texture_count_vertex); + return nullptr; + } + VkDescriptorSetLayout descriptor_set_layout_samplers_vertex = + GetTextureDescriptorSetLayout(true, true, sampler_count_vertex); + if (descriptor_set_layout_samplers_vertex == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} samplers for " + "guest vertex shaders", + sampler_count_vertex); + return nullptr; + } + + VkDescriptorSetLayout + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount]; + // Immutable layouts. + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = + descriptor_set_layout_shared_memory_and_edram_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestShader); + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferSystemConstants); + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferFragment); + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestVertex); + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestShader); + // Mutable layouts. 
+ descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersVertex] = + descriptor_set_layout_samplers_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = + descriptor_set_layout_textures_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersPixel] = + descriptor_set_layout_samplers_pixel; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = + descriptor_set_layout_textures_pixel; + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkPipelineLayoutCreateInfo pipeline_layout_create_info; + pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_create_info.pNext = nullptr; + pipeline_layout_create_info.flags = 0; + pipeline_layout_create_info.setLayoutCount = + uint32_t(xe::countof(descriptor_set_layouts)); + pipeline_layout_create_info.pSetLayouts = descriptor_set_layouts; + pipeline_layout_create_info.pushConstantRangeCount = 0; + pipeline_layout_create_info.pPushConstantRanges = nullptr; + VkPipelineLayout pipeline_layout; + if (dfn.vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, + &pipeline_layout) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan pipeline layout for guest drawing with {} " + "pixel shader and {} vertex shader textures", + texture_count_pixel, texture_count_vertex); + return nullptr; + } + auto emplaced_pair = pipeline_layouts_.emplace( + std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key), + std::forward_as_tuple(pipeline_layout, + descriptor_set_layout_textures_vertex, + descriptor_set_layout_samplers_vertex, + descriptor_set_layout_textures_pixel, + descriptor_set_layout_samplers_pixel)); + // unordered_map insertion doesn't invalidate element references. + return &emplaced_pair.first->second; +} + +VulkanCommandProcessor::ScratchBufferAcquisition +VulkanCommandProcessor::AcquireScratchGpuBuffer( + VkDeviceSize size, VkPipelineStageFlags initial_stage_mask, + VkAccessFlags initial_access_mask) { + assert_true(submission_open_); + assert_false(scratch_buffer_used_); + if (!submission_open_ || scratch_buffer_used_ || !size) { + return ScratchBufferAcquisition(); + } + + uint64_t submission_current = GetCurrentSubmission(); + + if (scratch_buffer_ != VK_NULL_HANDLE && size <= scratch_buffer_size_) { + // Already used previously - transition. + PushBufferMemoryBarrier(scratch_buffer_, 0, VK_WHOLE_SIZE, + scratch_buffer_last_stage_mask_, initial_stage_mask, + scratch_buffer_last_access_mask_, + initial_access_mask); + scratch_buffer_last_stage_mask_ = initial_stage_mask; + scratch_buffer_last_access_mask_ = initial_access_mask; + scratch_buffer_last_usage_submission_ = submission_current; + scratch_buffer_used_ = true; + return ScratchBufferAcquisition(*this, scratch_buffer_, initial_stage_mask, + initial_access_mask); + } + + size = xe::align(size, kScratchBufferSizeIncrement); + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + + VkDeviceMemory new_scratch_buffer_memory; + VkBuffer new_scratch_buffer; + // VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT for + // texture loading. 
+ if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, new_scratch_buffer, + new_scratch_buffer_memory)) { + XELOGE( + "VulkanCommandProcessor: Failed to create a {} MB scratch GPU buffer", + size >> 20); + return ScratchBufferAcquisition(); + } + + if (submission_completed_ >= scratch_buffer_last_usage_submission_) { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + if (scratch_buffer_ != VK_NULL_HANDLE) { + dfn.vkDestroyBuffer(device, scratch_buffer_, nullptr); + } + if (scratch_buffer_memory_ != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, scratch_buffer_memory_, nullptr); + } + } else { + if (scratch_buffer_ != VK_NULL_HANDLE) { + destroy_buffers_.emplace_back(scratch_buffer_last_usage_submission_, + scratch_buffer_); + } + if (scratch_buffer_memory_ != VK_NULL_HANDLE) { + destroy_memory_.emplace_back(scratch_buffer_last_usage_submission_, + scratch_buffer_memory_); + } + } + + scratch_buffer_memory_ = new_scratch_buffer_memory; + scratch_buffer_ = new_scratch_buffer; + scratch_buffer_size_ = size; + // Not used yet, no need for a barrier. + scratch_buffer_last_stage_mask_ = initial_stage_mask; + scratch_buffer_last_access_mask_ = initial_access_mask; + scratch_buffer_last_usage_submission_ = submission_current; + scratch_buffer_used_ = true; + return ScratchBufferAcquisition(*this, new_scratch_buffer, initial_stage_mask, + initial_access_mask); +} + +void VulkanCommandProcessor::BindExternalGraphicsPipeline( + VkPipeline pipeline, bool keep_dynamic_depth_bias, + bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) { + if (!keep_dynamic_depth_bias) { + dynamic_depth_bias_update_needed_ = true; + } + if (!keep_dynamic_blend_constants) { + dynamic_blend_constants_update_needed_ = true; + } + if (!keep_dynamic_stencil_mask_ref) { + dynamic_stencil_compare_mask_front_update_needed_ = true; + dynamic_stencil_compare_mask_back_update_needed_ = true; + dynamic_stencil_write_mask_front_update_needed_ = true; + dynamic_stencil_write_mask_back_update_needed_ = true; + dynamic_stencil_reference_front_update_needed_ = true; + dynamic_stencil_reference_back_update_needed_ = true; + } + if (current_external_graphics_pipeline_ == pipeline) { + return; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_external_graphics_pipeline_ = pipeline; + current_guest_graphics_pipeline_ = VK_NULL_HANDLE; + current_guest_graphics_pipeline_layout_ = VK_NULL_HANDLE; +} + +void VulkanCommandProcessor::BindExternalComputePipeline(VkPipeline pipeline) { + if (current_external_compute_pipeline_ == pipeline) { + return; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline); + current_external_compute_pipeline_ = pipeline; +} + +void VulkanCommandProcessor::SetViewport(const VkViewport& viewport) { + if (!dynamic_viewport_update_needed_) { + dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x; + dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.width != viewport.width; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.height != viewport.height; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.minDepth != viewport.minDepth; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.maxDepth !=
viewport.maxDepth; + } + if (dynamic_viewport_update_needed_) { + dynamic_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_); + dynamic_viewport_update_needed_ = false; + } +} + +void VulkanCommandProcessor::SetScissor(const VkRect2D& scissor) { + if (!dynamic_scissor_update_needed_) { + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.x != scissor.offset.x; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.y != scissor.offset.y; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.width != scissor.extent.width; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.height != scissor.extent.height; + } + if (dynamic_scissor_update_needed_) { + dynamic_scissor_ = scissor; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_); + dynamic_scissor_update_needed_ = false; + } } Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, - dword_count); + return pipeline_cache_->LoadShader(shader_type, host_address, dword_count); } -bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, +bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { - auto& regs = *register_file_; - -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - auto enable_mode = - static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); - if (enable_mode == ModeControl::kIgnore) { - // Ignored. - return true; - } else if (enable_mode == ModeControl::kCopy) { + const RegisterFile& regs = *register_file_; + + xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode; + if (edram_mode == xenos::ModeControl::kCopy) { // Special copy handling. return IssueCopy(); } - if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { - // Doesn't actually draw. - return true; - } - - // Shaders will have already been defined by previous loads. - // We need them to do just about anything so validate here. + // Vertex shader analysis. auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader()); - auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader()); if (!vertex_shader) { // Always need a vertex shader. return false; } - // Depth-only mode doesn't need a pixel shader (we'll use a fake one). - if (enable_mode == ModeControl::kDepth) { - // Use a dummy pixel shader when required. - pixel_shader = nullptr; - } else if (!pixel_shader) { - // Need a pixel shader in normal color mode. - return true; - } + pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); + bool memexport_used_vertex = vertex_shader->is_valid_memexport_used(); - bool full_update = false; - if (!frame_open_) { - BeginFrame(); - full_update = true; - } - auto command_buffer = current_command_buffer_; - auto setup_buffer = current_setup_buffer_; - - // Begin the render pass. - // This will setup our framebuffer and begin the pass in the command buffer. - // This reuses a previous render pass if one is already open. - if (render_cache_->dirty() || !current_render_state_) { - if (current_render_state_) { - render_cache_->EndRenderPass(); - current_render_state_ = nullptr; + // Pixel shader analysis.
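An aside on the SetViewport and SetScissor helpers above before the draw-path analysis continues: both use the same compare-then-record pattern so unchanged dynamic state is never re-recorded. A generic, illustrative reduction of that pattern (std::memcmp stands in for the field-by-field comparison, assuming exact bitwise equality is the desired test):

#include <cstring>

// Re-records a piece of dynamic state only when the desired value differs
// from the cached one, or when a previous pipeline bind forced an update.
template <typename State, typename RecordFn>
void SetIfChanged(State& cached, bool& update_needed, const State& desired,
                  RecordFn&& record) {
  if (!update_needed) {
    update_needed = std::memcmp(&cached, &desired, sizeof(State)) != 0;
  }
  if (update_needed) {
    cached = desired;
    record(cached);
    update_needed = false;
  }
}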
+ bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); + bool is_rasterization_done = + draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal); + VulkanShader* pixel_shader = nullptr; + if (is_rasterization_done) { + // See xenos::ModeControl for an explanation of why the pixel shader is + // only used when it's kColorDepth here. + if (edram_mode == xenos::ModeControl::kColorDepth) { + pixel_shader = static_cast<VulkanShader*>(active_pixel_shader()); + if (pixel_shader) { + pipeline_cache_->AnalyzeShaderUcode(*pixel_shader); + if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader, + regs)) { + pixel_shader = nullptr; + } + } + } + } else { + // Disabling pixel shader for this case is also required by the pipeline + // cache. + if (!memexport_used_vertex) { + // This draw has no effect. + return true; + } + } + // TODO(Triang3l): Memory export. - full_update = true; - current_render_state_ = render_cache_->BeginRenderPass( - command_buffer, vertex_shader, pixel_shader); - if (!current_render_state_) { + reg::RB_DEPTHCONTROL normalized_depth_control = + draw_util::GetNormalizedDepthControl(regs); + uint32_t normalized_color_mask = + pixel_shader ? draw_util::GetNormalizedColorMask( + regs, pixel_shader->writes_color_targets()) + : 0; + + PrimitiveProcessor::ProcessingResult primitive_processing_result; + SpirvShaderTranslator::Modification vertex_shader_modification; + SpirvShaderTranslator::Modification pixel_shader_modification; + VulkanShader::VulkanTranslation* vertex_shader_translation; + VulkanShader::VulkanTranslation* pixel_shader_translation; + + // Two iterations because a submission (even the current one - in which case + // it needs to be ended, and a new one must be started) may need to be awaited + // in case of a sampler count overflow, and if that happens, all subsystem + // updates done previously must be performed again because the updates done + // before the awaiting may be referencing objects destroyed by + // CompletedSubmissionUpdated. + for (uint32_t i = 0; i < 2; ++i) { + if (!BeginSubmission(true)) { return false; } + + // Process primitives. + if (!primitive_processor_->Process(primitive_processing_result)) { + return false; + } + if (!primitive_processing_result.host_draw_vertex_count) { + // Nothing to draw. + return true; + } + // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, + // vertex shader as compute. + if (primitive_processing_result.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex) { + return false; + } + + // Shader modifications. + vertex_shader_modification = + pipeline_cache_->GetCurrentVertexShaderModification( + *vertex_shader, + primitive_processing_result.host_vertex_shader_type); + pixel_shader_modification = + pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification( + *pixel_shader, normalized_color_mask) + : SpirvShaderTranslator::Modification(0); + + // Translate the shaders now to obtain the sampler bindings. + vertex_shader_translation = static_cast<VulkanShader::VulkanTranslation*>( + vertex_shader->GetOrCreateTranslation( + vertex_shader_modification.value)); + pixel_shader_translation = + pixel_shader ? static_cast<VulkanShader::VulkanTranslation*>( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; + if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation, + pixel_shader_translation)) { + return false; + } + + // Obtain the samplers.
Note that the bindings don't depend on the shader + // modification, so if on the second iteration of this loop it becomes + // different for some reason (like a race condition with the guest in index + // buffer processing in the primitive processor resulting in different host + // vertex shader types), the bindings will stay the same. + // TODO(Triang3l): Sampler caching and reuse for adjacent draws within one + // submission. + uint32_t samplers_overflowed_count = 0; + for (uint32_t j = 0; j < 2; ++j) { + std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>& + shader_samplers = + j ? current_samplers_pixel_ : current_samplers_vertex_; + if (!i) { + shader_samplers.clear(); + } + const VulkanShader* shader = j ? pixel_shader : vertex_shader; + if (!shader) { + continue; + } + const std::vector<VulkanShader::SamplerBinding>& shader_sampler_bindings = + shader->GetSamplerBindingsAfterTranslation(); + if (!i) { + shader_samplers.reserve(shader_sampler_bindings.size()); + for (const VulkanShader::SamplerBinding& shader_sampler_binding : + shader_sampler_bindings) { + shader_samplers.emplace_back( + texture_cache_->GetSamplerParameters(shader_sampler_binding), + VK_NULL_HANDLE); + } + } + for (std::pair<VulkanTextureCache::SamplerParameters, VkSampler>& + shader_sampler_pair : shader_samplers) { + // UseSampler calls are needed even on the second iteration in case the + // submission was broken (and thus the last usage submission indices for + // the used samplers need to be updated) due to an overflow within one + // submission. Though sampler overflow is a very rare situation overall. + bool sampler_overflowed; + VkSampler shader_sampler = texture_cache_->UseSampler( + shader_sampler_pair.first, sampler_overflowed); + shader_sampler_pair.second = shader_sampler; + if (shader_sampler == VK_NULL_HANDLE) { + if (!sampler_overflowed || i) { + // If !sampler_overflowed, just failed to create a sampler for some + // reason. + // If i == 1, an overflow has happened twice, can't recover from it + // anymore (would enter an infinite loop otherwise if the number of + // attempts was not limited to 2). Possibly too many unique samplers + // in one draw, or failed to await submission completion. + return false; + } + ++samplers_overflowed_count; + } + } + } + if (!samplers_overflowed_count) { + break; + } + assert_zero(i); + // Free space for as many samplers as haven't been allocated + // successfully - obtain the submission index that needs to be awaited to + // reuse `samplers_overflowed_count` slots. This must be done after all the + // UseSampler calls, not inside the loop calling UseSampler, because earlier + // UseSampler calls may "mark for deletion" some samplers that later + // UseSampler calls in the loop may actually demand. + uint64_t sampler_overflow_await_submission = + texture_cache_->GetSubmissionToAwaitOnSamplerOverflow( + samplers_overflowed_count); + assert_true(sampler_overflow_await_submission <= GetCurrentSubmission()); + CheckSubmissionFenceAndDeviceLoss(sampler_overflow_await_submission); + } + + // Set up the render targets - this may perform dispatches and draws. + if (!render_target_cache_->Update(is_rasterization_done, + normalized_depth_control, + normalized_color_mask, *vertex_shader)) { + return false; + } + + // Create the pipeline (for this, need the render pass from the render target + // cache), translating the shaders - doing this now to obtain the used + // textures.
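Before the pipeline creation below, a minimal, self-contained reduction of the two-attempt sampler flow above; TryUseAllSamplers, SubmissionFreeingSlots and AwaitSubmission are hypothetical stand-ins for the texture cache and fence calls, not Xenia APIs:

#include <cstdint>

namespace {
// Returns how many sampler slots could not be allocated (0 when all fit).
uint32_t TryUseAllSamplers() { return 0; }
// Returns the submission whose completion frees the given number of slots.
uint64_t SubmissionFreeingSlots(uint32_t slots) { return slots ? 1 : 0; }
void AwaitSubmission(uint64_t submission) {}
}  // namespace

bool UseSamplersWithOneRetry() {
  for (uint32_t attempt = 0; attempt < 2; ++attempt) {
    uint32_t overflowed = TryUseAllSamplers();
    if (!overflowed) {
      return true;
    }
    if (attempt) {
      // Overflowed twice - likely too many unique samplers in one draw;
      // bail out instead of looping forever.
      return false;
    }
    // Await an older submission to free slots, then redo every UseSampler
    // call, as the await may have destroyed objects used by the first pass.
    AwaitSubmission(SubmissionFreeingSlots(overflowed));
  }
  return false;
}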
+ VkPipeline pipeline; + const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; + if (!pipeline_cache_->ConfigurePipeline( + vertex_shader_translation, pixel_shader_translation, + primitive_processing_result, normalized_depth_control, + normalized_color_mask, + render_target_cache_->last_update_render_pass_key(), pipeline, + pipeline_layout_provider)) { + return false; + } + + // Update the textures before most other work in the submission because + // samplers depend on this (and in case of sampler overflow in a submission, + // submissions must be split) - may perform dispatches and copying. + uint32_t used_texture_mask = + vertex_shader->GetUsedTextureMaskAfterTranslation() | + (pixel_shader != nullptr + ? pixel_shader->GetUsedTextureMaskAfterTranslation() + : 0); + texture_cache_->RequestTextures(used_texture_mask); + + // Update the graphics pipeline, and if the new graphics pipeline has a + // different layout, invalidate incompatible descriptor sets before updating + // current_guest_graphics_pipeline_layout_. + if (current_guest_graphics_pipeline_ != pipeline) { + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_guest_graphics_pipeline_ = pipeline; + current_external_graphics_pipeline_ = VK_NULL_HANDLE; + } + auto pipeline_layout = + static_cast<const PipelineLayout*>(pipeline_layout_provider); + if (current_guest_graphics_pipeline_layout_ != pipeline_layout) { + if (current_guest_graphics_pipeline_layout_) { + // Keep descriptor set layouts for which the new pipeline layout is + // compatible with the previous one (pipeline layouts are compatible for + // set N if set layouts 0 through N are compatible). + uint32_t descriptor_sets_kept = + uint32_t(SpirvShaderTranslator::kDescriptorSetCount); + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_vertex_ref() != + pipeline_layout->descriptor_set_layout_samplers_vertex_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersVertex)); + } + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref() != + pipeline_layout->descriptor_set_layout_textures_vertex_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); + } + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_pixel_ref() != + pipeline_layout->descriptor_set_layout_samplers_pixel_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersPixel)); + } + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref() != + pipeline_layout->descriptor_set_layout_textures_pixel_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel)); + } + // Invalidate the bindings for the incompatible set layouts. + if (descriptor_sets_kept < + uint32_t(SpirvShaderTranslator::kDescriptorSetCount)) { + current_graphics_descriptor_sets_bound_up_to_date_ &= + (uint32_t(1) << descriptor_sets_kept) - 1; + } + } else { + // No or unknown pipeline layout previously bound - all bindings are in an + // indeterminate state. + current_graphics_descriptor_sets_bound_up_to_date_ = 0; + } + current_guest_graphics_pipeline_layout_ = pipeline_layout; } const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; - // Configure the pipeline for drawing.
- // This encodes all render state (blend, depth, etc), our shader stages, - // and our vertex input layout. - VkPipeline pipeline = nullptr; - auto pipeline_status = pipeline_cache_->ConfigurePipeline( - command_buffer, current_render_state_, vertex_shader, pixel_shader, - primitive_type, &pipeline); - if (pipeline_status == VulkanPipelineCache::UpdateStatus::kError) { - return false; - } else if (pipeline_status == VulkanPipelineCache::UpdateStatus::kMismatch || - full_update) { - dfn.vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - } - pipeline_cache_->SetDynamicState(command_buffer, full_update); + bool host_render_targets_used = render_target_cache_->GetPath() == + RenderTargetCache::Path::kHostRenderTargets; - // Pass registers to the shaders. - if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { + // Get dynamic rasterizer state. + draw_util::ViewportInfo viewport_info; + // Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must + // be at least 2 * max(maxViewportDimensions[0...1]) - 1, and + // maxViewportDimensions must be greater than or equal to the size of the + // largest possible framebuffer attachment (if the viewport has positive + // offset and is between maxViewportDimensions and viewportBoundsRange[1], + // GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the + // clamped range will be outside the largest possible framebuffer anyway). + // FIXME(Triang3l): Possibly handle maxViewportDimensions and + // viewportBoundsRange separately because when using fragment shader + // interlocks, framebuffers are not used, while the range may be wider than + // dimensions? Though viewport bigger than 4096 - the smallest possible + // maximum dimension (which is below the 8192 texture size limit on the Xbox + // 360) - and with offset, is probably a situation that never happens in real + // life. Or even disregard the viewport bounds range in the fragment shader + // interlocks case completely - apply the viewport and the scissor offset + // directly to pixel address and to things like ps_param_gen. + draw_util::GetHostViewportInfo( + regs, 1, 1, false, device_limits.maxViewportDimensions[0], + device_limits.maxViewportDimensions[1], true, normalized_depth_control, + false, host_render_targets_used, + pixel_shader && pixel_shader->writes_depth(), viewport_info); + + // Update dynamic graphics pipeline state. + UpdateDynamicState(viewport_info, primitive_polygonal, + normalized_depth_control); + + // Update system constants before uploading them. + UpdateSystemConstantValues(primitive_polygonal, + primitive_processing_result.host_index_endian, + viewport_info, used_texture_mask); + + // Update uniform buffers and descriptor sets after binding the pipeline with + // the new layout. + if (!UpdateBindings(vertex_shader, pixel_shader)) { return false; } - // Upload and bind index buffer data (if we have any). - if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { - return false; + // Ensure vertex buffers are resident. + // TODO(Triang3l): Cache residency for ranges in a way similar to how texture + // validity is tracked.
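The residency loop below walks the vertex bindings and requests each referenced shared-memory range once. The Xenos register file exposes 96 vertex fetch constants (32 groups of three, two dwords each), which is why a two-word bitmask is enough to remember the ones already handled; a minimal sketch of that test-and-set under the same layout assumption:

#include <cstdint>

// Returns true if the fetch constant was already marked; marks it otherwise.
bool TestAndSetFetchConstant(uint64_t mask[2], uint32_t vfetch_index) {
  uint64_t& word = mask[vfetch_index >> 6];
  uint64_t bit = uint64_t(1) << (vfetch_index & 63);
  if (word & bit) {
    return true;
  }
  word |= bit;
  return false;
}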
+ uint64_t vertex_buffers_resident[2] = {}; + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { + uint32_t vfetch_index = vertex_binding.fetch_constant; + if (vertex_buffers_resident[vfetch_index >> 6] & + (uint64_t(1) << (vfetch_index & 63))) { + continue; + } + const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + switch (vfetch_constant.type) { + case xenos::FetchConstantType::kVertex: + break; + case xenos::FetchConstantType::kInvalidVertex: + if (cvars::gpu_allow_invalid_fetch_constants) { + break; + } + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " + "This is incorrect behavior, but you can try bypassing this by " + "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + default: + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + } + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { + XELOGE( + "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared " + "memory", + vfetch_constant.address << 2, vfetch_constant.size << 2); + return false; + } + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); } - // Upload and bind all vertex buffer data. - if (!PopulateVertexBuffers(command_buffer, setup_buffer, vertex_shader)) { - return false; - } + // Insert the shared memory barrier if needed. + // TODO(Triang3l): Memory export. + shared_memory_->Use(VulkanSharedMemory::Usage::kRead); - // Bind samplers/textures. - // Uploads all textures that need it. - // Setup buffer may be flushed to GPU if the texture cache needs it. - if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader, - pixel_shader)) { - return false; - } + // After all commands that may dispatch, copy or insert barriers, submit the + // barriers (may end the render pass), and (re)enter the render pass before + // drawing. + SubmitBarriersAndEnterRenderTargetCacheRenderPass( + render_target_cache_->last_update_render_pass(), + render_target_cache_->last_update_framebuffer()); - // Actually issue the draw. - if (!index_buffer_info) { - // Auto-indexed draw. - uint32_t instance_count = 1; - uint32_t first_vertex = - register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t first_instance = 0; - dfn.vkCmdDraw(command_buffer, index_count, instance_count, first_vertex, - first_instance); + // Draw. + if (primitive_processing_result.index_buffer_type == + PrimitiveProcessor::ProcessedIndexBufferType::kNone) { + deferred_command_buffer_.CmdVkDraw( + primitive_processing_result.host_draw_vertex_count, 1, 0, 0); } else { - // Index buffer draw.
- uint32_t instance_count = 1; - uint32_t first_index = 0; - uint32_t vertex_offset = - register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t first_instance = 0; - dfn.vkCmdDrawIndexed(command_buffer, index_count, instance_count, - first_index, vertex_offset, first_instance); + std::pair<VkBuffer, VkDeviceSize> index_buffer; + switch (primitive_processing_result.index_buffer_type) { + case PrimitiveProcessor::ProcessedIndexBufferType::kGuest: + index_buffer.first = shared_memory_->buffer(); + index_buffer.second = primitive_processing_result.guest_index_base; + break; + case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted: + index_buffer = primitive_processor_->GetConvertedIndexBuffer( + primitive_processing_result.host_index_buffer_handle); + break; + case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin: + index_buffer = primitive_processor_->GetBuiltinIndexBuffer( + primitive_processing_result.host_index_buffer_handle); + break; + default: + assert_unhandled_case(primitive_processing_result.index_buffer_type); + return false; + } + deferred_command_buffer_.CmdVkBindIndexBuffer( + index_buffer.first, index_buffer.second, + primitive_processing_result.host_index_format == + xenos::IndexFormat::kInt16 + ? VK_INDEX_TYPE_UINT16 + : VK_INDEX_TYPE_UINT32); + deferred_command_buffer_.CmdVkDrawIndexed( + primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); } return true; } -bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - xe::gpu::Shader::ConstantRegisterMap dummy_map; - std::memset(&dummy_map, 0, sizeof(dummy_map)); - - // Upload the constants the shaders require. - // These are optional, and if none are defined 0 will be returned. - auto constant_offsets = buffer_cache_->UploadConstantRegisters( - current_setup_buffer_, vertex_shader->constant_register_map(), - pixel_shader ? pixel_shader->constant_register_map() : dummy_map, - current_batch_fence_); - if (constant_offsets.first == VK_WHOLE_SIZE || - constant_offsets.second == VK_WHOLE_SIZE) { - // Shader wants constants but we couldn't upload them. - return false; - } - - // Configure constant uniform access to point at our offsets. - auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); - auto pipeline_layout = pipeline_cache_->pipeline_layout(); - uint32_t set_constant_offsets[2] = { - static_cast<uint32_t>(constant_offsets.first), - static_cast<uint32_t>(constant_offsets.second)}; - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - dfn.vkCmdBindDescriptorSets( - command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, - &constant_descriptor_set, - static_cast<uint32_t>(xe::countof(set_constant_offsets)), - set_constant_offsets); - - return true; -} - -bool VulkanCommandProcessor::PopulateIndexBuffer( - VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) { - auto& regs = *register_file_; - if (!index_buffer_info || !index_buffer_info->guest_base) { - // No index buffer or auto draw. - return true; - } - auto& info = *index_buffer_info; - -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - // Min/max index ranges for clamping. This is often [0,FFFF|FFFFFF]. - // All indices should be clamped to [min,max]. May be a way to do this in GL.
- uint32_t min_index = regs[XE_GPU_REG_VGT_MIN_VTX_INDX].u32; - uint32_t max_index = regs[XE_GPU_REG_VGT_MAX_VTX_INDX].u32; - assert_true(min_index == 0); - assert_true(max_index == 0xFFFF || max_index == 0xFFFFFF); - - assert_true(info.endianness == xenos::Endian::k8in16 || - info.endianness == xenos::Endian::k8in32); - - trace_writer_.WriteMemoryRead(info.guest_base, info.length); - - // Upload (or get a cached copy of) the buffer. - uint32_t source_addr = info.guest_base; - uint32_t source_length = - info.count * (info.format == xenos::IndexFormat::kInt32 - ? sizeof(uint32_t) - : sizeof(uint16_t)); - auto buffer_ref = buffer_cache_->UploadIndexBuffer( - current_setup_buffer_, source_addr, source_length, info.format, - current_batch_fence_); - if (buffer_ref.second == VK_WHOLE_SIZE) { - // Failed to upload buffer. - return false; - } - - // Bind the buffer. - VkIndexType index_type = info.format == xenos::IndexFormat::kInt32 - ? VK_INDEX_TYPE_UINT32 - : VK_INDEX_TYPE_UINT16; - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - dfn.vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second, - index_type); - - return true; -} - -bool VulkanCommandProcessor::PopulateVertexBuffers( - VkCommandBuffer command_buffer, VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader) { - auto& regs = *register_file_; - -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto& vertex_bindings = vertex_shader->vertex_bindings(); - if (vertex_bindings.empty()) { - // No bindings. - return true; - } - - assert_true(vertex_bindings.size() <= 32); - auto descriptor_set = buffer_cache_->PrepareVertexSet( - setup_buffer, current_batch_fence_, vertex_bindings); - if (!descriptor_set) { - XELOGW("Failed to prepare vertex set!"); - return false; - } - - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - dfn.vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 2, 1, - &descriptor_set, 0, nullptr); - return true; -} - -bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, - VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - std::vector<Shader::TextureBinding> dummy_bindings; - auto descriptor_set = texture_cache_->PrepareTextureSet( - setup_buffer, current_batch_fence_, vertex_shader->texture_bindings(), - pixel_shader ? pixel_shader->texture_bindings() : dummy_bindings); - if (!descriptor_set) { - // Unable to bind set. - XELOGW("Failed to prepare texture set!"); - return false; - } - - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - dfn.vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 1, 1, - &descriptor_set, 0, nullptr); - - return true; -} - bool VulkanCommandProcessor::IssueCopy() { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); - auto& regs = *register_file_; +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - // This is used to resolve surfaces, taking them from EDRAM render targets - // to system memory. It can optionally clear color/depth surfaces, too.
- // The command buffer has stuff for actually doing this by drawing, however - // we should be able to do it without that much easier. - - struct { - reg::RB_COPY_CONTROL copy_control; - uint32_t copy_dest_base; - reg::RB_COPY_DEST_PITCH copy_dest_pitch; - reg::RB_COPY_DEST_INFO copy_dest_info; - uint32_t tile_clear; - uint32_t depth_clear; - uint32_t color_clear; - uint32_t color_clear_low; - uint32_t copy_func; - uint32_t copy_ref; - uint32_t copy_mask; - uint32_t copy_surface_slice; - }* copy_regs = reinterpret_cast<decltype(copy_regs)>( - &regs[XE_GPU_REG_RB_COPY_CONTROL].u32); - - struct { - reg::PA_SC_WINDOW_OFFSET window_offset; - reg::PA_SC_WINDOW_SCISSOR_TL window_scissor_tl; - reg::PA_SC_WINDOW_SCISSOR_BR window_scissor_br; - }* window_regs = reinterpret_cast<decltype(window_regs)>( - &regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32); - - // True if the source tile is a color target - bool is_color_source = copy_regs->copy_control.copy_src_select <= 3; - - // Render targets 0-3, 4 = depth - uint32_t copy_src_select = copy_regs->copy_control.copy_src_select; - bool color_clear_enabled = copy_regs->copy_control.color_clear_enable != 0; - bool depth_clear_enabled = copy_regs->copy_control.depth_clear_enable != 0; - CopyCommand copy_command = copy_regs->copy_control.copy_command; - - assert_true(copy_regs->copy_dest_info.copy_dest_array == 0); - assert_true(copy_regs->copy_dest_info.copy_dest_slice == 0); - auto copy_dest_format = - ColorFormatToTextureFormat(copy_regs->copy_dest_info.copy_dest_format); - // TODO: copy dest number / bias - - uint32_t copy_dest_base = copy_regs->copy_dest_base; - uint32_t copy_dest_pitch = copy_regs->copy_dest_pitch.copy_dest_pitch; - uint32_t copy_dest_height = copy_regs->copy_dest_pitch.copy_dest_height; - - // None of this is supported yet: - assert_true(copy_regs->copy_surface_slice == 0); - assert_true(copy_regs->copy_func == 0); - assert_true(copy_regs->copy_ref == 0); - assert_true(copy_regs->copy_mask == 0); - - // RB_SURFACE_INFO - // https://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = surface_info & 0x3FFF; - auto surface_msaa = - static_cast<xenos::MsaaSamples>((surface_info >> 16) & 0x3); - - // TODO(benvanik): any way to scissor this? a200 has: - // REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) | - // A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff); - // but I can't seem to find something similar. - uint32_t dest_logical_width = copy_dest_pitch; - uint32_t dest_logical_height = copy_dest_height; - - // vtx_window_offset_enable - assert_true(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x00010000); - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - int32_t window_offset_x = window_regs->window_offset.window_x_offset; - int32_t window_offset_y = window_regs->window_offset.window_y_offset; - - uint32_t dest_texel_size = uint32_t(GetTexelSize(copy_dest_format)); - - // Adjust the copy base offset to point to the beginning of the texture, so - // we don't run into hiccups down the road (e.g. resolving the last part going - // backwards). - int32_t dest_offset = - window_offset_y * copy_dest_pitch * int(dest_texel_size); - dest_offset += window_offset_x * 32 * int(dest_texel_size); - copy_dest_base += dest_offset; - - // HACK: vertices to use are always in vf0.
- int copy_vertex_fetch_slot = 0; - int r = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (copy_vertex_fetch_slot / 3) * 6; - const auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (copy_vertex_fetch_slot % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - assert_true(fetch->type == xenos::FetchConstantType::kVertex); - assert_true(fetch->endian == xenos::Endian::k8in32); - assert_true(fetch->size == 6); - const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2); - trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); - - // Most vertices have a negative half pixel offset applied, which we reverse. - auto& vtx_cntl = *(reg::PA_SU_VTX_CNTL*)&regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; - float vtx_offset = vtx_cntl.pix_center == 0 ? 0.5f : 0.f; - - float dest_points[6]; - for (int i = 0; i < 6; i++) { - dest_points[i] = - GpuSwap(xe::load<float>(vertex_addr + i * 4), fetch->endian) + - vtx_offset; + if (!BeginSubmission(true)) { + return false; } - // Note: The xenos only supports rectangle copies (luckily) - int32_t dest_min_x = int32_t( - (std::min(std::min(dest_points[0], dest_points[2]), dest_points[4]))); - int32_t dest_max_x = int32_t( - (std::max(std::max(dest_points[0], dest_points[2]), dest_points[4]))); - - int32_t dest_min_y = int32_t( - (std::min(std::min(dest_points[1], dest_points[3]), dest_points[5]))); - int32_t dest_max_y = int32_t( - (std::max(std::max(dest_points[1], dest_points[3]), dest_points[5]))); - - VkOffset2D resolve_offset = {dest_min_x, dest_min_y}; - VkExtent2D resolve_extent = {uint32_t(dest_max_x - dest_min_x), - uint32_t(dest_max_y - dest_min_y)}; - - uint32_t color_edram_base = 0; - uint32_t depth_edram_base = 0; - xenos::ColorRenderTargetFormat color_format; - xenos::DepthRenderTargetFormat depth_format; - if (is_color_source) { - // Source from a color target. - reg::RB_COLOR_INFO color_info[4] = { - regs.Get<reg::RB_COLOR_INFO>(), - regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR1_INFO), - regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR2_INFO), - regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR3_INFO), - }; - color_edram_base = color_info[copy_src_select].color_base; - color_format = color_info[copy_src_select].color_format; - assert_true(color_info[copy_src_select].color_exp_bias == 0); + uint32_t written_address, written_length; + if (!render_target_cache_->Resolve(*memory_, *shared_memory_, *texture_cache_, + written_address, written_length)) { + return false; } - if (!is_color_source || depth_clear_enabled) { - // Source from or clear a depth target. - reg::RB_DEPTH_INFO depth_info = {regs[XE_GPU_REG_RB_DEPTH_INFO].u32}; - depth_edram_base = depth_info.depth_base; - depth_format = depth_info.depth_format; - if (!is_color_source) { - copy_dest_format = DepthRenderTargetToTextureFormat(depth_format); + // TODO(Triang3l): CPU readback. + + return true; +} + +void VulkanCommandProcessor::InitializeTrace() { + CommandProcessor::InitializeTrace(); + + if (!BeginSubmission(true)) { + return; + } + // TODO(Triang3l): Write the EDRAM.
+  bool shared_memory_submitted =
+      shared_memory_->InitializeTraceSubmitDownloads();
+  if (!shared_memory_submitted) {
+    return;
+  }
+  AwaitAllQueueOperationsCompletion();
+  if (shared_memory_submitted) {
+    shared_memory_->InitializeTraceCompleteDownloads();
+  }
+}
+
+void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss(
+    uint64_t await_submission) {
+  // Only report once, no need to retry a wait that won't succeed anyway.
+  if (device_lost_) {
+    return;
+  }
+
+  if (await_submission >= GetCurrentSubmission()) {
+    if (submission_open_) {
+      EndSubmission(false);
+    }
+    // A submission won't be ended if it hasn't been started, or if ending
+    // has failed - clamp the index.
+    await_submission = GetCurrentSubmission() - 1;
+  }
+
+  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  size_t fences_total = submissions_in_flight_fences_.size();
+  size_t fences_awaited = 0;
+  if (await_submission > submission_completed_) {
+    // Await in a blocking way if requested.
+    // TODO(Triang3l): Await only one fence. "Fence signal operations that are
+    // defined by vkQueueSubmit additionally include in the first
+    // synchronization scope all commands that occur earlier in submission
+    // order."
+    VkResult wait_result = dfn.vkWaitForFences(
+        device, uint32_t(await_submission - submission_completed_),
+        submissions_in_flight_fences_.data(), VK_TRUE, UINT64_MAX);
+    if (wait_result == VK_SUCCESS) {
+      fences_awaited += await_submission - submission_completed_;
+    } else {
+      XELOGE("Failed to await submission completion Vulkan fences");
+      if (wait_result == VK_ERROR_DEVICE_LOST) {
+        device_lost_ = true;
+      }
+    }
+  }
+  // Check how far into the submissions the GPU currently is, in order because
+  // submissions themselves can be executed out of order, but Xenia serializes
+  // that for simplicity.
+  while (fences_awaited < fences_total) {
+    VkResult fence_status = dfn.vkWaitForFences(
+        device, 1, &submissions_in_flight_fences_[fences_awaited], VK_TRUE, 0);
+    if (fence_status != VK_SUCCESS) {
+      if (fence_status == VK_ERROR_DEVICE_LOST) {
+        device_lost_ = true;
+      }
+      break;
+    }
+    ++fences_awaited;
+  }
+  if (device_lost_) {
+    graphics_system_->OnHostGpuLossFromAnyThread(true);
+    return;
+  }
+  if (!fences_awaited) {
+    // Not updated - no need to reclaim or download things.
+    return;
+  }
+  // Reclaim fences.
+  fences_free_.reserve(fences_free_.size() + fences_awaited);
+  auto submissions_in_flight_fences_awaited_end =
+      submissions_in_flight_fences_.cbegin();
+  std::advance(submissions_in_flight_fences_awaited_end, fences_awaited);
+  fences_free_.insert(fences_free_.cend(),
+                      submissions_in_flight_fences_.cbegin(),
+                      submissions_in_flight_fences_awaited_end);
+  submissions_in_flight_fences_.erase(submissions_in_flight_fences_.cbegin(),
+                                      submissions_in_flight_fences_awaited_end);
+  submission_completed_ += fences_awaited;
+
+  // Reclaim semaphores.
+  while (!submissions_in_flight_semaphores_.empty()) {
+    const auto& semaphore_submission =
+        submissions_in_flight_semaphores_.front();
+    if (semaphore_submission.first > submission_completed_) {
+      break;
+    }
+    semaphores_free_.push_back(semaphore_submission.second);
+    submissions_in_flight_semaphores_.pop_front();
+  }
+
+  // Reclaim command pools.
+  while (!command_buffers_submitted_.empty()) {
+    const auto& command_buffer_pair = command_buffers_submitted_.front();
+    if (command_buffer_pair.first > submission_completed_) {
+      break;
+    }
+    command_buffers_writable_.push_back(command_buffer_pair.second);
+    command_buffers_submitted_.pop_front();
+  }
+
+  shared_memory_->CompletedSubmissionUpdated();
+
+  primitive_processor_->CompletedSubmissionUpdated();
+
+  render_target_cache_->CompletedSubmissionUpdated();
+
+  texture_cache_->CompletedSubmissionUpdated(submission_completed_);
+
+  // Destroy objects scheduled for destruction.
+  while (!destroy_framebuffers_.empty()) {
+    const auto& destroy_pair = destroy_framebuffers_.front();
+    if (destroy_pair.first > submission_completed_) {
+      break;
+    }
+    dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr);
+    destroy_framebuffers_.pop_front();
+  }
+  while (!destroy_buffers_.empty()) {
+    const auto& destroy_pair = destroy_buffers_.front();
+    if (destroy_pair.first > submission_completed_) {
+      break;
+    }
+    dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr);
+    destroy_buffers_.pop_front();
+  }
+  while (!destroy_memory_.empty()) {
+    const auto& destroy_pair = destroy_memory_.front();
+    if (destroy_pair.first > submission_completed_) {
+      break;
+    }
+    dfn.vkFreeMemory(device, destroy_pair.second, nullptr);
+    destroy_memory_.pop_front();
+  }
+}
+
+bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  if (device_lost_) {
+    return false;
+  }
+
+  bool is_opening_frame = is_guest_command && !frame_open_;
+  if (submission_open_ && !is_opening_frame) {
+    return true;
+  }
+
+  // Check the fence - needed for all kinds of submissions (to reclaim
+  // transient resources early) and specifically for frames (not to queue too
+  // many), and await the availability of the current frame. Also check whether
+  // the device is still available, and whether the await was successful.
+  uint64_t await_submission =
+      is_opening_frame
+          ? closed_frame_submissions_[frame_current_ % kMaxFramesInFlight]
+          : 0;
+  CheckSubmissionFenceAndDeviceLoss(await_submission);
+  if (device_lost_ || submission_completed_ < await_submission) {
+    return false;
+  }
+
+  if (is_opening_frame) {
+    // Update the completed frame index, also obtaining the actual completed
+    // frame number (since the CPU may actually be less than 3 frames behind)
+    // before reclaiming resources tracked with the frame number.
+    frame_completed_ = std::max(frame_current_, uint64_t(kMaxFramesInFlight)) -
+                       kMaxFramesInFlight;
+    for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
+         ++frame) {
+      if (closed_frame_submissions_[frame % kMaxFramesInFlight] >
+          submission_completed_) {
+        break;
+      }
+      frame_completed_ = frame;
     }
   }
-  xenos::Endian resolve_endian = xenos::Endian::k8in32;
-  if (copy_regs->copy_dest_info.copy_dest_endian <= xenos::Endian128::k16in32) {
-    resolve_endian =
-        static_cast<xenos::Endian>(copy_regs->copy_dest_info.copy_dest_endian);
+  if (!submission_open_) {
+    submission_open_ = true;
+
+    // Start a new deferred command buffer - will submit it to the real one at
+    // the end of the submission (when async pipeline object creation requests
+    // are fulfilled).
+    deferred_command_buffer_.Reset();
+
+    // Reset cached state of the command buffer.
+    dynamic_viewport_update_needed_ = true;
+    dynamic_scissor_update_needed_ = true;
+    dynamic_depth_bias_update_needed_ = true;
+    dynamic_blend_constants_update_needed_ = true;
+    dynamic_stencil_compare_mask_front_update_needed_ = true;
+    dynamic_stencil_compare_mask_back_update_needed_ = true;
+    dynamic_stencil_write_mask_front_update_needed_ = true;
+    dynamic_stencil_write_mask_back_update_needed_ = true;
+    dynamic_stencil_reference_front_update_needed_ = true;
+    dynamic_stencil_reference_back_update_needed_ = true;
+    current_render_pass_ = VK_NULL_HANDLE;
+    current_framebuffer_ = nullptr;
+    current_guest_graphics_pipeline_ = VK_NULL_HANDLE;
+    current_external_graphics_pipeline_ = VK_NULL_HANDLE;
+    current_external_compute_pipeline_ = VK_NULL_HANDLE;
+    current_guest_graphics_pipeline_layout_ = nullptr;
+    current_graphics_descriptor_sets_bound_up_to_date_ = 0;
+
+    primitive_processor_->BeginSubmission();
+
+    texture_cache_->BeginSubmission(GetCurrentSubmission());
   }
-  // Demand a resolve texture from the texture cache.
-  TextureInfo texture_info;
-  TextureInfo::PrepareResolve(
-      copy_dest_base, copy_dest_format, resolve_endian, copy_dest_pitch,
-      dest_logical_width, std::max(1u, dest_logical_height), 1, &texture_info);
+  if (is_opening_frame) {
+    frame_open_ = true;
-  auto texture = texture_cache_->DemandResolveTexture(texture_info);
-  if (!texture) {
-    // Out of memory.
-    XELOGD("Failed to demand resolve texture!");
-    return false;
+    // Reset bindings that depend on transient data.
+    std::memset(current_float_constant_map_vertex_, 0,
+                sizeof(current_float_constant_map_vertex_));
+    std::memset(current_float_constant_map_pixel_, 0,
+                sizeof(current_float_constant_map_pixel_));
+    std::memset(current_graphics_descriptor_sets_, 0,
+                sizeof(current_graphics_descriptor_sets_));
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
+            shared_memory_and_edram_descriptor_set_;
+    current_graphics_descriptor_set_values_up_to_date_ =
+        UINT32_C(1)
+        << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram;
+
+    // Reclaim pool pages - no need to do this every small submission since
+    // some pages may be reused.
+    // FIXME(Triang3l): This will result in a memory leak if the guest is not
+    // presenting.
+    uniform_buffer_pool_->Reclaim(frame_completed_);
+    while (!single_transient_descriptors_used_.empty()) {
+      const UsedSingleTransientDescriptor& used_transient_descriptor =
+          single_transient_descriptors_used_.front();
+      if (used_transient_descriptor.frame > frame_completed_) {
+        break;
+      }
+      single_transient_descriptors_free_[size_t(
+          used_transient_descriptor.layout)]
+          .push_back(used_transient_descriptor.set);
+      single_transient_descriptors_used_.pop_front();
+    }
+    while (!texture_transient_descriptor_sets_used_.empty()) {
+      const UsedTextureTransientDescriptorSet& used_transient_descriptor_set =
+          texture_transient_descriptor_sets_used_.front();
+      if (used_transient_descriptor_set.frame > frame_completed_) {
+        break;
+      }
+      auto it = texture_transient_descriptor_sets_free_.find(
+          used_transient_descriptor_set.layout);
+      if (it == texture_transient_descriptor_sets_free_.end()) {
+        it =
+            texture_transient_descriptor_sets_free_
+                .emplace(
+                    std::piecewise_construct,
+                    std::forward_as_tuple(used_transient_descriptor_set.layout),
+                    std::forward_as_tuple())
+                .first;
+      }
+      it->second.push_back(used_transient_descriptor_set.set);
+      texture_transient_descriptor_sets_used_.pop_front();
+    }
+
+    primitive_processor_->BeginFrame();
+
+    texture_cache_->BeginFrame();
   }
-  if (!(texture->usage_flags & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
-                                VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
-    // Resolve image doesn't support drawing, and we don't support conversion.
-    return false;
+  return true;
+}
+
+bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
+  ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  // Make sure everything needed for submitting exists.
+  if (submission_open_) {
+    if (fences_free_.empty()) {
+      VkFenceCreateInfo fence_create_info;
+      fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+      fence_create_info.pNext = nullptr;
+      fence_create_info.flags = 0;
+      VkFence fence;
+      if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
+          VK_SUCCESS) {
+        XELOGE("Failed to create a Vulkan fence");
+        // Try to submit later. Completely dropping the submission is not
+        // permitted because resources would be left in an undefined state.
+        return false;
+      }
+      fences_free_.push_back(fence);
+    }
+    if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) {
+      VkSemaphoreCreateInfo semaphore_create_info;
+      semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+      semaphore_create_info.pNext = nullptr;
+      semaphore_create_info.flags = 0;
+      VkSemaphore semaphore;
+      if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr,
+                                &semaphore) != VK_SUCCESS) {
+        XELOGE("Failed to create a Vulkan semaphore");
+        return false;
+      }
+      semaphores_free_.push_back(semaphore);
+    }
+    if (command_buffers_writable_.empty()) {
+      CommandBuffer command_buffer;
+      VkCommandPoolCreateInfo command_pool_create_info;
+      command_pool_create_info.sType =
+          VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+      command_pool_create_info.pNext = nullptr;
+      command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
+      command_pool_create_info.queueFamilyIndex =
+          provider.queue_family_graphics_compute();
+      if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr,
+                                  &command_buffer.pool) != VK_SUCCESS) {
+        XELOGE("Failed to create a Vulkan command pool");
+        return false;
+      }
+      VkCommandBufferAllocateInfo command_buffer_allocate_info;
+      command_buffer_allocate_info.sType =
+          VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+      command_buffer_allocate_info.pNext = nullptr;
+      command_buffer_allocate_info.commandPool = command_buffer.pool;
+      command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+      command_buffer_allocate_info.commandBufferCount = 1;
+      if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info,
+                                       &command_buffer.buffer) != VK_SUCCESS) {
+        XELOGE("Failed to allocate a Vulkan command buffer");
+        dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
+        return false;
+      }
+      command_buffers_writable_.push_back(command_buffer);
+    }
   }
-  texture->in_flight_fence = current_batch_fence_;
+  bool is_closing_frame = is_swap && frame_open_;
-  if (!frame_open_) {
-    BeginFrame();
-  } else if (current_render_state_) {
-    // Copy commands cannot be issued within a render pass.
-    render_cache_->EndRenderPass();
-    current_render_state_ = nullptr;
+  if (is_closing_frame) {
+    primitive_processor_->EndFrame();
+  }
+
+  if (submission_open_) {
+    assert_false(scratch_buffer_used_);
+
+    EndRenderPass();
+
+    render_target_cache_->EndSubmission();
+
+    primitive_processor_->EndSubmission();
+
+    shared_memory_->EndSubmission();
+
+    uniform_buffer_pool_->FlushWrites();
+
+    // Submit sparse binds earlier, before executing the deferred command
+    // buffer, to reduce latency.
+    if (!sparse_memory_binds_.empty()) {
+      sparse_buffer_bind_infos_temp_.clear();
+      sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size());
+      for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) {
+        VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info =
+            sparse_buffer_bind_infos_temp_.emplace_back();
+        sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer;
+        sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count;
+        sparse_buffer_bind_info.pBinds =
+            sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset;
+      }
+      assert_false(semaphores_free_.empty());
+      VkSemaphore bind_sparse_semaphore = semaphores_free_.back();
+      VkBindSparseInfo bind_sparse_info;
+      bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+      bind_sparse_info.pNext = nullptr;
+      bind_sparse_info.waitSemaphoreCount = 0;
+      bind_sparse_info.pWaitSemaphores = nullptr;
+      bind_sparse_info.bufferBindCount =
+          uint32_t(sparse_buffer_bind_infos_temp_.size());
+      bind_sparse_info.pBufferBinds =
+          !sparse_buffer_bind_infos_temp_.empty()
+              ? sparse_buffer_bind_infos_temp_.data()
+              : nullptr;
+      bind_sparse_info.imageOpaqueBindCount = 0;
+      bind_sparse_info.pImageOpaqueBinds = nullptr;
+      bind_sparse_info.imageBindCount = 0;
+      bind_sparse_info.pImageBinds = nullptr;
+      bind_sparse_info.signalSemaphoreCount = 1;
+      bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore;
+      VkResult bind_sparse_result;
+      {
+        ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition(
+            provider.AcquireQueue(provider.queue_family_sparse_binding(), 0));
+        bind_sparse_result = dfn.vkQueueBindSparse(
+            queue_acquisition.queue, 1, &bind_sparse_info, VK_NULL_HANDLE);
+      }
+      if (bind_sparse_result != VK_SUCCESS) {
+        XELOGE("Failed to submit Vulkan sparse binds");
+        return false;
+      }
+      current_submission_wait_semaphores_.push_back(bind_sparse_semaphore);
+      semaphores_free_.pop_back();
+      current_submission_wait_stage_masks_.push_back(
+          sparse_bind_wait_stage_mask_);
+      sparse_bind_wait_stage_mask_ = 0;
+      sparse_buffer_binds_.clear();
+      sparse_memory_binds_.clear();
+    }
+
+    SubmitBarriers(true);
+
+    assert_false(command_buffers_writable_.empty());
+    CommandBuffer command_buffer = command_buffers_writable_.back();
+    if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
+      XELOGE("Failed to reset a Vulkan command pool");
+      return false;
+    }
+    VkCommandBufferBeginInfo command_buffer_begin_info;
+    command_buffer_begin_info.sType =
+        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+    command_buffer_begin_info.pNext = nullptr;
+    command_buffer_begin_info.flags =
+        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+    command_buffer_begin_info.pInheritanceInfo = nullptr;
+    if (dfn.vkBeginCommandBuffer(command_buffer.buffer,
+                                 &command_buffer_begin_info) != VK_SUCCESS) {
+      XELOGE("Failed to begin a Vulkan command buffer");
+      return false;
+    }
+    deferred_command_buffer_.Execute(command_buffer.buffer);
+    if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
+      XELOGE("Failed to end a Vulkan command buffer");
+      return false;
+    }
+
+    VkSubmitInfo submit_info;
+    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.pNext = nullptr;
+    if (!current_submission_wait_semaphores_.empty()) {
+      submit_info.waitSemaphoreCount =
+          uint32_t(current_submission_wait_semaphores_.size());
+      submit_info.pWaitSemaphores = current_submission_wait_semaphores_.data();
+      submit_info.pWaitDstStageMask =
+          current_submission_wait_stage_masks_.data();
+    } else {
+      submit_info.waitSemaphoreCount = 0;
+      submit_info.pWaitSemaphores = nullptr;
+      submit_info.pWaitDstStageMask = nullptr;
+    }
+    submit_info.commandBufferCount = 1;
+    submit_info.pCommandBuffers = &command_buffer.buffer;
+    submit_info.signalSemaphoreCount = 0;
+    submit_info.pSignalSemaphores = nullptr;
+    assert_false(fences_free_.empty());
+    VkFence fence = fences_free_.back();
+    if (dfn.vkResetFences(device, 1, &fence) != VK_SUCCESS) {
+      XELOGE("Failed to reset a Vulkan submission fence");
+      return false;
+    }
+    VkResult submit_result;
+    {
+      ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition(
+          provider.AcquireQueue(provider.queue_family_graphics_compute(), 0));
+      submit_result =
+          dfn.vkQueueSubmit(queue_acquisition.queue, 1, &submit_info, fence);
+    }
+    if (submit_result != VK_SUCCESS) {
+      XELOGE("Failed to submit a Vulkan command buffer");
+      if (submit_result == VK_ERROR_DEVICE_LOST && !device_lost_) {
+        device_lost_ = true;
+        graphics_system_->OnHostGpuLossFromAnyThread(true);
+      }
+      return false;
+    }
+    uint64_t submission_current = GetCurrentSubmission();
+    current_submission_wait_stage_masks_.clear();
+    for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+      submissions_in_flight_semaphores_.emplace_back(submission_current,
+                                                     semaphore);
+    }
+    current_submission_wait_semaphores_.clear();
+    command_buffers_submitted_.emplace_back(submission_current,
+                                            command_buffer);
+    command_buffers_writable_.pop_back();
+    // Increments the current submission number, going to the next submission.
+    submissions_in_flight_fences_.push_back(fence);
+    fences_free_.pop_back();
+
+    submission_open_ = false;
+  }
+
+  if (is_closing_frame) {
+    frame_open_ = false;
+    // The submission has already been closed at this point, so the frame is
+    // the current submission index minus 1.
+    closed_frame_submissions_[(frame_current_++) % kMaxFramesInFlight] =
+        GetCurrentSubmission() - 1;
+
+    if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
+      cache_clear_requested_ = false;
+
+      DestroyScratchBuffer();
+
+      for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) {
+        ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer,
+                                               device,
+                                               swap_framebuffer.framebuffer);
+      }
+
+      assert_true(command_buffers_submitted_.empty());
+      for (const CommandBuffer& command_buffer : command_buffers_writable_) {
+        dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
+      }
+      command_buffers_writable_.clear();
+
+      ClearTransientDescriptorPools();
+
+      uniform_buffer_pool_->ClearCache();
+
+      texture_cache_->ClearCache();
+
+      render_target_cache_->ClearCache();
+
+      // Not clearing the pipeline layouts and the descriptor set layouts as
+      // they're referenced by pipelines, which are not destroyed.
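+      // Everything released directly here is safe to destroy only because of
+      // the AwaitAllQueueOperationsCompletion above; objects that may still be
+      // in use by the GPU instead go through the submission-fenced deletion
+      // queues, roughly:
+      //   destroy_buffers_.emplace_back(GetCurrentSubmission(), buffer);
+      // with the actual vkDestroyBuffer call deferred until
+      // CheckSubmissionFenceAndDeviceLoss observes submission_completed_
+      // reaching that submission index.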
+
+      primitive_processor_->ClearCache();
+
+      shared_memory_->ClearCache();
+    }
+  }
+
+  return true;
+}
+
+void VulkanCommandProcessor::ClearTransientDescriptorPools() {
+  texture_transient_descriptor_sets_free_.clear();
+  texture_transient_descriptor_sets_used_.clear();
+  transient_descriptor_allocator_sampler_.Reset();
+  transient_descriptor_allocator_sampled_image_.Reset();
+
+  for (std::vector<VkDescriptorSet>& transient_descriptors_free :
+       single_transient_descriptors_free_) {
+    transient_descriptors_free.clear();
+  }
+  single_transient_descriptors_used_.clear();
+  transient_descriptor_allocator_storage_buffer_.Reset();
+  transient_descriptor_allocator_uniform_buffer_.Reset();
+}
+
+void VulkanCommandProcessor::SplitPendingBarrier() {
+  size_t pending_buffer_memory_barrier_count =
+      pending_barriers_buffer_memory_barriers_.size();
+  size_t pending_image_memory_barrier_count =
+      pending_barriers_image_memory_barriers_.size();
+  if (!current_pending_barrier_.src_stage_mask &&
+      !current_pending_barrier_.dst_stage_mask &&
+      current_pending_barrier_.buffer_memory_barriers_offset >=
+          pending_buffer_memory_barrier_count &&
+      current_pending_barrier_.image_memory_barriers_offset >=
+          pending_image_memory_barrier_count) {
+    return;
+  }
+  pending_barriers_.emplace_back(current_pending_barrier_);
+  current_pending_barrier_.src_stage_mask = 0;
+  current_pending_barrier_.dst_stage_mask = 0;
+  current_pending_barrier_.buffer_memory_barriers_offset =
+      pending_buffer_memory_barrier_count;
+  current_pending_barrier_.image_memory_barriers_offset =
+      pending_image_memory_barrier_count;
+}
+
+void VulkanCommandProcessor::DestroyScratchBuffer() {
+  assert_false(scratch_buffer_used_);
+
+  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  scratch_buffer_last_usage_submission_ = 0;
+  scratch_buffer_last_access_mask_ = 0;
+  scratch_buffer_last_stage_mask_ = 0;
+  scratch_buffer_size_ = 0;
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         scratch_buffer_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         scratch_buffer_memory_);
+}
+
+void VulkanCommandProcessor::UpdateDynamicState(
+    const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
+    reg::RB_DEPTHCONTROL normalized_depth_control) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  const RegisterFile& regs = *register_file_;
+
+  // Window parameters.
+  // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
+  // See r200UpdateWindow:
+  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
+  auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
+
+  // Viewport.
+  VkViewport viewport;
+  if (viewport_info.xy_extent[0] && viewport_info.xy_extent[1]) {
+    viewport.x = float(viewport_info.xy_offset[0]);
+    viewport.y = float(viewport_info.xy_offset[1]);
+    viewport.width = float(viewport_info.xy_extent[0]);
+    viewport.height = float(viewport_info.xy_extent[1]);
+  } else {
+    // Vulkan viewport width must be greater than 0.0f, but the Xenia viewport
+    // may be empty for various reasons - set the viewport to outside the
+    // framebuffer.
+    viewport.x = -1.0f;
+    viewport.y = -1.0f;
+    viewport.width = 1.0f;
+    viewport.height = 1.0f;
+  }
+  viewport.minDepth = viewport_info.z_min;
+  viewport.maxDepth = viewport_info.z_max;
+  SetViewport(viewport);
+
+  // Scissor.
+  draw_util::Scissor scissor;
+  draw_util::GetScissor(regs, scissor);
+  VkRect2D scissor_rect;
+  scissor_rect.offset.x = int32_t(scissor.offset[0]);
+  scissor_rect.offset.y = int32_t(scissor.offset[1]);
+  scissor_rect.extent.width = scissor.extent[0];
+  scissor_rect.extent.height = scissor.extent[1];
+  SetScissor(scissor_rect);
+
+  // Depth bias.
+  // TODO(Triang3l): Disable the depth bias for the fragment shader interlock
+  // RB implementation.
+  float depth_bias_constant_factor, depth_bias_slope_factor;
+  draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal,
+                                           depth_bias_slope_factor,
+                                           depth_bias_constant_factor);
+  depth_bias_constant_factor *=
+      regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
+              xenos::DepthRenderTargetFormat::kD24S8
+          ? draw_util::kD3D10PolygonOffsetFactorUnorm24
+          : draw_util::kD3D10PolygonOffsetFactorFloat24;
+  // With non-square resolution scaling, make sure the worst-case impact is
+  // reverted (slope only along the scaled axis), thus max. More bias is better
+  // than less bias, because less bias means Z fighting with the background is
+  // more likely.
+  depth_bias_slope_factor *=
+      xenos::kPolygonOffsetScaleSubpixelUnit *
+      float(std::max(render_target_cache_->draw_resolution_scale_x(),
+                     render_target_cache_->draw_resolution_scale_y()));
+  // std::memcmp instead of != so that NaN values don't cause every draw to
+  // invalidate the depth bias.
+  dynamic_depth_bias_update_needed_ |=
+      std::memcmp(&dynamic_depth_bias_constant_factor_,
+                  &depth_bias_constant_factor, sizeof(float)) != 0;
+  dynamic_depth_bias_update_needed_ |=
+      std::memcmp(&dynamic_depth_bias_slope_factor_, &depth_bias_slope_factor,
+                  sizeof(float)) != 0;
+  if (dynamic_depth_bias_update_needed_) {
+    dynamic_depth_bias_constant_factor_ = depth_bias_constant_factor;
+    dynamic_depth_bias_slope_factor_ = depth_bias_slope_factor;
+    deferred_command_buffer_.CmdVkSetDepthBias(
+        dynamic_depth_bias_constant_factor_, 0.0f,
+        dynamic_depth_bias_slope_factor_);
+    dynamic_depth_bias_update_needed_ = false;
+  }
+
+  // Blend constants.
+  float blend_constants[] = {
+      regs[XE_GPU_REG_RB_BLEND_RED].f32,
+      regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
+      regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
+      regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
+  };
+  dynamic_blend_constants_update_needed_ |=
+      std::memcmp(dynamic_blend_constants_, blend_constants,
+                  sizeof(float) * 4) != 0;
+  if (dynamic_blend_constants_update_needed_) {
+    std::memcpy(dynamic_blend_constants_, blend_constants, sizeof(float) * 4);
+    deferred_command_buffer_.CmdVkSetBlendConstants(dynamic_blend_constants_);
+    dynamic_blend_constants_update_needed_ = false;
+  }
+
+  // Stencil masks and references.
+  // Due to pretty complex conditions involving registers not directly related
+  // to stencil (the primitive type, culling), the values are changed only when
+  // stencil is actually needed. However, the way dynamic state is set in
+  // Vulkan doesn't take into account whether the state actually has an effect
+  // on drawing, and the masks and the references are always dynamic in Xenia
+  // guest pipelines, so they must be set in the command buffer before any
+  // draw.
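+  // The stencil state below follows the same redundancy-elimination pattern
+  // as the depth bias and the blend constants above; schematically (with a
+  // placeholder name standing in for any of the dynamic states):
+  //   dynamic_state_update_needed_ |= dynamic_state_ != new_value;
+  //   if (dynamic_state_update_needed_) {
+  //     dynamic_state_ = new_value;
+  //     deferred_command_buffer_.CmdVkSet...(...);
+  //     dynamic_state_update_needed_ = false;
+  //   }
+  // The *_update_needed_ flags are forced to true whenever a new submission is
+  // opened, because a fresh command buffer contains no dynamic state at all.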
+  if (normalized_depth_control.stencil_enable) {
+    Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg;
+    if (primitive_polygonal && normalized_depth_control.backface_enable) {
+      const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+      const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+          device_portability_subset_features =
+              provider.device_portability_subset_features();
+      if (!device_portability_subset_features ||
+          device_portability_subset_features->separateStencilMaskRef) {
+        stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
+        stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
+      } else {
+        // Without separateStencilMaskRef, the same value must be used for both
+        // faces - choose the back face values only if drawing only back faces.
+        stencil_ref_mask_front_reg =
+            regs.Get<reg::PA_SU_SC_MODE_CNTL>().cull_front
+                ? XE_GPU_REG_RB_STENCILREFMASK_BF
+                : XE_GPU_REG_RB_STENCILREFMASK;
+        stencil_ref_mask_back_reg = stencil_ref_mask_front_reg;
+      }
+    } else {
+      stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
+      stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK;
+    }
+    auto stencil_ref_mask_front =
+        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_front_reg);
+    auto stencil_ref_mask_back =
+        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_back_reg);
+    // Compare mask.
+    dynamic_stencil_compare_mask_front_update_needed_ |=
+        dynamic_stencil_compare_mask_front_ !=
+        stencil_ref_mask_front.stencilmask;
+    dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask;
+    dynamic_stencil_compare_mask_back_update_needed_ |=
+        dynamic_stencil_compare_mask_back_ != stencil_ref_mask_back.stencilmask;
+    dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask;
+    // Write mask.
+    dynamic_stencil_write_mask_front_update_needed_ |=
+        dynamic_stencil_write_mask_front_ !=
+        stencil_ref_mask_front.stencilwritemask;
+    dynamic_stencil_write_mask_front_ = stencil_ref_mask_front.stencilwritemask;
+    dynamic_stencil_write_mask_back_update_needed_ |=
+        dynamic_stencil_write_mask_back_ !=
+        stencil_ref_mask_back.stencilwritemask;
+    dynamic_stencil_write_mask_back_ = stencil_ref_mask_back.stencilwritemask;
+    // Reference.
+    dynamic_stencil_reference_front_update_needed_ |=
+        dynamic_stencil_reference_front_ != stencil_ref_mask_front.stencilref;
+    dynamic_stencil_reference_front_ = stencil_ref_mask_front.stencilref;
+    dynamic_stencil_reference_back_update_needed_ |=
+        dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref;
+    dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref;
+  }
+  // Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on the
+  // Vulkan portability subset without separateStencilMaskRef.
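+  // For instance, when the front and back compare masks turn out to be equal,
+  // a single
+  //   CmdVkSetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, mask);
+  // is recorded below instead of two per-face commands - and without
+  // separateStencilMaskRef, the portability subset only allows the stencil
+  // reference in particular to be set with VK_STENCIL_FACE_FRONT_AND_BACK.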
+  if (dynamic_stencil_compare_mask_front_update_needed_ ||
+      dynamic_stencil_compare_mask_back_update_needed_) {
+    if (dynamic_stencil_compare_mask_front_ ==
+        dynamic_stencil_compare_mask_back_) {
+      deferred_command_buffer_.CmdVkSetStencilCompareMask(
+          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_compare_mask_front_);
+    } else {
+      if (dynamic_stencil_compare_mask_front_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilCompareMask(
+            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_compare_mask_front_);
+      }
+      if (dynamic_stencil_compare_mask_back_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilCompareMask(
+            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_compare_mask_back_);
+      }
+    }
+    dynamic_stencil_compare_mask_front_update_needed_ = false;
+    dynamic_stencil_compare_mask_back_update_needed_ = false;
+  }
+  if (dynamic_stencil_write_mask_front_update_needed_ ||
+      dynamic_stencil_write_mask_back_update_needed_) {
+    if (dynamic_stencil_write_mask_front_ == dynamic_stencil_write_mask_back_) {
+      deferred_command_buffer_.CmdVkSetStencilWriteMask(
+          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_);
+    } else {
+      if (dynamic_stencil_write_mask_front_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilWriteMask(
+            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_write_mask_front_);
+      }
+      if (dynamic_stencil_write_mask_back_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilWriteMask(
+            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_write_mask_back_);
+      }
+    }
+    dynamic_stencil_write_mask_front_update_needed_ = false;
+    dynamic_stencil_write_mask_back_update_needed_ = false;
+  }
+  if (dynamic_stencil_reference_front_update_needed_ ||
+      dynamic_stencil_reference_back_update_needed_) {
+    if (dynamic_stencil_reference_front_ == dynamic_stencil_reference_back_) {
+      deferred_command_buffer_.CmdVkSetStencilReference(
+          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_reference_front_);
+    } else {
+      if (dynamic_stencil_reference_front_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilReference(
+            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_reference_front_);
+      }
+      if (dynamic_stencil_reference_back_update_needed_) {
+        deferred_command_buffer_.CmdVkSetStencilReference(
+            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_reference_back_);
+      }
+    }
+    dynamic_stencil_reference_front_update_needed_ = false;
+    dynamic_stencil_reference_back_update_needed_ = false;
+  }
+
+  // TODO(Triang3l): VK_EXT_extended_dynamic_state and
+  // VK_EXT_extended_dynamic_state2.
+}
+
+void VulkanCommandProcessor::UpdateSystemConstantValues(
+    bool primitive_polygonal, xenos::Endian index_endian,
+    const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  const RegisterFile& regs = *register_file_;
+  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
+  float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
+  auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
+  auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
+  int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
+
+  // Get the color info register values for each render target.
+  reg::RB_COLOR_INFO color_infos[xenos::kMaxColorRenderTargets];
+  for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
+    color_infos[i] = regs.Get<reg::RB_COLOR_INFO>(
+        reg::RB_COLOR_INFO::rt_register_indices[i]);
+  }
+
+  bool dirty = false;
+
+  // Flags.
+  uint32_t flags = 0;
+  // W0 division control.
+  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
+  // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
+  //              = false: multiply the X, Y coordinates by 1/W0.
+  // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
+  //             = false: multiply the Z coordinate by 1/W0.
+  // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
+  //                        to get 1/W0.
+  if (pa_cl_vte_cntl.vtx_xy_fmt) {
+    flags |= SpirvShaderTranslator::kSysFlag_XYDividedByW;
+  }
+  if (pa_cl_vte_cntl.vtx_z_fmt) {
+    flags |= SpirvShaderTranslator::kSysFlag_ZDividedByW;
+  }
+  if (pa_cl_vte_cntl.vtx_w0_fmt) {
+    flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal;
+  }
+  // Whether the primitive is polygonal, and gl_FrontFacing matters.
+  if (primitive_polygonal) {
+    flags |= SpirvShaderTranslator::kSysFlag_PrimitivePolygonal;
+  }
+  // Primitive type.
+  if (draw_util::IsPrimitiveLine(regs)) {
+    flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine;
+  }
+  // Alpha test.
+  xenos::CompareFunction alpha_test_function =
+      rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
+                                        : xenos::CompareFunction::kAlways;
+  flags |= uint32_t(alpha_test_function)
+           << SpirvShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
+  // Gamma writing.
+  // TODO(Triang3l): Gamma as sRGB check.
+  for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
+    if (color_infos[i].color_format ==
+        xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
+      flags |= SpirvShaderTranslator::kSysFlag_ConvertColor0ToGamma << i;
+    }
+  }
+  dirty |= system_constants_.flags != flags;
+  system_constants_.flags = flags;
+
+  // Index or tessellation edge factor buffer endianness.
+  dirty |= system_constants_.vertex_index_endian != index_endian;
+  system_constants_.vertex_index_endian = index_endian;
+
+  // Vertex index offset.
+  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
+  system_constants_.vertex_base_index = vgt_indx_offset;
+
+  // Conversion to host normalized device coordinates.
+  for (uint32_t i = 0; i < 3; ++i) {
+    dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
+    dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
+    system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
+    system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
+  }
+
+  // Texture signedness / gamma.
+  {
+    uint32_t textures_remaining = used_texture_mask;
+    uint32_t texture_index;
+    while (xe::bit_scan_forward(textures_remaining, &texture_index)) {
+      textures_remaining &= ~(UINT32_C(1) << texture_index);
+      uint32_t& texture_signs_uint =
+          system_constants_.texture_swizzled_signs[texture_index >> 2];
+      uint32_t texture_signs_shift = 8 * (texture_index & 3);
+      uint8_t texture_signs =
+          texture_cache_->GetActiveTextureSwizzledSigns(texture_index);
+      uint32_t texture_signs_shifted = uint32_t(texture_signs)
+                                       << texture_signs_shift;
+      uint32_t texture_signs_mask = ((UINT32_C(1) << 8) - 1)
+                                    << texture_signs_shift;
+      dirty |=
+          (texture_signs_uint & texture_signs_mask) != texture_signs_shifted;
+      texture_signs_uint =
+          (texture_signs_uint & ~texture_signs_mask) | texture_signs_shifted;
+    }
+  }
+
+  // Texture host swizzle in the shader.
+  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+      device_portability_subset_features =
+          provider.device_portability_subset_features();
+  if (device_portability_subset_features &&
+      !device_portability_subset_features->imageViewFormatSwizzle) {
+    uint32_t textures_remaining = used_texture_mask;
+    uint32_t texture_index;
+    while (xe::bit_scan_forward(textures_remaining, &texture_index)) {
+      textures_remaining &= ~(UINT32_C(1) << texture_index);
+      uint32_t& texture_swizzles_uint =
+          system_constants_.texture_swizzles[texture_index >> 1];
+      uint32_t texture_swizzle_shift = 12 * (texture_index & 1);
+      uint32_t texture_swizzle =
+          texture_cache_->GetActiveTextureHostSwizzle(texture_index);
+      uint32_t texture_swizzle_shifted = uint32_t(texture_swizzle)
+                                         << texture_swizzle_shift;
+      uint32_t texture_swizzle_mask = ((UINT32_C(1) << 12) - 1)
+                                      << texture_swizzle_shift;
+      dirty |= (texture_swizzles_uint & texture_swizzle_mask) !=
+               texture_swizzle_shifted;
+      texture_swizzles_uint = (texture_swizzles_uint & ~texture_swizzle_mask) |
+                              texture_swizzle_shifted;
+    }
+  }
+
+  // Alpha test.
+  dirty |= system_constants_.alpha_test_reference != rb_alpha_ref;
+  system_constants_.alpha_test_reference = rb_alpha_ref;
+
+  // Color exponent bias.
+  for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
+    reg::RB_COLOR_INFO color_info = color_infos[i];
+    // Exponent bias is in bits 20:25 of RB_COLOR_INFO.
+    int32_t color_exp_bias = color_info.color_exp_bias;
+    if (render_target_cache_->GetPath() ==
+            RenderTargetCache::Path::kHostRenderTargets &&
+        (color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16 &&
+             !render_target_cache_->IsFixedRG16TruncatedToMinus1To1() ||
+         color_info.color_format ==
+                 xenos::ColorRenderTargetFormat::k_16_16_16_16 &&
+             !render_target_cache_->IsFixedRGBA16TruncatedToMinus1To1())) {
+      // Remap from -32...32 to -1...1 by dividing the output values by 32,
+      // losing blending correctness, but getting the full range.
+      color_exp_bias -= 5;
+    }
+    float color_exp_bias_scale;
+    *reinterpret_cast<uint32_t*>(&color_exp_bias_scale) =
+        UINT32_C(0x3F800000) + (color_exp_bias << 23);
+    dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
+    system_constants_.color_exp_bias[i] = color_exp_bias_scale;
+  }
+
+  if (dirty) {
+    current_graphics_descriptor_set_values_up_to_date_ &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
+  }
+}
+
+bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
+                                            const VulkanShader* pixel_shader) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  const RegisterFile& regs = *register_file_;
+
+  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  // Invalidate descriptors for changed data.
+
+  // Float constants.
+  // These are the constant base addresses/ranges for shaders.
+  // We have these hardcoded right now because nothing seems to differ on the
+  // Xbox 360 (however, OpenGL ES on Adreno 200 on Android has different
+  // ranges).
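+  // The constant register maps compared below are, by all appearances, 256-bit
+  // bitmaps (four uint64_t words) of which float constants the translated
+  // shader actually reads, so the uniform buffers written later contain only
+  // the used constants, gathered by walking the set bits:
+  //   while (xe::bit_scan_forward(bitmap_word, &constant_index)) {
+  //     bitmap_word &= ~(1ull << constant_index);
+  //     // append regs[base + (constant_index << 2)] to the buffer
+  //   }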
+  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
+              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
+  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
+              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
+  // Check if the float constant layout is still the same and get the counts.
+  const Shader::ConstantRegisterMap& float_constant_map_vertex =
+      vertex_shader->constant_register_map();
+  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
+  for (uint32_t i = 0; i < 4; ++i) {
+    if (current_float_constant_map_vertex_[i] !=
+        float_constant_map_vertex.float_bitmap[i]) {
+      current_float_constant_map_vertex_[i] =
+          float_constant_map_vertex.float_bitmap[i];
+      // If no float constants at all, any buffer can be reused for them, so
+      // not invalidating.
+      if (float_constant_count_vertex) {
+        current_graphics_descriptor_set_values_up_to_date_ &=
+            ~(UINT32_C(1)
+              << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
+      }
+    }
+  }
+  uint32_t float_constant_count_pixel = 0;
+  if (pixel_shader != nullptr) {
+    const Shader::ConstantRegisterMap& float_constant_map_pixel =
+        pixel_shader->constant_register_map();
+    float_constant_count_pixel = float_constant_map_pixel.float_count;
+    for (uint32_t i = 0; i < 4; ++i) {
+      if (current_float_constant_map_pixel_[i] !=
+          float_constant_map_pixel.float_bitmap[i]) {
+        current_float_constant_map_pixel_[i] =
+            float_constant_map_pixel.float_bitmap[i];
+        if (float_constant_count_pixel) {
+          current_graphics_descriptor_set_values_up_to_date_ &=
+              ~(UINT32_C(1)
+                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
+        }
+      }
+    }
+  } else {
+    std::memset(current_float_constant_map_pixel_, 0,
+                sizeof(current_float_constant_map_pixel_));
+  }
+
+  // Textures and samplers.
+  const std::vector<VulkanShader::SamplerBinding>& samplers_vertex =
+      vertex_shader->GetSamplerBindingsAfterTranslation();
+  const std::vector<VulkanShader::TextureBinding>& textures_vertex =
+      vertex_shader->GetTextureBindingsAfterTranslation();
+  uint32_t sampler_count_vertex = uint32_t(samplers_vertex.size());
+  uint32_t texture_count_vertex = uint32_t(textures_vertex.size());
+  const std::vector<VulkanShader::SamplerBinding>* samplers_pixel;
+  const std::vector<VulkanShader::TextureBinding>* textures_pixel;
+  uint32_t sampler_count_pixel, texture_count_pixel;
+  if (pixel_shader) {
+    samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
+    textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
+    sampler_count_pixel = uint32_t(samplers_pixel->size());
+    texture_count_pixel = uint32_t(textures_pixel->size());
+  } else {
+    samplers_pixel = nullptr;
+    textures_pixel = nullptr;
+    sampler_count_pixel = 0;
+    texture_count_pixel = 0;
+  }
+  // TODO(Triang3l): Reuse texture and sampler bindings if not changed.
+  current_graphics_descriptor_set_values_up_to_date_ &=
+      ~((UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex) |
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex) |
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel) |
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel));
+
+  // Make sure new descriptor sets are bound to the command buffer.
+
+  current_graphics_descriptor_sets_bound_up_to_date_ &=
+      current_graphics_descriptor_set_values_up_to_date_;
+
+  // Fill the texture and sampler write image infos.
+
+  bool write_vertex_samplers =
+      sampler_count_vertex &&
+      !(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex));
+  bool write_vertex_textures =
+      texture_count_vertex &&
+      !(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex));
+  bool write_pixel_samplers =
+      sampler_count_pixel &&
+      !(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel));
+  bool write_pixel_textures =
+      texture_count_pixel &&
+      !(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel));
+  descriptor_write_image_info_.clear();
+  descriptor_write_image_info_.reserve(
+      (write_vertex_samplers ? sampler_count_vertex : 0) +
+      (write_vertex_textures ? texture_count_vertex : 0) +
+      (write_pixel_samplers ? sampler_count_pixel : 0) +
+      (write_pixel_textures ? texture_count_pixel : 0));
+  size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size();
+  if (write_vertex_samplers) {
+    for (const std::pair<VulkanTextureCache::SamplerParameters, VkSampler>&
+             sampler_pair : current_samplers_vertex_) {
+      VkDescriptorImageInfo& descriptor_image_info =
+          descriptor_write_image_info_.emplace_back();
+      descriptor_image_info.sampler = sampler_pair.second;
+    }
+  }
+  size_t vertex_texture_image_info_offset = descriptor_write_image_info_.size();
+  if (write_vertex_textures) {
+    for (const VulkanShader::TextureBinding& texture_binding :
+         textures_vertex) {
+      VkDescriptorImageInfo& descriptor_image_info =
+          descriptor_write_image_info_.emplace_back();
+      descriptor_image_info.imageView =
+          texture_cache_->GetActiveBindingOrNullImageView(
+              texture_binding.fetch_constant, texture_binding.dimension,
+              bool(texture_binding.is_signed));
+      descriptor_image_info.imageLayout =
+          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    }
+  }
+  size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size();
+  if (write_pixel_samplers) {
+    for (const std::pair<VulkanTextureCache::SamplerParameters, VkSampler>&
+             sampler_pair : current_samplers_pixel_) {
+      VkDescriptorImageInfo& descriptor_image_info =
+          descriptor_write_image_info_.emplace_back();
+      descriptor_image_info.sampler = sampler_pair.second;
+    }
+  }
+  size_t pixel_texture_image_info_offset = descriptor_write_image_info_.size();
+  if (write_pixel_textures) {
+    for (const VulkanShader::TextureBinding& texture_binding :
+         *textures_pixel) {
+      VkDescriptorImageInfo& descriptor_image_info =
+          descriptor_write_image_info_.emplace_back();
+      descriptor_image_info.imageView =
+          texture_cache_->GetActiveBindingOrNullImageView(
+              texture_binding.fetch_constant, texture_binding.dimension,
+              bool(texture_binding.is_signed));
+      descriptor_image_info.imageLayout =
+          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    }
+  }
+
+  // Write the new descriptor sets.
+
+  VkWriteDescriptorSet
+      write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount];
+  uint32_t write_descriptor_set_count = 0;
+  uint32_t write_descriptor_set_bits = 0;
+  assert_not_zero(
+      current_graphics_descriptor_set_values_up_to_date_ &
+      (UINT32_C(1)
+       << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram));
+  // Bool and loop constants.
+  VkDescriptorBufferInfo buffer_info_bool_loop_constants;
+  if (!(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1)
+         << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) {
+    VkWriteDescriptorSet& write_bool_loop_constants =
+        write_descriptor_sets[write_descriptor_set_count++];
+    constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32);
+    uint8_t* mapping_bool_loop_constants = WriteTransientUniformBufferBinding(
+        kBoolLoopConstantsSize,
+        SingleTransientDescriptorLayout::kUniformBufferGuestShader,
+        buffer_info_bool_loop_constants, write_bool_loop_constants);
+    if (!mapping_bool_loop_constants) {
+      return false;
+    }
+    std::memcpy(mapping_bool_loop_constants,
+                &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
+                kBoolLoopConstantsSize);
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
+            write_bool_loop_constants.dstSet;
+  }
+  // System constants.
+  VkDescriptorBufferInfo buffer_info_system_constants;
+  if (!(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1)
+         << SpirvShaderTranslator::kDescriptorSetSystemConstants))) {
+    VkWriteDescriptorSet& write_system_constants =
+        write_descriptor_sets[write_descriptor_set_count++];
+    uint8_t* mapping_system_constants = WriteTransientUniformBufferBinding(
+        sizeof(SpirvShaderTranslator::SystemConstants),
+        SingleTransientDescriptorLayout::kUniformBufferSystemConstants,
+        buffer_info_system_constants, write_system_constants);
+    if (!mapping_system_constants) {
+      return false;
+    }
+    std::memcpy(mapping_system_constants, &system_constants_,
+                sizeof(SpirvShaderTranslator::SystemConstants));
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetSystemConstants] =
+            write_system_constants.dstSet;
+  }
+  // Pixel shader float constants.
+  VkDescriptorBufferInfo buffer_info_float_constant_pixel;
+  if (!(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1)
+         << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) {
+    // Even if the shader doesn't need any float constants, a valid binding
+    // must still be provided (the pipeline layout always has float constants,
+    // for both the vertex shader and the pixel shader), so if the first draw
+    // in the frame doesn't have float constants at all, still allocate an
+    // empty buffer.
+    VkWriteDescriptorSet& write_float_constants_pixel =
+        write_descriptor_sets[write_descriptor_set_count++];
+    uint8_t* mapping_float_constants_pixel = WriteTransientUniformBufferBinding(
+        sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)),
+        SingleTransientDescriptorLayout::kUniformBufferFragment,
+        buffer_info_float_constant_pixel, write_float_constants_pixel);
+    if (!mapping_float_constants_pixel) {
+      return false;
+    }
+    for (uint32_t i = 0; i < 4; ++i) {
+      uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i];
+      uint32_t float_constant_index;
+      while (xe::bit_scan_forward(float_constant_map_entry,
+                                  &float_constant_index)) {
+        float_constant_map_entry &= ~(1ull << float_constant_index);
+        std::memcpy(mapping_float_constants_pixel,
+                    &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
+                          (float_constant_index << 2)]
+                         .f32,
+                    sizeof(float) * 4);
+        mapping_float_constants_pixel += sizeof(float) * 4;
+      }
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
+            write_float_constants_pixel.dstSet;
+  }
+  // Vertex shader float constants.
+  VkDescriptorBufferInfo buffer_info_float_constant_vertex;
+  if (!(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1)
+         << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) {
+    VkWriteDescriptorSet& write_float_constants_vertex =
+        write_descriptor_sets[write_descriptor_set_count++];
+    uint8_t* mapping_float_constants_vertex =
+        WriteTransientUniformBufferBinding(
+            sizeof(float) * 4 *
+                std::max(float_constant_count_vertex, UINT32_C(1)),
+            SingleTransientDescriptorLayout::kUniformBufferGuestVertex,
+            buffer_info_float_constant_vertex, write_float_constants_vertex);
+    if (!mapping_float_constants_vertex) {
+      return false;
+    }
+    for (uint32_t i = 0; i < 4; ++i) {
+      uint64_t float_constant_map_entry =
+          current_float_constant_map_vertex_[i];
+      uint32_t float_constant_index;
+      while (xe::bit_scan_forward(float_constant_map_entry,
+                                  &float_constant_index)) {
+        float_constant_map_entry &= ~(1ull << float_constant_index);
+        std::memcpy(mapping_float_constants_vertex,
+                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
+                          (float_constant_index << 2)]
+                         .f32,
+                    sizeof(float) * 4);
+        mapping_float_constants_vertex += sizeof(float) * 4;
+      }
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1)
+        << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
+            write_float_constants_vertex.dstSet;
+  }
+  // Fetch constants.
+  VkDescriptorBufferInfo buffer_info_fetch_constants;
+  if (!(current_graphics_descriptor_set_values_up_to_date_ &
+        (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) {
+    VkWriteDescriptorSet& write_fetch_constants =
+        write_descriptor_sets[write_descriptor_set_count++];
+    constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32;
+    uint8_t* mapping_fetch_constants = WriteTransientUniformBufferBinding(
+        kFetchConstantsSize,
+        SingleTransientDescriptorLayout::kUniformBufferGuestShader,
+        buffer_info_fetch_constants, write_fetch_constants);
+    if (!mapping_fetch_constants) {
+      return false;
+    }
+    std::memcpy(mapping_fetch_constants,
+                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
+                kFetchConstantsSize);
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetFetchConstants] =
+            write_fetch_constants.dstSet;
+  }
+  // Vertex shader samplers.
+  if (write_vertex_samplers) {
+    VkWriteDescriptorSet& write_samplers =
+        write_descriptor_sets[write_descriptor_set_count++];
+    if (!WriteTransientTextureBindings(
+            true, true, sampler_count_vertex,
+            current_guest_graphics_pipeline_layout_
+                ->descriptor_set_layout_samplers_vertex_ref(),
+            descriptor_write_image_info_.data() +
+                vertex_sampler_image_info_offset,
+            write_samplers)) {
+      return false;
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetSamplersVertex] =
+            write_samplers.dstSet;
+  }
+  // Vertex shader textures.
+  if (write_vertex_textures) {
+    VkWriteDescriptorSet& write_textures =
+        write_descriptor_sets[write_descriptor_set_count++];
+    if (!WriteTransientTextureBindings(
+            false, true, texture_count_vertex,
+            current_guest_graphics_pipeline_layout_
+                ->descriptor_set_layout_textures_vertex_ref(),
+            descriptor_write_image_info_.data() +
+                vertex_texture_image_info_offset,
+            write_textures)) {
+      return false;
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
+            write_textures.dstSet;
+  }
+  // Pixel shader samplers.
+  if (write_pixel_samplers) {
+    VkWriteDescriptorSet& write_samplers =
+        write_descriptor_sets[write_descriptor_set_count++];
+    if (!WriteTransientTextureBindings(
+            true, false, sampler_count_pixel,
+            current_guest_graphics_pipeline_layout_
+                ->descriptor_set_layout_samplers_pixel_ref(),
+            descriptor_write_image_info_.data() +
+                pixel_sampler_image_info_offset,
+            write_samplers)) {
+      return false;
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetSamplersPixel] =
+            write_samplers.dstSet;
+  }
+  // Pixel shader textures.
+  if (write_pixel_textures) {
+    VkWriteDescriptorSet& write_textures =
+        write_descriptor_sets[write_descriptor_set_count++];
+    if (!WriteTransientTextureBindings(
+            false, false, texture_count_pixel,
+            current_guest_graphics_pipeline_layout_
+                ->descriptor_set_layout_textures_pixel_ref(),
+            descriptor_write_image_info_.data() +
+                pixel_texture_image_info_offset,
+            write_textures)) {
+      return false;
+    }
+    write_descriptor_set_bits |=
+        UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel;
+    current_graphics_descriptor_sets_
+        [SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
+            write_textures.dstSet;
+  }
+  // Write.
+  if (write_descriptor_set_count) {
+    dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
+                               write_descriptor_sets, 0, nullptr);
+  }
+  // Only make valid if all descriptor sets have been allocated and written
+  // successfully.
+  current_graphics_descriptor_set_values_up_to_date_ |=
+      write_descriptor_set_bits;
+
+  // Bind the new descriptor sets.
+  uint32_t descriptor_sets_needed =
+      (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1;
+  if (!sampler_count_vertex) {
+    descriptor_sets_needed &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex);
+  }
+  if (!texture_count_vertex) {
+    descriptor_sets_needed &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex);
+  }
+  if (!sampler_count_pixel) {
+    descriptor_sets_needed &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel);
+  }
+  if (!texture_count_pixel) {
+    descriptor_sets_needed &=
+        ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel);
+  }
+  uint32_t descriptor_sets_remaining =
+      descriptor_sets_needed &
+      ~current_graphics_descriptor_sets_bound_up_to_date_;
+  uint32_t descriptor_set_index;
+  while (
+      xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) {
+    uint32_t descriptor_set_mask_tzcnt =
+        xe::tzcnt(~(descriptor_sets_remaining |
+                    ((UINT32_C(1) << descriptor_set_index) - 1)));
+    // TODO(Triang3l): Bind to compute for memexport emulation without vertex
+    // shader memory stores.
+    deferred_command_buffer_.CmdVkBindDescriptorSets(
+        VK_PIPELINE_BIND_POINT_GRAPHICS,
+        current_guest_graphics_pipeline_layout_->GetPipelineLayout(),
+        descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index,
+        current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr);
+    if (descriptor_set_mask_tzcnt >= 32) {
+      break;
+    }
+    descriptor_sets_remaining &=
+        ~((UINT32_C(1) << descriptor_set_mask_tzcnt) - 1);
+  }
+  current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed;
+
+  return true;
+}
+
+uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding(
+    size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
+    VkDescriptorBufferInfo& descriptor_buffer_info_out,
+    VkWriteDescriptorSet& write_descriptor_set_out) {
+  assert_true(frame_open_);
+  VkDescriptorSet descriptor_set =
+      AllocateSingleTransientDescriptor(transient_descriptor_layout);
+  if (descriptor_set == VK_NULL_HANDLE) {
+    return nullptr;
+  }
+  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
+  uint8_t* mapping = uniform_buffer_pool_->Request(
+      frame_current_, size,
+      size_t(
+          provider.device_properties().limits.minUniformBufferOffsetAlignment),
+      descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
+  if (!mapping) {
+    return nullptr;
+  }
+  descriptor_buffer_info_out.range = VkDeviceSize(size);
+  write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  write_descriptor_set_out.pNext = nullptr;
+  write_descriptor_set_out.dstSet = descriptor_set;
+  write_descriptor_set_out.dstBinding = 0;
+  write_descriptor_set_out.dstArrayElement = 0;
+  write_descriptor_set_out.descriptorCount = 1;
+  write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+  write_descriptor_set_out.pImageInfo = nullptr;
+  write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out;
+  write_descriptor_set_out.pTexelBufferView = nullptr;
+  return mapping;
+}
+
+uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding(
+    size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
+    VkDescriptorSet& descriptor_set_out) {
+  VkDescriptorBufferInfo write_descriptor_buffer_info;
+  VkWriteDescriptorSet write_descriptor_set;
+  uint8_t* mapping = WriteTransientUniformBufferBinding(
+      size, transient_descriptor_layout, write_descriptor_buffer_info,
+      write_descriptor_set);
+  if (!mapping) {
+    return nullptr;
   }
   const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
   const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
   VkDevice device = provider.device();
-  auto command_buffer = current_command_buffer_;
+  dfn.vkUpdateDescriptorSets(device, 1, &write_descriptor_set, 0, nullptr);
+  descriptor_set_out = write_descriptor_set.dstSet;
+  return mapping;
+}
-  if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-    // Transition the image to a general layout.
-    VkImageMemoryBarrier image_barrier;
-    image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-    image_barrier.pNext = nullptr;
-    image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    image_barrier.srcAccessMask = 0;
-    image_barrier.dstAccessMask = 0;
-    image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-    image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
-    image_barrier.image = texture->image;
-    image_barrier.subresourceRange = {0, 0, 1, 0, 1};
-    image_barrier.subresourceRange.aspectMask =
-        is_color_source
-            ? VK_IMAGE_ASPECT_COLOR_BIT
-            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
-    texture->image_layout = VK_IMAGE_LAYOUT_GENERAL;
-
-    dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                             VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0,
-                             nullptr, 0, nullptr, 1, &image_barrier);
+bool VulkanCommandProcessor::WriteTransientTextureBindings(
+    bool is_samplers, bool is_vertex, uint32_t binding_count,
+    VkDescriptorSetLayout descriptor_set_layout,
+    const VkDescriptorImageInfo* image_info,
+    VkWriteDescriptorSet& write_descriptor_set_out) {
+  assert_not_zero(binding_count);
+  assert_true(frame_open_);
+  TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key;
+  texture_descriptor_set_layout_key.is_samplers = uint32_t(is_samplers);
+  texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex);
+  texture_descriptor_set_layout_key.binding_count = binding_count;
+  VkDescriptorSet texture_descriptor_set;
+  auto textures_free_it = texture_transient_descriptor_sets_free_.find(
+      texture_descriptor_set_layout_key);
+  if (textures_free_it != texture_transient_descriptor_sets_free_.end() &&
+      !textures_free_it->second.empty()) {
+    texture_descriptor_set = textures_free_it->second.back();
+    textures_free_it->second.pop_back();
+  } else {
+    texture_descriptor_set =
+        (is_samplers ? transient_descriptor_allocator_sampler_
+                     : transient_descriptor_allocator_sampled_image_)
+            .Allocate(descriptor_set_layout, binding_count);
+    if (texture_descriptor_set == VK_NULL_HANDLE) {
+      return false;
+    }
   }
-
-  // Transition the image into a transfer destination layout, if needed.
-  // TODO: If blitting, layout should be color attachment.
-  VkImageMemoryBarrier image_barrier;
-  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-  image_barrier.pNext = nullptr;
-  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-  image_barrier.srcAccessMask = 0;
-  image_barrier.dstAccessMask =
-      is_color_source ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
-                      : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-  image_barrier.oldLayout = texture->image_layout;
-  image_barrier.newLayout =
-      is_color_source ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
-                      : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-  image_barrier.image = texture->image;
-  image_barrier.subresourceRange = {0, 0, 1, 0, 1};
-  image_barrier.subresourceRange.aspectMask =
-      is_color_source ? VK_IMAGE_ASPECT_COLOR_BIT
-                      : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
-
-  dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                           VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0,
-                           nullptr, 1, &image_barrier);
-
-  // Ask the render cache to copy to the resolve texture.
-  auto edram_base = is_color_source ? color_edram_base : depth_edram_base;
-  uint32_t src_format = is_color_source ? static_cast<uint32_t>(color_format)
-                                        : static_cast<uint32_t>(depth_format);
-  VkFilter filter = is_color_source ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
-
-  XELOGGPU("Resolve RT {:08X} {:08X}({}) -> 0x{:08X} ({}x{}, format: {})",
-           edram_base, surface_pitch, surface_pitch, copy_dest_base,
-           copy_dest_pitch, copy_dest_height, texture_info.format_info()->name);
-  switch (copy_command) {
-    case CopyCommand::kRaw:
-      /*
-      render_cache_->RawCopyToImage(command_buffer, edram_base,
-      texture->image, texture->image_layout, is_color_source, resolve_offset,
-      resolve_extent); break;
-      */
-
-    case CopyCommand::kConvert: {
-      /*
-      if (!is_color_source && copy_regs->copy_dest_info.copy_dest_swap == 0) {
-        // Depth images are a bit more complicated. Try a blit!
-        render_cache_->BlitToImage(
-            command_buffer, edram_base, surface_pitch, resolve_extent.height,
-            surface_msaa, texture->image, texture->image_layout,
-            is_color_source, src_format, filter,
-            {resolve_offset.x, resolve_offset.y, 0},
-            {resolve_extent.width, resolve_extent.height, 1});
-        break;
-      }
-      */
-
-      // Blit with blitter.
-      auto view = render_cache_->FindTileView(
-          edram_base, surface_pitch, surface_msaa, is_color_source, src_format);
-      if (!view) {
-        XELOGGPU("Failed to find tile view!");
-        break;
-      }
-
-      // Convert the tile view to a sampled image.
-      // Put a barrier on the tile view.
-      VkImageMemoryBarrier tile_image_barrier;
-      tile_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-      tile_image_barrier.pNext = nullptr;
-      tile_image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-      tile_image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-      tile_image_barrier.srcAccessMask =
-          is_color_source ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
-                          : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-      tile_image_barrier.dstAccessMask =
-          VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
-      tile_image_barrier.oldLayout = view->image_layout;
-      tile_image_barrier.newLayout = view->image_layout;
-      tile_image_barrier.image = view->image;
-      tile_image_barrier.subresourceRange = {0, 0, 1, 0, 1};
-      tile_image_barrier.subresourceRange.aspectMask =
-          is_color_source
-              ? VK_IMAGE_ASPECT_COLOR_BIT
-              : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
-      dfn.vkCmdPipelineBarrier(
-          command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
-          VK_PIPELINE_STAGE_TRANSFER_BIT |
-              VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
-          0, 0, nullptr, 0, nullptr, 1, &tile_image_barrier);
-
-      auto render_pass =
-          blitter_->GetRenderPass(texture->format, is_color_source);
-
-      // Create a framebuffer containing our image.
-      if (!texture->framebuffer) {
-        auto texture_view = texture_cache_->DemandView(texture, 0x688);
-
-        VkFramebufferCreateInfo fb_create_info = {
-            VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
-            nullptr,
-            0,
-            render_pass,
-            1,
-            &texture_view->view,
-            texture->texture_info.width + 1,
-            texture->texture_info.height + 1,
-            1,
-        };
-
-        VkResult res = dfn.vkCreateFramebuffer(device, &fb_create_info, nullptr,
-                                               &texture->framebuffer);
-        CheckResult(res, "vkCreateFramebuffer");
-      }
-
-      VkRect2D src_rect = {
-          {0, 0},
-          resolve_extent,
-      };
-
-      VkRect2D dst_rect = {
-          {resolve_offset.x, resolve_offset.y},
-          resolve_extent,
-      };
-
-      // If the destination rectangle lies outside the window, make it start
-      // inside. The Xenos does not copy pixel data at any offset in screen
-      // coordinates.
- int32_t dst_adj_x = - std::max(dst_rect.offset.x, -window_offset_x) - dst_rect.offset.x; - int32_t dst_adj_y = - std::max(dst_rect.offset.y, -window_offset_y) - dst_rect.offset.y; - - if (uint32_t(dst_adj_x) > dst_rect.extent.width || - uint32_t(dst_adj_y) > dst_rect.extent.height) { - // No-op? - break; - } - - dst_rect.offset.x += dst_adj_x; - dst_rect.offset.y += dst_adj_y; - dst_rect.extent.width -= dst_adj_x; - dst_rect.extent.height -= dst_adj_y; - src_rect.extent.width -= dst_adj_x; - src_rect.extent.height -= dst_adj_y; - - VkViewport viewport = { - 0.f, 0.f, float(copy_dest_pitch), float(copy_dest_height), 0.f, 1.f, - }; - - uint32_t scissor_tl_x = window_regs->window_scissor_tl.tl_x; - uint32_t scissor_br_x = window_regs->window_scissor_br.br_x; - uint32_t scissor_tl_y = window_regs->window_scissor_tl.tl_y; - uint32_t scissor_br_y = window_regs->window_scissor_br.br_y; - - // Clamp the values to destination dimensions. - scissor_tl_x = std::min(scissor_tl_x, copy_dest_pitch); - scissor_br_x = std::min(scissor_br_x, copy_dest_pitch); - scissor_tl_y = std::min(scissor_tl_y, copy_dest_height); - scissor_br_y = std::min(scissor_br_y, copy_dest_height); - - VkRect2D scissor = { - {int32_t(scissor_tl_x), int32_t(scissor_tl_y)}, - {scissor_br_x - scissor_tl_x, scissor_br_y - scissor_tl_y}, - }; - - blitter_->BlitTexture2D( - command_buffer, current_batch_fence_, - is_color_source ? view->image_view : view->image_view_depth, src_rect, - view->GetSize(), texture->format, dst_rect, - {copy_dest_pitch, copy_dest_height}, texture->framebuffer, viewport, - scissor, filter, is_color_source, - copy_regs->copy_dest_info.copy_dest_swap != 0); - - // Pull the tile view back to a color/depth attachment. - std::swap(tile_image_barrier.srcAccessMask, - tile_image_barrier.dstAccessMask); - std::swap(tile_image_barrier.oldLayout, tile_image_barrier.newLayout); - dfn.vkCmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, - nullptr, 0, nullptr, 1, &tile_image_barrier); - } break; - - case CopyCommand::kConstantOne: - case CopyCommand::kNull: - assert_always(); - break; - } - - // And pull it back from a transfer destination. - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; - std::swap(image_barrier.newLayout, image_barrier.oldLayout); - dfn.vkCmdPipelineBarrier(command_buffer, - is_color_source - ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - : VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - - // Perform any requested clears. - uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; - assert_true(copy_color_clear == copy_color_clear_low); - - if (color_clear_enabled) { - // If color clear is enabled, we can only clear a selected color target! - assert_true(is_color_source); - - // TODO(benvanik): verify color order. - float color[] = {((copy_color_clear >> 0) & 0xFF) / 255.0f, - ((copy_color_clear >> 8) & 0xFF) / 255.0f, - ((copy_color_clear >> 16) & 0xFF) / 255.0f, - ((copy_color_clear >> 24) & 0xFF) / 255.0f}; - - // TODO(DrChat): Do we know the surface height at this point? 
- render_cache_->ClearEDRAMColor(command_buffer, color_edram_base, - color_format, surface_pitch, - resolve_extent.height, surface_msaa, color); - } - - if (depth_clear_enabled) { - float depth = - (copy_depth_clear & 0xFFFFFF00) / static_cast(0xFFFFFF00); - uint8_t stencil = copy_depth_clear & 0xFF; - - // TODO(DrChat): Do we know the surface height at this point? - render_cache_->ClearEDRAMDepthStencil( - command_buffer, depth_edram_base, depth_format, surface_pitch, - resolve_extent.height, surface_msaa, depth, stencil); - } - + UsedTextureTransientDescriptorSet& used_texture_descriptor_set = + texture_transient_descriptor_sets_used_.emplace_back(); + used_texture_descriptor_set.frame = frame_current_; + used_texture_descriptor_set.layout = texture_descriptor_set_layout_key; + used_texture_descriptor_set.set = texture_descriptor_set; + write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_out.pNext = nullptr; + write_descriptor_set_out.dstSet = texture_descriptor_set; + write_descriptor_set_out.dstBinding = 0; + write_descriptor_set_out.dstArrayElement = 0; + write_descriptor_set_out.descriptorCount = binding_count; + write_descriptor_set_out.descriptorType = + is_samplers ? VK_DESCRIPTOR_TYPE_SAMPLER + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + write_descriptor_set_out.pImageInfo = image_info; + write_descriptor_set_out.pBufferInfo = nullptr; + write_descriptor_set_out.pTexelBufferView = nullptr; return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 062ef0f61..1186310f2 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -10,69 +10,264 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ -#include +#include +#include #include -#include +#include #include #include -#include -#include -#include #include +#include #include -#include "xenia/base/threading.h" +#include "xenia/base/assert.h" +#include "xenia/base/hash.h" #include "xenia/gpu/command_processor.h" -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/vulkan/buffer_cache.h" -#include "xenia/gpu/vulkan/render_cache.h" +#include "xenia/gpu/draw_util.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_primitive_processor.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/xenos.h" -#include "xenia/kernel/xthread.h" -#include "xenia/memory.h" -#include "xenia/ui/vulkan/blitter.h" -#include "xenia/ui/vulkan/fenced_pools.h" +#include "xenia/kernel/kernel_state.h" +#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" +#include "xenia/ui/vulkan/vulkan_presenter.h" #include "xenia/ui/vulkan/vulkan_provider.h" -#include "xenia/ui/vulkan/vulkan_submission_tracker.h" -#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" namespace xe { namespace gpu { namespace vulkan { -class VulkanTextureCache; - class VulkanCommandProcessor : public CommandProcessor { public: + // Single-descriptor layouts for use within a single frame. 
+ enum class SingleTransientDescriptorLayout { + kUniformBufferGuestVertex, + kUniformBufferFragment, + kUniformBufferGuestShader, + kUniformBufferSystemConstants, + kUniformBufferCompute, + kStorageBufferCompute, + kCount, + }; + + class ScratchBufferAcquisition { + public: + explicit ScratchBufferAcquisition() = default; + explicit ScratchBufferAcquisition(VulkanCommandProcessor& command_processor, + VkBuffer buffer, + VkPipelineStageFlags stage_mask, + VkAccessFlags access_mask) + : command_processor_(&command_processor), + buffer_(buffer), + stage_mask_(stage_mask), + access_mask_(access_mask) {} + + ScratchBufferAcquisition(const ScratchBufferAcquisition& acquisition) = + delete; + ScratchBufferAcquisition& operator=( + const ScratchBufferAcquisition& acquisition) = delete; + + ScratchBufferAcquisition(ScratchBufferAcquisition&& acquisition) { + command_processor_ = acquisition.command_processor_; + buffer_ = acquisition.buffer_; + stage_mask_ = acquisition.stage_mask_; + access_mask_ = acquisition.access_mask_; + acquisition.command_processor_ = nullptr; + acquisition.buffer_ = VK_NULL_HANDLE; + acquisition.stage_mask_ = 0; + acquisition.access_mask_ = 0; + } + ScratchBufferAcquisition& operator=( + ScratchBufferAcquisition&& acquisition) { + if (this == &acquisition) { + return *this; + } + command_processor_ = acquisition.command_processor_; + buffer_ = acquisition.buffer_; + stage_mask_ = acquisition.stage_mask_; + access_mask_ = acquisition.access_mask_; + acquisition.command_processor_ = nullptr; + acquisition.buffer_ = VK_NULL_HANDLE; + acquisition.stage_mask_ = 0; + acquisition.access_mask_ = 0; + return *this; + } + + ~ScratchBufferAcquisition() { + if (buffer_ != VK_NULL_HANDLE) { + assert_true(command_processor_->scratch_buffer_used_); + assert_true(command_processor_->scratch_buffer_ == buffer_); + command_processor_->scratch_buffer_last_stage_mask_ = stage_mask_; + command_processor_->scratch_buffer_last_access_mask_ = access_mask_; + command_processor_->scratch_buffer_last_usage_submission_ = + command_processor_->GetCurrentSubmission(); + command_processor_->scratch_buffer_used_ = false; + } + } + + // VK_NULL_HANDLE if failed to acquire or if moved. 
+    VkBuffer buffer() const { return buffer_; }
+
+    VkPipelineStageFlags GetStageMask() const { return stage_mask_; }
+    VkPipelineStageFlags SetStageMask(VkPipelineStageFlags new_stage_mask) {
+      VkPipelineStageFlags old_stage_mask = stage_mask_;
+      stage_mask_ = new_stage_mask;
+      return old_stage_mask;
+    }
+    VkAccessFlags GetAccessMask() const { return access_mask_; }
+    VkAccessFlags SetAccessMask(VkAccessFlags new_access_mask) {
+      VkAccessFlags old_access_mask = access_mask_;
+      access_mask_ = new_access_mask;
+      return old_access_mask;
+    }
+
+   private:
+    VulkanCommandProcessor* command_processor_ = nullptr;
+    VkBuffer buffer_ = VK_NULL_HANDLE;
+    VkPipelineStageFlags stage_mask_ = 0;
+    VkAccessFlags access_mask_ = 0;
+  };
+
   VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system,
                          kernel::KernelState* kernel_state);
-  ~VulkanCommandProcessor() override;
+  ~VulkanCommandProcessor();
 
-  void RequestFrameTrace(const std::filesystem::path& root_path) override;
-  void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
-  void RestoreEdramSnapshot(const void* snapshot) override;
   void ClearCaches() override;
 
+  void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
+
+  void RestoreEdramSnapshot(const void* snapshot) override;
+
   ui::vulkan::VulkanProvider& GetVulkanProvider() const {
     return *static_cast<ui::vulkan::VulkanProvider*>(
         graphics_system_->provider());
   }
 
-  RenderCache* render_cache() { return render_cache_.get(); }
+  // Returns the deferred drawing command list for the currently open
+  // submission.
+  DeferredCommandBuffer& deferred_command_buffer() {
+    assert_true(submission_open_);
+    return deferred_command_buffer_;
+  }
 
- private:
+  bool submission_open() const { return submission_open_; }
+  uint64_t GetCurrentSubmission() const {
+    return submission_completed_ +
+           uint64_t(submissions_in_flight_fences_.size()) + 1;
+  }
+  uint64_t GetCompletedSubmission() const { return submission_completed_; }
+
+  // Sparse binds are:
+  // - In a single submission, all submitted in one vkQueueBindSparse.
+  // - Sent to the queue without waiting for a semaphore.
+  // Thus, multiple sparse binds between the completed and the current
+  // submission, and within one submission, must not touch any overlapping
+  // memory regions.
+  void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
+                        const VkSparseMemoryBind* binds,
+                        VkPipelineStageFlags wait_stage_mask);
+
+  uint64_t GetCurrentFrame() const { return frame_current_; }
+  uint64_t GetCompletedFrame() const { return frame_completed_; }
+
+  // Submission must be open to insert barriers. If no pipeline stages access
+  // the resource in a synchronization scope, the stage masks should be 0 (top
+  // / bottom of pipe should be specified only if explicitly needed). Returns
+  // true if the barrier has actually been inserted and not dropped. A usage
+  // sketch follows below.
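// A hypothetical usage sketch of the barrier helpers declared right below
// (the function and resource names here are made up for illustration, not
// emulator code; a submission is assumed to be open). Barriers are
// accumulated and merged by the command processor rather than recorded
// immediately, and are only flushed into the command buffer when
// SubmitBarriers is called or a draw actually needs them:
void UploadThenSample(VulkanCommandProcessor& command_processor,
                      VkBuffer buffer, VkDeviceSize size) {
  // Transfer write -> fragment shader read dependency for the whole buffer.
  command_processor.PushBufferMemoryBarrier(
      buffer, 0, size, VK_PIPELINE_STAGE_TRANSFER_BIT,
      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
      VK_ACCESS_SHADER_READ_BIT);
  // Flush the pending barriers, ending any open render pass first.
  command_processor.SubmitBarriers(true);
}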
+ bool PushBufferMemoryBarrier( + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + bool skip_if_equal = true); + bool PushImageMemoryBarrier( + VkImage image, const VkImageSubresourceRange& subresource_range, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkImageLayout old_layout, VkImageLayout new_layout, + uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + bool skip_if_equal = true); + // Returns whether any barriers have been submitted - if true is returned, the + // render pass will also be closed. + bool SubmitBarriers(bool force_end_render_pass); + + // If not started yet, begins a render pass from the render target cache. + // Submission must be open. + void SubmitBarriersAndEnterRenderTargetCacheRenderPass( + VkRenderPass render_pass, + const VulkanRenderTargetCache::Framebuffer* framebuffer); + // Must be called before doing anything outside the render pass scope, + // including adding pipeline barriers that are not a part of the render pass + // scope. Submission must be open. + void EndRenderPass(); + + VkDescriptorSetLayout GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout transient_descriptor_layout) const { + return descriptor_set_layouts_single_transient_[size_t( + transient_descriptor_layout)]; + } + // A frame must be open. + VkDescriptorSet AllocateSingleTransientDescriptor( + SingleTransientDescriptorLayout transient_descriptor_layout); + // Allocates a descriptor, space in the uniform buffer pool, and fills the + // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. + // Returns null in case of failure. + uint8_t* WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, + VkDescriptorBufferInfo& descriptor_buffer_info_out, + VkWriteDescriptorSet& write_descriptor_set_out); + uint8_t* WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, + VkDescriptorSet& descriptor_set_out); + + // The returned reference is valid until a cache clear. + VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers, + bool is_vertex, + size_t binding_count); + // The returned reference is valid until a cache clear. + const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( + size_t texture_count_pixel, size_t sampler_count_pixel, + size_t texture_count_vertex, size_t sampler_count_vertex); + + // Returns a single temporary GPU-side buffer within a submission for tasks + // like texture untiling and resolving. May push a buffer memory barrier into + // the initial usage. Submission must be open. + ScratchBufferAcquisition AcquireScratchGpuBuffer( + VkDeviceSize size, VkPipelineStageFlags initial_stage_mask, + VkAccessFlags initial_access_mask); + + // Binds a graphics pipeline for host-specific purposes, invalidating the + // affected state. 
keep_dynamic_* must be false (to invalidate the dynamic
+  // state after binding the pipeline with the same state being static, or if
+  // the caller changes the dynamic state bypassing the VulkanCommandProcessor)
+  // unless the caller has these state variables as dynamic and uses the
+  // tracking in VulkanCommandProcessor to modify them.
+  void BindExternalGraphicsPipeline(VkPipeline pipeline,
+                                    bool keep_dynamic_depth_bias = false,
+                                    bool keep_dynamic_blend_constants = false,
+                                    bool keep_dynamic_stencil_mask_ref = false);
+  void BindExternalComputePipeline(VkPipeline pipeline);
+  void SetViewport(const VkViewport& viewport);
+  void SetScissor(const VkRect2D& scissor);
+
+ protected:
   bool SetupContext() override;
   void ShutdownContext() override;
 
-  void MakeCoherent() override;
-  void WriteRegister(uint32_t index, uint32_t value) override;
-  void BeginFrame();
-  void EndFrame();
+  void OnGammaRamp256EntryTableValueWritten() override;
+  void OnGammaRampPWLValueWritten() override;
 
   void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
                  uint32_t frontbuffer_height) override;
@@ -81,52 +276,459 @@ class VulkanCommandProcessor : public CommandProcessor {
                 const uint32_t* host_address, uint32_t dword_count) override;
 
-  bool IssueDraw(xenos::PrimitiveType primitive_type, uint32_t index_count,
+  bool IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count,
                  IndexBufferInfo* index_buffer_info,
                  bool major_mode_explicit) override;
-  bool PopulateConstants(VkCommandBuffer command_buffer,
-                         VulkanShader* vertex_shader,
-                         VulkanShader* pixel_shader);
-  bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
-                           IndexBufferInfo* index_buffer_info);
-  bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
-                             VkCommandBuffer setup_buffer,
-                             VulkanShader* vertex_shader);
-  bool PopulateSamplers(VkCommandBuffer command_buffer,
-                        VkCommandBuffer setup_buffer,
-                        VulkanShader* vertex_shader,
-                        VulkanShader* pixel_shader);
   bool IssueCopy() override;
 
-  uint64_t dirty_float_constants_ = 0;  // Dirty float constants in blocks of 4
-  uint8_t dirty_bool_constants_ = 0;
-  uint32_t dirty_loop_constants_ = 0;
-  uint8_t dirty_gamma_constants_ = 0;
+  void InitializeTrace() override;
 
-  uint32_t coher_base_vc_ = 0;
-  uint32_t coher_size_vc_ = 0;
+ private:
+  struct CommandBuffer {
+    VkCommandPool pool;
+    VkCommandBuffer buffer;
+  };
+
+  struct SparseBufferBind {
+    VkBuffer buffer;
+    size_t bind_offset;
+    uint32_t bind_count;
+  };
+
+  union TextureDescriptorSetLayoutKey {
+    uint32_t key;
+    struct {
+      // 0 - sampled image descriptors, 1 - sampler descriptors.
+      uint32_t is_samplers : 1;
+      uint32_t is_vertex : 1;
+      // For 0, use descriptor_set_layout_empty_ instead as these are owning
+      // references.
+      uint32_t binding_count : 30;
+    };
+
+    TextureDescriptorSetLayoutKey() : key(0) {
+      static_assert_size(*this, sizeof(key));
+    }
+
+    struct Hasher {
+      size_t operator()(const TextureDescriptorSetLayoutKey& key) const {
+        return std::hash<uint32_t>{}(key.key);
+      }
+    };
+    bool operator==(const TextureDescriptorSetLayoutKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const TextureDescriptorSetLayoutKey& other_key) const {
+      return !(*this == other_key);
+    }
+  };
+
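// The packed-key idiom used by TextureDescriptorSetLayoutKey above (and by
// PipelineLayoutKey below) hashes and compares a bit-field struct through the
// single integer it aliases. A minimal self-contained sketch with made-up
// names, not emulator code:
#include <cstdint>
#include <functional>

union ExampleLayoutKey {
  uint32_t key;
  struct {
    uint32_t is_samplers : 1;
    uint32_t is_vertex : 1;
    uint32_t binding_count : 30;
  };
  struct Hasher {
    size_t operator()(const ExampleLayoutKey& k) const {
      // One integer hash instead of combining three per-field hashes.
      return std::hash<uint32_t>{}(k.key);
    }
  };
  bool operator==(const ExampleLayoutKey& other) const {
    return key == other.key;
  }
};
// Usage: std::unordered_map<ExampleLayoutKey, Value, ExampleLayoutKey::Hasher>.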
+  union PipelineLayoutKey {
+    uint64_t key;
+    struct {
+      // Pixel textures in the low bits since those are varied much more
+      // commonly.
+      uint16_t texture_count_pixel;
+      uint16_t sampler_count_pixel;
+      uint16_t texture_count_vertex;
+      uint16_t sampler_count_vertex;
+    };
+
+    PipelineLayoutKey() : key(0) { static_assert_size(*this, sizeof(key)); }
+
+    struct Hasher {
+      size_t operator()(const PipelineLayoutKey& key) const {
+        return std::hash<uint64_t>{}(key.key);
+      }
+    };
+    bool operator==(const PipelineLayoutKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const PipelineLayoutKey& other_key) const {
+      return !(*this == other_key);
+    }
+  };
+
+  class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider {
+   public:
+    explicit PipelineLayout(
+        VkPipelineLayout pipeline_layout,
+        VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref,
+        VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref,
+        VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref,
+        VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref)
+        : pipeline_layout_(pipeline_layout),
+          descriptor_set_layout_textures_vertex_ref_(
+              descriptor_set_layout_textures_vertex_ref),
+          descriptor_set_layout_samplers_vertex_ref_(
+              descriptor_set_layout_samplers_vertex_ref),
+          descriptor_set_layout_textures_pixel_ref_(
+              descriptor_set_layout_textures_pixel_ref),
+          descriptor_set_layout_samplers_pixel_ref_(
+              descriptor_set_layout_samplers_pixel_ref) {}
+    VkPipelineLayout GetPipelineLayout() const override {
+      return pipeline_layout_;
+    }
+    VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const {
+      return descriptor_set_layout_textures_vertex_ref_;
+    }
+    VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const {
+      return descriptor_set_layout_samplers_vertex_ref_;
+    }
+    VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const {
+      return descriptor_set_layout_textures_pixel_ref_;
+    }
+    VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const {
+      return descriptor_set_layout_samplers_pixel_ref_;
+    }
+
+   private:
+    VkPipelineLayout pipeline_layout_;
+    VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_;
+    VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_;
+    VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_;
+    VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_;
+  };
+
+  struct UsedSingleTransientDescriptor {
+    uint64_t frame;
+    SingleTransientDescriptorLayout layout;
+    VkDescriptorSet set;
+  };
+
+  struct UsedTextureTransientDescriptorSet {
+    uint64_t frame;
+    TextureDescriptorSetLayoutKey layout;
+    VkDescriptorSet set;
+  };
+
+  enum SwapApplyGammaDescriptorSet : uint32_t {
+    kSwapApplyGammaDescriptorSetRamp,
+    kSwapApplyGammaDescriptorSetSource,
+
+    kSwapApplyGammaDescriptorSetCount,
+  };
+
+  // Framebuffer for the current presenter's guest output image revision, and
+  // its usage tracking.
+  struct SwapFramebuffer {
+    VkFramebuffer framebuffer = VK_NULL_HANDLE;
+    uint64_t version = UINT64_MAX;
+    uint64_t last_submission = 0;
+  };
+
+  // BeginSubmission and EndSubmission may be called at any time. If there's an
+  // open non-frame submission, BeginSubmission(true) will promote it to a
+  // frame. EndSubmission(true) will close the frame even if the submission
+  // itself has already been closed.
+  // Unlike on Direct3D 12, submission boundaries do not imply any memory
+  // barriers aside from an incoming host write (but not outgoing host read)
+  // dependency.
+
+  // Rechecks the submission number and reclaims per-submission resources.
+  // Pass 0 as the submission to await to simply check the status, or pass
+  // GetCurrentSubmission() to wait for all queue operations to be completed.
+  void CheckSubmissionFenceAndDeviceLoss(uint64_t await_submission);
+  // If is_guest_command is true, a new full frame - with full cleanup of
+  // resources and, if needed, starting capturing - is opened if pending (as
+  // opposed to simply resuming after mid-frame synchronization). Returns
+  // whether a submission is open currently and the device is not lost.
+  bool BeginSubmission(bool is_guest_command);
+  // If is_swap is true, a full frame is closed - with, if needed, cache
+  // clearing and stopping capturing. Returns whether the submission was done
+  // successfully; if it has failed, it is left open.
+  bool EndSubmission(bool is_swap);
+  bool AwaitAllQueueOperationsCompletion() {
+    CheckSubmissionFenceAndDeviceLoss(GetCurrentSubmission());
+    return !submission_open_ && submissions_in_flight_fences_.empty();
+  }
+
+  void ClearTransientDescriptorPools();
+
+  void SplitPendingBarrier();
+
+  void DestroyScratchBuffer();
+
+  void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
+                          bool primitive_polygonal,
+                          reg::RB_DEPTHCONTROL normalized_depth_control);
+  void UpdateSystemConstantValues(bool primitive_polygonal,
+                                  xenos::Endian index_endian,
+                                  const draw_util::ViewportInfo& viewport_info,
+                                  uint32_t used_texture_mask);
+  bool UpdateBindings(const VulkanShader* vertex_shader,
+                      const VulkanShader* pixel_shader);
+  // Allocates a descriptor set and fills the VkWriteDescriptorSet structure.
+  // The descriptor set layout must be the one for the given is_samplers,
+  // is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be
+  // already available at the moment of the call, no need to locate it again).
+  // Returns whether the allocation was successful.
+  bool WriteTransientTextureBindings(
+      bool is_samplers, bool is_vertex, uint32_t binding_count,
+      VkDescriptorSetLayout descriptor_set_layout,
+      const VkDescriptorImageInfo* image_info,
+      VkWriteDescriptorSet& write_descriptor_set_out);
+
+  bool device_lost_ = false;
-
-  bool capturing_ = false;
-  bool trace_requested_ = false;
   bool cache_clear_requested_ = false;
-  std::unique_ptr<BufferCache> buffer_cache_;
+  // Host shader types that guest shaders can be translated into - they can
+  // access the shared memory (via vertex fetch, memory export, or manual index
+  // buffer reading) and textures.
+  VkPipelineStageFlags guest_shader_pipeline_stages_ = 0;
+  VkShaderStageFlags guest_shader_vertex_stages_ = 0;
+
+  std::vector<VkFence> fences_free_;
+  std::vector<VkSemaphore> semaphores_free_;
+
+  bool submission_open_ = false;
+  uint64_t submission_completed_ = 0;
+  // In case vkQueueSubmit fails after something like a successful
+  // vkQueueBindSparse, to wait correctly on the next attempt.
+  std::vector<VkSemaphore> current_submission_wait_semaphores_;
+  std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
+  std::vector<VkFence> submissions_in_flight_fences_;
+  std::deque<std::pair<uint64_t, VkSemaphore>>
+      submissions_in_flight_semaphores_;
+
+  static constexpr uint32_t kMaxFramesInFlight = 3;
+  bool frame_open_ = false;
+  // Guest frame index, since some transient resources can be reused across
+  // submissions. Values updated at the beginning of a frame.
+  uint64_t frame_current_ = 1;
+  uint64_t frame_completed_ = 0;
+  // Submission indices of frames that have already been submitted.
+  uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
+
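// The destruction queues below implement deferred deletion that is safe with
// respect to GPU lifetime. A minimal standalone sketch of the pattern (not
// the emulator's code; names are made up): each handle is enqueued with the
// submission that last uses it, and is destroyed only once that submission's
// fence has been signaled.
#include <cstdint>
#include <deque>
#include <utility>

template <typename Handle, typename DestroyFn>
void RetireCompleted(std::deque<std::pair<uint64_t, Handle>>& destroy_queue,
                     uint64_t completed_submission, DestroyFn&& destroy) {
  // Entries are enqueued in submission order, so the front is the oldest.
  while (!destroy_queue.empty() &&
         destroy_queue.front().first <= completed_submission) {
    destroy(destroy_queue.front().second);
    destroy_queue.pop_front();
  }
}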
+  // <Submission where last used, resource>, sorted by the submission number.
+  std::deque<std::pair<uint64_t, VkDeviceMemory>> destroy_memory_;
+  std::deque<std::pair<uint64_t, VkBuffer>> destroy_buffers_;
+  std::deque<std::pair<uint64_t, VkFramebuffer>> destroy_framebuffers_;
+
+  std::vector<CommandBuffer> command_buffers_writable_;
+  std::deque<std::pair<uint64_t, CommandBuffer>> command_buffers_submitted_;
+  DeferredCommandBuffer deferred_command_buffer_;
+
+  std::vector<VkSparseMemoryBind> sparse_memory_binds_;
+  std::vector<SparseBufferBind> sparse_buffer_binds_;
+  // SparseBufferBind entries are converted to VkSparseBufferMemoryBindInfo
+  // into this buffer on submission (because pBinds must point into a
+  // std::vector that may be reallocated while binds are still being
+  // accumulated).
+  std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
+  VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
+
+  // Temporary storage with reusable memory for creating descriptor set
+  // layouts.
+  std::vector<VkDescriptorSetLayoutBinding> descriptor_set_layout_bindings_;
+  // Temporary storage with reusable memory for writing image and sampler
+  // descriptors.
+  std::vector<VkDescriptorImageInfo> descriptor_write_image_info_;
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> uniform_buffer_pool_;
+
+  // Descriptor set layouts used by different shaders.
+  VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
+      VK_NULL_HANDLE;
+  std::array<VkDescriptorSetLayout,
+             size_t(SingleTransientDescriptorLayout::kCount)>
+      descriptor_set_layouts_single_transient_{};
+
+  // Descriptor set layouts are referenced by pipeline_layouts_.
+  std::unordered_map<TextureDescriptorSetLayoutKey, VkDescriptorSetLayout,
+                     TextureDescriptorSetLayoutKey::Hasher>
+      descriptor_set_layouts_textures_;
+  // Pipeline layouts are referenced by VulkanPipelineCache.
+  std::unordered_map<PipelineLayoutKey, PipelineLayout,
+                     PipelineLayoutKey::Hasher>
+      pipeline_layouts_;
+
+  ui::vulkan::SingleTypeDescriptorSetAllocator
+      transient_descriptor_allocator_uniform_buffer_;
+  ui::vulkan::SingleTypeDescriptorSetAllocator
+      transient_descriptor_allocator_storage_buffer_;
+  std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_;
+  std::array<std::vector<VkDescriptorSet>,
+             size_t(SingleTransientDescriptorLayout::kCount)>
+      single_transient_descriptors_free_;
+
+  ui::vulkan::SingleTypeDescriptorSetAllocator
+      transient_descriptor_allocator_sampled_image_;
+  ui::vulkan::SingleTypeDescriptorSetAllocator
+      transient_descriptor_allocator_sampler_;
+  std::deque<UsedTextureTransientDescriptorSet>
+      texture_transient_descriptor_sets_used_;
+  std::unordered_map<TextureDescriptorSetLayoutKey,
+                     std::vector<VkDescriptorSet>,
+                     TextureDescriptorSetLayoutKey::Hasher>
+      texture_transient_descriptor_sets_free_;
+
+  std::unique_ptr<VulkanSharedMemory> shared_memory_;
+
+  std::unique_ptr<VulkanPrimitiveProcessor> primitive_processor_;
+
+  std::unique_ptr<VulkanRenderTargetCache> render_target_cache_;
 
   std::unique_ptr<VulkanPipelineCache> pipeline_cache_;
-  std::unique_ptr<RenderCache> render_cache_;
 
+  std::unique_ptr<VulkanTextureCache> texture_cache_;
 
-  std::unique_ptr<ui::vulkan::Blitter> blitter_;
-  std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
+  VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE;
+  VkDescriptorSet shared_memory_and_edram_descriptor_set_;
 
+  // Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read
+  // as R10G10B10X2 with swizzle).
+  // Bytes 0x400...0x9FF - 128-entry PWL R16G16 gamma ramp (R - base, G -
+  // delta, low 6 bits of each are zero, 3 elements per entry).
+  // kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
+  // uploaded directly, one otherwise.
+  VkDeviceMemory gamma_ramp_buffer_memory_ = VK_NULL_HANDLE;
+  VkBuffer gamma_ramp_buffer_ = VK_NULL_HANDLE;
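// How a backend typically chooses between the two gamma ramp paths described
// above - direct writes when the buffer's memory can be host-visible, the
// separate upload buffer below otherwise. A generic standalone sketch of the
// standard Vulkan memory-type query, not a copy of Xenia's logic:
#include <cstdint>
#include <vulkan/vulkan.h>

// Returns UINT32_MAX if no memory type satisfies the requirements.
uint32_t FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
                        uint32_t memory_type_bits,
                        VkMemoryPropertyFlags required_flags) {
  for (uint32_t i = 0; i < properties.memoryTypeCount; ++i) {
    if ((memory_type_bits & (uint32_t(1) << i)) &&
        (properties.memoryTypes[i].propertyFlags & required_flags) ==
            required_flags) {
      return i;
    }
  }
  return UINT32_MAX;
}
// If FindMemoryType(properties, requirements.memoryTypeBits,
// VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
// succeeds, the ramp can be written directly; otherwise an upload buffer is
// needed.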
+  // kMaxFramesInFlight pairs, only when the gamma ramp buffer is not
+  // host-visible.
+  VkDeviceMemory gamma_ramp_upload_buffer_memory_ = VK_NULL_HANDLE;
+  VkBuffer gamma_ramp_upload_buffer_ = VK_NULL_HANDLE;
+  VkDeviceSize gamma_ramp_upload_memory_size_;
+  uint32_t gamma_ramp_upload_memory_type_;
+  // Mapping of either gamma_ramp_buffer_memory_ (if it's host-visible) or
+  // gamma_ramp_upload_buffer_memory_ (otherwise).
+  void* gamma_ramp_upload_mapping_;
+  std::array gamma_ramp_buffer_views_{};
+  // UINT32_MAX if outdated.
+  uint32_t gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX;
+  uint32_t gamma_ramp_pwl_current_frame_ = UINT32_MAX;
 
-  ui::vulkan::VulkanSubmissionTracker swap_submission_tracker_;
-  VkFramebuffer swap_framebuffer_ = VK_NULL_HANDLE;
-  uint64_t swap_framebuffer_version_ = UINT64_MAX;
+  VkDescriptorSetLayout swap_descriptor_set_layout_sampled_image_ =
+      VK_NULL_HANDLE;
+  VkDescriptorSetLayout swap_descriptor_set_layout_uniform_texel_buffer_ =
+      VK_NULL_HANDLE;
+
+  // Descriptor pool for allocating descriptors needed for presentation, such
+  // as the destination images and the gamma ramps.
+  VkDescriptorPool swap_descriptor_pool_ = VK_NULL_HANDLE;
+  // Interleaved 256-entry table and PWL texel buffer descriptors.
+  // kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
+  // uploaded directly, one otherwise.
+  std::array swap_descriptors_gamma_ramp_;
+  // Sampled images.
+  std::array swap_descriptors_source_;
+
+  VkPipelineLayout swap_apply_gamma_pipeline_layout_ = VK_NULL_HANDLE;
+  // Has no dependencies on specific pipeline stages at either end, to simplify
+  // use in different scenarios with different pipelines - use explicit
+  // barriers for synchronization.
+  VkRenderPass swap_apply_gamma_render_pass_ = VK_NULL_HANDLE;
+  VkPipeline swap_apply_gamma_256_entry_table_pipeline_ = VK_NULL_HANDLE;
+  VkPipeline swap_apply_gamma_pwl_pipeline_ = VK_NULL_HANDLE;
+
+  std::array swap_framebuffers_;
+
+  // Pending pipeline barriers.
+  std::vector<VkBufferMemoryBarrier> pending_barriers_buffer_memory_barriers_;
+  std::vector<VkImageMemoryBarrier> pending_barriers_image_memory_barriers_;
+  struct PendingBarrier {
+    VkPipelineStageFlags src_stage_mask = 0;
+    VkPipelineStageFlags dst_stage_mask = 0;
+    size_t buffer_memory_barriers_offset = 0;
+    size_t image_memory_barriers_offset = 0;
+  };
+  std::vector<PendingBarrier> pending_barriers_;
+  PendingBarrier current_pending_barrier_;
+
+  // GPU-local scratch buffer.
+  static constexpr VkDeviceSize kScratchBufferSizeIncrement = 16 * 1024 * 1024;
+  VkDeviceMemory scratch_buffer_memory_ = VK_NULL_HANDLE;
+  VkBuffer scratch_buffer_ = VK_NULL_HANDLE;
+  VkDeviceSize scratch_buffer_size_ = 0;
+  VkPipelineStageFlags scratch_buffer_last_stage_mask_ = 0;
+  VkAccessFlags scratch_buffer_last_access_mask_ = 0;
+  uint64_t scratch_buffer_last_usage_submission_ = 0;
+  bool scratch_buffer_used_ = false;
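// The dynamic state block below follows an invalidate-and-reapply pattern. A
// standalone sketch of the idea (names made up; not the emulator's code):
// setters only record the value and a dirty flag, and the actual vkCmdSet*
// call is deferred until draw time, so redundant updates are skipped and a
// pipeline bind can invalidate everything simply by raising the flags.
#include <cstring>
#include <vulkan/vulkan.h>

struct DynamicViewportTracker {
  VkViewport viewport;
  bool update_needed = true;  // Raised by pipeline binds with static state.

  void Set(const VkViewport& new_viewport) {
    if (!update_needed &&
        !std::memcmp(&new_viewport, &viewport, sizeof(VkViewport))) {
      return;  // Unchanged and still valid - nothing to re-record.
    }
    viewport = new_viewport;
    update_needed = true;
  }
  void Apply(VkCommandBuffer command_buffer) {
    if (update_needed) {
      vkCmdSetViewport(command_buffer, 0, 1, &viewport);
      update_needed = false;
    }
  }
};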
+  // The current dynamic state of the graphics pipeline bind point. Note that
+  // binding any pipeline to the bind point with static state (even if it's
+  // unused, like depth bias being disabled, but the values themselves still
+  // not declared as dynamic in the pipeline) invalidates such dynamic state.
+  VkViewport dynamic_viewport_;
+  VkRect2D dynamic_scissor_;
+  float dynamic_depth_bias_constant_factor_;
+  float dynamic_depth_bias_slope_factor_;
+  float dynamic_blend_constants_[4];
+  // The stencil values are pre-initialized (to D3D11_DEFAULT_STENCIL_*, and
+  // the initial values for front and back are the same for portability subset
+  // safety) because they're updated conditionally to avoid changing the back
+  // face values when stencil is disabled and the primitive type is changed
+  // between polygonal and non-polygonal.
+  uint32_t dynamic_stencil_compare_mask_front_ = UINT8_MAX;
+  uint32_t dynamic_stencil_compare_mask_back_ = UINT8_MAX;
+  uint32_t dynamic_stencil_write_mask_front_ = UINT8_MAX;
+  uint32_t dynamic_stencil_write_mask_back_ = UINT8_MAX;
+  uint32_t dynamic_stencil_reference_front_ = 0;
+  uint32_t dynamic_stencil_reference_back_ = 0;
+  bool dynamic_viewport_update_needed_;
+  bool dynamic_scissor_update_needed_;
+  bool dynamic_depth_bias_update_needed_;
+  bool dynamic_blend_constants_update_needed_;
+  bool dynamic_stencil_compare_mask_front_update_needed_;
+  bool dynamic_stencil_compare_mask_back_update_needed_;
+  bool dynamic_stencil_write_mask_front_update_needed_;
+  bool dynamic_stencil_write_mask_back_update_needed_;
+  bool dynamic_stencil_reference_front_update_needed_;
+  bool dynamic_stencil_reference_back_update_needed_;
+
+  // Currently used samplers.
+  std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
+      current_samplers_vertex_;
+  std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
+      current_samplers_pixel_;
+
+  // Render pass currently started in the command buffer, cached along with
+  // its framebuffer.
+  VkRenderPass current_render_pass_;
+  const VulkanRenderTargetCache::Framebuffer* current_framebuffer_;
+
+  // Currently bound graphics pipeline, either from the pipeline cache (with
+  // potentially deferred creation - current_external_graphics_pipeline_ is
+  // VK_NULL_HANDLE in this case) or a non-Xenos one
+  // (current_guest_graphics_pipeline_ is VK_NULL_HANDLE in this case).
+  // TODO(Triang3l): Change to a deferred compilation handle.
+  VkPipeline current_guest_graphics_pipeline_;
+  VkPipeline current_external_graphics_pipeline_;
+  VkPipeline current_external_compute_pipeline_;
+
+  // Pipeline layout of the current guest graphics pipeline.
+  const PipelineLayout* current_guest_graphics_pipeline_layout_;
+  VkDescriptorSet current_graphics_descriptor_sets_
+      [SpirvShaderTranslator::kDescriptorSetCount];
+  // Whether descriptor sets in current_graphics_descriptor_sets_ point to
+  // up-to-date data.
+  uint32_t current_graphics_descriptor_set_values_up_to_date_;
+  // Whether the descriptor sets currently bound to the command buffer are up
+  // to date - only low bits for the descriptor set layouts that remained the
+  // same are kept when changing the pipeline layout. May be out of sync with
+  // current_graphics_descriptor_set_values_up_to_date_, but should be ensured
+  // to be a subset of it at some point when it becomes important; bits for
+  // non-existent descriptor set layouts may also be set, but need to be
+  // ignored when they start to matter.
+ uint32_t current_graphics_descriptor_sets_bound_up_to_date_; + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + + // Float constant usage masks of the last draw call. + uint64_t current_float_constant_map_vertex_[4]; + uint64_t current_float_constant_map_pixel_[4]; + + // System shader constants. + SpirvShaderTranslator::SystemConstants system_constants_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc deleted file mode 100644 index 1e27a4e9a..000000000 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ /dev/null @@ -1,16 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" - -DEFINE_bool(vulkan_renderdoc_capture_all, false, - "Capture everything with RenderDoc.", "Vulkan"); -DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA", "Vulkan"); -DEFINE_bool(vulkan_dump_disasm, false, - "Dump shader disassembly. NVIDIA only supported.", "Vulkan"); diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h deleted file mode 100644 index 153bc9bc5..000000000 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ /dev/null @@ -1,20 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ -#define XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ - -#define FINE_GRAINED_DRAW_SCOPES 1 -#include "xenia/base/cvar.h" - -DECLARE_bool(vulkan_renderdoc_capture_all); -DECLARE_bool(vulkan_native_msaa); -DECLARE_bool(vulkan_dump_disasm); - -#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.h b/src/xenia/gpu/vulkan/vulkan_graphics_system.h index 2433703f2..ae81e144c 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.h +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.h @@ -26,7 +26,9 @@ class VulkanGraphicsSystem : public GraphicsSystem { static bool IsAvailable() { return true; } - std::string name() const override { return "Vulkan - obsolete"; } + std::string name() const override { + return "Vulkan - HEAVILY INCOMPLETE, early development"; + } X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::WindowedAppContext* app_context, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index e1832a02c..39decc091 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -2,1634 +2,1986 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include +#include +#include +#include +#include +#include + +#include "third_party/glslang/SPIRV/SpvBuilder.h" +#include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" -#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/base/xxhash.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_util.h" -#include -#include - namespace xe { namespace gpu { namespace vulkan { -using xe::ui::vulkan::util::CheckResult; - -// Generated with `xb buildshaders`. 
-namespace shaders { -#include "xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/dummy_ps.h" -#include "xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/line_quad_list_gs.h" -#include "xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/point_list_gs.h" -#include "xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/quad_list_gs.h" -#include "xenia/gpu/vulkan/shaders/bytecode/vulkan_spirv/rect_list_gs.h" -} // namespace shaders - VulkanPipelineCache::VulkanPipelineCache( - RegisterFile* register_file, const ui::vulkan::VulkanProvider& provider) - : register_file_(register_file), provider_(provider) { - shader_translator_.reset(new SpirvShaderTranslator()); -} + VulkanCommandProcessor& command_processor, + const RegisterFile& register_file, + VulkanRenderTargetCache& render_target_cache, + VkShaderStageFlags guest_shader_vertex_stages) + : command_processor_(command_processor), + register_file_(register_file), + render_target_cache_(render_target_cache), + guest_shader_vertex_stages_(guest_shader_vertex_stages) {} VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } -VkResult VulkanPipelineCache::Initialize( - VkDescriptorSetLayout uniform_descriptor_set_layout, - VkDescriptorSetLayout texture_descriptor_set_layout, - VkDescriptorSetLayout vertex_descriptor_set_layout) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status; +bool VulkanPipelineCache::Initialize() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); - // Initialize the shared driver pipeline cache. - // We'll likely want to serialize this and reuse it, if that proves to be - // useful. If the shaders are expensive and this helps we could do it per - // game, otherwise a single shared cache for render state/etc. - VkPipelineCacheCreateInfo pipeline_cache_info; - pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - pipeline_cache_info.pNext = nullptr; - pipeline_cache_info.flags = 0; - pipeline_cache_info.initialDataSize = 0; - pipeline_cache_info.pInitialData = nullptr; - status = dfn.vkCreatePipelineCache(device, &pipeline_cache_info, nullptr, - &pipeline_cache_); - if (status != VK_SUCCESS) { - return status; - } + shader_translator_ = std::make_unique( + SpirvShaderTranslator::Features(provider)); - // Descriptors used by the pipelines. - // These are the only ones we can ever bind. - VkDescriptorSetLayout set_layouts[] = { - // Per-draw constant register uniforms. - uniform_descriptor_set_layout, - // All texture bindings. - texture_descriptor_set_layout, - // Vertex bindings. - vertex_descriptor_set_layout, - }; - - // Push constants used for draw parameters. - // We need to keep these under 128b across all stages. - // TODO(benvanik): split between the stages? - VkPushConstantRange push_constant_ranges[1]; - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | - VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = kSpirvPushConstantsSize; - - // Shared pipeline layout. 
- VkPipelineLayoutCreateInfo pipeline_layout_info; - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = nullptr; - pipeline_layout_info.flags = 0; - pipeline_layout_info.setLayoutCount = - static_cast(xe::countof(set_layouts)); - pipeline_layout_info.pSetLayouts = set_layouts; - pipeline_layout_info.pushConstantRangeCount = - static_cast(xe::countof(push_constant_ranges)); - pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - status = dfn.vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr, - &pipeline_layout_); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize our shared geometry shaders. - // These will be used as needed to emulate primitive types Vulkan doesn't - // support. - VkShaderModuleCreateInfo shader_module_info; - shader_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_info.pNext = nullptr; - shader_module_info.flags = 0; - shader_module_info.codeSize = sizeof(shaders::line_quad_list_gs); - shader_module_info.pCode = shaders::line_quad_list_gs; - status = dfn.vkCreateShaderModule(device, &shader_module_info, nullptr, - &geometry_shaders_.line_quad_list); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(geometry_shaders_.line_quad_list), - "S(g): Line Quad List"); - - shader_module_info.codeSize = sizeof(shaders::point_list_gs); - shader_module_info.pCode = shaders::point_list_gs; - status = dfn.vkCreateShaderModule(device, &shader_module_info, nullptr, - &geometry_shaders_.point_list); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(geometry_shaders_.point_list), - "S(g): Point List"); - - shader_module_info.codeSize = sizeof(shaders::quad_list_gs); - shader_module_info.pCode = shaders::quad_list_gs; - status = dfn.vkCreateShaderModule(device, &shader_module_info, nullptr, - &geometry_shaders_.quad_list); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(geometry_shaders_.quad_list), - "S(g): Quad List"); - - shader_module_info.codeSize = sizeof(shaders::rect_list_gs); - shader_module_info.pCode = shaders::rect_list_gs; - status = dfn.vkCreateShaderModule(device, &shader_module_info, nullptr, - &geometry_shaders_.rect_list); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(geometry_shaders_.rect_list), - "S(g): Rect List"); - - shader_module_info.codeSize = sizeof(shaders::dummy_ps); - shader_module_info.pCode = shaders::dummy_ps; - status = dfn.vkCreateShaderModule(device, &shader_module_info, nullptr, - &dummy_pixel_shader_); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(dummy_pixel_shader_), "S(g): Dummy"); - - return VK_SUCCESS; + return true; } void VulkanPipelineCache::Shutdown() { - ClearCache(); + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); + // Destroy all pipelines. 
+ last_pipeline_ = nullptr; + for (const auto& pipeline_pair : pipelines_) { + if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, pipeline_pair.second.pipeline, nullptr); + } + } + pipelines_.clear(); - // Destroy geometry shaders. - if (geometry_shaders_.line_quad_list) { - dfn.vkDestroyShaderModule(device, geometry_shaders_.line_quad_list, - nullptr); - geometry_shaders_.line_quad_list = nullptr; - } - if (geometry_shaders_.point_list) { - dfn.vkDestroyShaderModule(device, geometry_shaders_.point_list, nullptr); - geometry_shaders_.point_list = nullptr; - } - if (geometry_shaders_.quad_list) { - dfn.vkDestroyShaderModule(device, geometry_shaders_.quad_list, nullptr); - geometry_shaders_.quad_list = nullptr; - } - if (geometry_shaders_.rect_list) { - dfn.vkDestroyShaderModule(device, geometry_shaders_.rect_list, nullptr); - geometry_shaders_.rect_list = nullptr; - } - if (dummy_pixel_shader_) { - dfn.vkDestroyShaderModule(device, dummy_pixel_shader_, nullptr); - dummy_pixel_shader_ = nullptr; + // Destroy all internal shaders. + for (const auto& geometry_shader_pair : geometry_shaders_) { + if (geometry_shader_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr); + } } + geometry_shaders_.clear(); - if (pipeline_layout_) { - dfn.vkDestroyPipelineLayout(device, pipeline_layout_, nullptr); - pipeline_layout_ = nullptr; - } - if (pipeline_cache_) { - dfn.vkDestroyPipelineCache(device, pipeline_cache_, nullptr); - pipeline_cache_ = nullptr; + // Destroy all translated shaders. + for (auto it : shaders_) { + delete it.second; } + shaders_.clear(); + texture_binding_layout_map_.clear(); + texture_binding_layouts_.clear(); + + // Shut down shader translation. + shader_translator_.reset(); } VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, - uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { // Hash the input memory and lookup the shader. uint64_t data_hash = XXH3_64bits(host_address, dword_count * sizeof(uint32_t)); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { // Shader has been previously loaded. return it->second; } - // Always create the shader and stash it away. // We need to track it even if it fails translation so we know not to try // again. 
-  VulkanShader* shader = new VulkanShader(provider_, shader_type, data_hash,
-                                          host_address, dword_count);
-  shader_map_.insert({data_hash, shader});
-
+  VulkanShader* shader =
+      new VulkanShader(command_processor_.GetVulkanProvider(), shader_type,
+                       data_hash, host_address, dword_count);
+  shaders_.emplace(data_hash, shader);
   return shader;
 }
 
-VulkanPipelineCache::UpdateStatus VulkanPipelineCache::ConfigurePipeline(
-    VkCommandBuffer command_buffer, const RenderState* render_state,
-    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
-    xenos::PrimitiveType primitive_type, VkPipeline* pipeline_out) {
-#if FINE_GRAINED_DRAW_SCOPES
+SpirvShaderTranslator::Modification
+VulkanPipelineCache::GetCurrentVertexShaderModification(
+    const Shader& shader,
+    Shader::HostVertexShaderType host_vertex_shader_type) const {
+  assert_true(shader.type() == xenos::ShaderType::kVertex);
+  assert_true(shader.is_ucode_analyzed());
+  const auto& regs = register_file_;
+  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
+  return SpirvShaderTranslator::Modification(
+      shader_translator_->GetDefaultVertexShaderModification(
+          shader.GetDynamicAddressableRegisterCount(
+              sq_program_cntl.vs_num_reg),
+          host_vertex_shader_type));
+}
+
+SpirvShaderTranslator::Modification
+VulkanPipelineCache::GetCurrentPixelShaderModification(
+    const Shader& shader, uint32_t normalized_color_mask) const {
+  assert_true(shader.type() == xenos::ShaderType::kPixel);
+  assert_true(shader.is_ucode_analyzed());
+  const auto& regs = register_file_;
+  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
+
+  SpirvShaderTranslator::Modification modification(
+      shader_translator_->GetDefaultPixelShaderModification(
+          shader.GetDynamicAddressableRegisterCount(
+              sq_program_cntl.ps_num_reg)));
+
+  if (sq_program_cntl.param_gen) {
+    auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
+    if (sq_context_misc.param_gen_pos <
+        std::min(
+            std::max(modification.pixel.dynamic_addressable_register_count,
+                     shader.register_static_address_bound()),
+            xenos::kMaxInterpolators)) {
+      modification.pixel.param_gen_enable = 1;
+      modification.pixel.param_gen_interpolator =
+          sq_context_misc.param_gen_pos;
+      auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
+      modification.pixel.param_gen_point = uint32_t(
+          vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList);
+    }
+  }
+
+  using DepthStencilMode =
+      SpirvShaderTranslator::Modification::DepthStencilMode;
+  if (shader.implicit_early_z_write_allowed() &&
+      (!shader.writes_color_target(0) ||
+       !draw_util::DoesCoverageDependOnAlpha(
+           regs.Get<reg::RB_COLORCONTROL>()))) {
+    modification.pixel.depth_stencil_mode = DepthStencilMode::kEarlyHint;
+  } else {
+    modification.pixel.depth_stencil_mode = DepthStencilMode::kNoModifiers;
+  }
+
+  return modification;
+}
+
+bool VulkanPipelineCache::EnsureShadersTranslated(
+    VulkanShader::VulkanTranslation* vertex_shader,
+    VulkanShader::VulkanTranslation* pixel_shader) {
+  // Edge flags are not supported yet (because polygon primitives are not).
+  assert_true(register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
+                  xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
+              register_file_.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode !=
+                  xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
+  assert_false(register_file_.Get<reg::SQ_PROGRAM_CNTL>().gen_index_vtx);
+  if (!vertex_shader->is_translated()) {
+    vertex_shader->shader().AnalyzeUcode(ucode_disasm_buffer_);
+    if (!TranslateAnalyzedShader(*shader_translator_, *vertex_shader)) {
+      XELOGE("Failed to translate the vertex shader!");
+      return false;
+    }
+  }
+  if (!vertex_shader->is_valid()) {
+    // Translation attempted previously, but not valid.
+bool VulkanPipelineCache::ConfigurePipeline(
+    VulkanShader::VulkanTranslation* vertex_shader,
+    VulkanShader::VulkanTranslation* pixel_shader,
+    const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
+    reg::RB_DEPTHCONTROL normalized_depth_control,
+    uint32_t normalized_color_mask,
+    VulkanRenderTargetCache::RenderPassKey render_pass_key,
+    VkPipeline& pipeline_out,
+    const PipelineLayoutProvider*& pipeline_layout_out) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
   SCOPE_profile_cpu_f("gpu");
-#endif  // FINE_GRAINED_DRAW_SCOPES
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
 
-  assert_not_null(pipeline_out);
-
-  // Perform a pass over all registers and state updating our cached structures.
-  // This will tell us if anything has changed that requires us to either build
-  // a new pipeline or use an existing one.
-  VkPipeline pipeline = nullptr;
-  auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type);
-  switch (update_status) {
-    case UpdateStatus::kCompatible:
-      // Requested pipeline is compatible with our previous one, so use that.
-      // Note that there still may be dynamic state that needs updating.
-      pipeline = current_pipeline_;
-      break;
-    case UpdateStatus::kMismatch:
-      // Pipeline state has changed. We need to either create a new one or find
-      // an old one that matches.
-      current_pipeline_ = nullptr;
-      break;
-    case UpdateStatus::kError:
-      // Error updating state - bail out.
-      // We are in an indeterminate state, so reset things for the next attempt.
-      current_pipeline_ = nullptr;
-      return update_status;
+  // Ensure shaders are translated - needed now for GetCurrentStateDescription.
+  if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) {
+    return false;
   }
-  if (!pipeline) {
-    // Should have a hash key produced by the UpdateState pass.
-    uint64_t hash_key = XXH3_64bits_digest(&hash_state_);
-    pipeline = GetPipeline(render_state, hash_key);
-    current_pipeline_ = pipeline;
-    if (!pipeline) {
-      // Unable to create pipeline.
-      return UpdateStatus::kError;
+
+  PipelineDescription description;
+  if (!GetCurrentStateDescription(
+          vertex_shader, pixel_shader, primitive_processing_result,
+          normalized_depth_control, normalized_color_mask, render_pass_key,
+          description)) {
+    return false;
+  }
+  if (last_pipeline_ && last_pipeline_->first == description) {
+    pipeline_out = last_pipeline_->second.pipeline;
+    pipeline_layout_out = last_pipeline_->second.pipeline_layout;
+    return true;
+  }
+  auto it = pipelines_.find(description);
+  if (it != pipelines_.end()) {
+    last_pipeline_ = &*it;
+    pipeline_out = it->second.pipeline;
+    pipeline_layout_out = it->second.pipeline_layout;
+    return true;
+  }
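// [Illustrative sketch, not part of the patch.] ConfigurePipeline above
// checks the most recently used pipeline first, then the description-keyed
// map, and only then creates a new pipeline. Generalized below; Description,
// Pipeline and Hash are placeholders. Caching &*it is safe because
// std::unordered_map node addresses stay valid across rehashing.

#include <unordered_map>
#include <utility>

template <typename Description, typename Pipeline, typename Hash>
class PipelineMap {
 public:
  Pipeline& Configure(const Description& description) {
    if (last_ && last_->first == description) {
      return last_->second;  // Hottest path: state unchanged between draws.
    }
    auto it = pipelines_.find(description);
    if (it == pipelines_.end()) {
      // Slowest path: a state combination never seen before.
      it = pipelines_.emplace(description, Pipeline{}).first;
    }
    last_ = &*it;
    return it->second;
  }

 private:
  std::unordered_map<Description, Pipeline, Hash> pipelines_;
  std::pair<const Description, Pipeline>* last_ = nullptr;
};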
+
+  // Create the pipeline if not the latest and not already existing.
+  const PipelineLayoutProvider* pipeline_layout =
+      command_processor_.GetPipelineLayout(
+          pixel_shader
+              ? static_cast<const VulkanShader&>(pixel_shader->shader())
+                    .GetTextureBindingsAfterTranslation()
+                    .size()
+              : 0,
+          pixel_shader
+              ? static_cast<const VulkanShader&>(pixel_shader->shader())
+                    .GetSamplerBindingsAfterTranslation()
+                    .size()
+              : 0,
+          static_cast<const VulkanShader&>(vertex_shader->shader())
+              .GetTextureBindingsAfterTranslation()
+              .size(),
+          static_cast<const VulkanShader&>(vertex_shader->shader())
+              .GetSamplerBindingsAfterTranslation()
+              .size());
+  if (!pipeline_layout) {
+    return false;
+  }
+  VkShaderModule geometry_shader = VK_NULL_HANDLE;
+  GeometryShaderKey geometry_shader_key;
+  if (GetGeometryShaderKey(description.geometry_shader, geometry_shader_key)) {
+    geometry_shader = GetGeometryShader(geometry_shader_key);
+    if (geometry_shader == VK_NULL_HANDLE) {
+      return false;
     }
   }
-
-  *pipeline_out = pipeline;
-  return update_status;
+  VkRenderPass render_pass =
+      render_target_cache_.GetRenderPass(render_pass_key);
+  if (render_pass == VK_NULL_HANDLE) {
+    return false;
+  }
+  PipelineCreationArguments creation_arguments;
+  auto& pipeline =
+      *pipelines_.emplace(description, Pipeline(pipeline_layout)).first;
+  creation_arguments.pipeline = &pipeline;
+  creation_arguments.vertex_shader = vertex_shader;
+  creation_arguments.pixel_shader = pixel_shader;
+  creation_arguments.geometry_shader = geometry_shader;
+  creation_arguments.render_pass = render_pass;
+  if (!EnsurePipelineCreated(creation_arguments)) {
+    return false;
+  }
+  pipeline_out = pipeline.second.pipeline;
+  pipeline_layout_out = pipeline_layout;
+  return true;
 }
 
-void VulkanPipelineCache::ClearCache() {
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-  // Destroy all pipelines.
-  for (auto it : cached_pipelines_) {
-    dfn.vkDestroyPipeline(device, it.second, nullptr);
-  }
-  cached_pipelines_.clear();
-  COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
-
-  // Destroy all shaders.
-  for (auto it : shader_map_) {
-    delete it.second;
-  }
-  shader_map_.clear();
-}
-
-VkPipeline VulkanPipelineCache::GetPipeline(const RenderState* render_state,
-                                            uint64_t hash_key) {
-  // Lookup the pipeline in the cache.
-  auto it = cached_pipelines_.find(hash_key);
-  if (it != cached_pipelines_.end()) {
-    // Found existing pipeline.
-    return it->second;
-  }
-
-  VkPipelineDynamicStateCreateInfo dynamic_state_info;
-  dynamic_state_info.sType =
-      VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
-  dynamic_state_info.pNext = nullptr;
-  dynamic_state_info.flags = 0;
-  VkDynamicState dynamic_states[] = {
-      VK_DYNAMIC_STATE_VIEWPORT,
-      VK_DYNAMIC_STATE_SCISSOR,
-      VK_DYNAMIC_STATE_LINE_WIDTH,
-      VK_DYNAMIC_STATE_DEPTH_BIAS,
-      VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-      VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-      VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
-      VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
-      VK_DYNAMIC_STATE_STENCIL_REFERENCE,
-  };
-  dynamic_state_info.dynamicStateCount =
-      static_cast<uint32_t>(xe::countof(dynamic_states));
-  dynamic_state_info.pDynamicStates = dynamic_states;
-
-  VkGraphicsPipelineCreateInfo pipeline_info;
-  pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
-  pipeline_info.pNext = nullptr;
-  pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
-  pipeline_info.stageCount = update_shader_stages_stage_count_;
-  pipeline_info.pStages = update_shader_stages_info_;
-  pipeline_info.pVertexInputState = &update_vertex_input_state_info_;
-  pipeline_info.pInputAssemblyState = &update_input_assembly_state_info_;
-  pipeline_info.pTessellationState = nullptr;
-  pipeline_info.pViewportState = &update_viewport_state_info_;
-  pipeline_info.pRasterizationState = &update_rasterization_state_info_;
-  pipeline_info.pMultisampleState = &update_multisample_state_info_;
-  pipeline_info.pDepthStencilState = &update_depth_stencil_state_info_;
-  pipeline_info.pColorBlendState = &update_color_blend_state_info_;
-  pipeline_info.pDynamicState = &dynamic_state_info;
-  pipeline_info.layout = pipeline_layout_;
-  pipeline_info.renderPass = render_state->render_pass_handle;
-  pipeline_info.subpass = 0;
-  pipeline_info.basePipelineHandle = nullptr;
-  pipeline_info.basePipelineIndex = -1;
-  VkPipeline pipeline = nullptr;
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-  auto result = dfn.vkCreateGraphicsPipelines(
-      device, pipeline_cache_, 1, &pipeline_info, nullptr, &pipeline);
-  if (result != VK_SUCCESS) {
-    XELOGE("vkCreateGraphicsPipelines failed with code {}", result);
-    assert_always();
-    return nullptr;
-  }
-
-  // Dump shader disassembly.
-  if (cvars::vulkan_dump_disasm) {
-    if (provider_.device_extensions().amd_shader_info) {
-      DumpShaderDisasmAMD(pipeline);
-    } else if (provider_.device_properties().vendorID ==
-               uint32_t(ui::GraphicsProvider::GpuVendorID::kNvidia)) {
-      // NVIDIA cards
-      DumpShaderDisasmNV(pipeline_info);
-    }
-  }
-
-  // Add to cache with the hash key for reuse.
-  cached_pipelines_.insert({hash_key, pipeline});
-  COUNT_profile_set("gpu/pipeline_cache/pipelines", cached_pipelines_.size());
-
-  return pipeline;
-}
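// [Illustrative sketch, not part of the patch.] The deleted GetPipeline above
// declared viewport, scissor, line width, depth bias, blend constants, depth
// bounds and stencil parameters as dynamic, so changes to them never forced a
// new VkPipeline; only vkCmdSet* calls were needed per draw. Reduced to its
// core (standard Vulkan C API only):

VkPipelineDynamicStateCreateInfo MakeDynamicStateInfo() {
  // Static storage so pDynamicStates stays valid after returning.
  static const VkDynamicState kDynamicStates[] = {
      VK_DYNAMIC_STATE_VIEWPORT,
      VK_DYNAMIC_STATE_SCISSOR,
      VK_DYNAMIC_STATE_STENCIL_REFERENCE,
  };
  VkPipelineDynamicStateCreateInfo info = {
      VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO};
  info.dynamicStateCount =
      uint32_t(sizeof(kDynamicStates) / sizeof(kDynamicStates[0]));
  info.pDynamicStates = kDynamicStates;
  return info;
}

// Point VkGraphicsPipelineCreateInfo::pDynamicState at the returned structure
// when creating the pipeline, then set the actual values on the command
// buffer per draw:
//   vkCmdSetViewport(command_buffer, 0, 1, &viewport);
//   vkCmdSetScissor(command_buffer, 0, 1, &scissor);
//   vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, ref);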
-
-bool VulkanPipelineCache::TranslateShader(
+bool VulkanPipelineCache::TranslateAnalyzedShader(
+    SpirvShaderTranslator& translator,
     VulkanShader::VulkanTranslation& translation) {
-  translation.shader().AnalyzeUcode(ucode_disasm_buffer_);
+  VulkanShader& shader = static_cast<VulkanShader&>(translation.shader());
+
   // Perform translation.
   // If this fails the shader will be marked as invalid and ignored later.
-  if (!shader_translator_->TranslateAnalyzedShader(translation)) {
-    XELOGE("Shader translation failed; marking shader as ignored");
+  if (!translator.TranslateAnalyzedShader(translation)) {
+    XELOGE("Shader {:016X} translation failed; marking as ignored",
+           shader.ucode_data_hash());
+    return false;
+  }
+  if (translation.GetOrCreateShaderModule() == VK_NULL_HANDLE) {
     return false;
   }
-  // Prepare the shader for use (creates our VkShaderModule).
-  // It could still fail at this point.
-  if (!translation.Prepare()) {
-    XELOGE("Shader preparation failed; marking shader as ignored");
-    return false;
-  }
+  // TODO(Triang3l): Log that the shader has been successfully translated in
+  // common code.
 
-  if (translation.is_valid()) {
-    XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n",
-             translation.shader().type() == xenos::ShaderType::kVertex
-                 ? "vertex"
-                 : "pixel",
-             translation.shader().ucode_dword_count() * 4,
-             translation.shader().ucode_data_hash(),
-             translation.shader().ucode_disassembly());
-  }
-
-  // Dump shader files if desired.
-  if (!cvars::dump_shaders.empty()) {
-    translation.Dump(cvars::dump_shaders, "vk");
-  }
-
-  return translation.is_valid();
-}
-
-static void DumpShaderStatisticsAMD(const VkShaderStatisticsInfoAMD& stats) {
-  XELOGI(" - resource usage:");
-  XELOGI("   numUsedVgprs: {}", stats.resourceUsage.numUsedVgprs);
-  XELOGI("   numUsedSgprs: {}", stats.resourceUsage.numUsedSgprs);
-  XELOGI("   ldsSizePerLocalWorkGroup: {}",
-         stats.resourceUsage.ldsSizePerLocalWorkGroup);
-  XELOGI("   ldsUsageSizeInBytes     : {}",
-         stats.resourceUsage.ldsUsageSizeInBytes);
-  XELOGI("   scratchMemUsageInBytes  : {}",
-         stats.resourceUsage.scratchMemUsageInBytes);
-  XELOGI("numPhysicalVgprs : {}", stats.numPhysicalVgprs);
-  XELOGI("numPhysicalSgprs : {}", stats.numPhysicalSgprs);
-  XELOGI("numAvailableVgprs: {}", stats.numAvailableVgprs);
-  XELOGI("numAvailableSgprs: {}", stats.numAvailableSgprs);
-}
-
-void VulkanPipelineCache::DumpShaderDisasmAMD(VkPipeline pipeline) {
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-  VkResult status = VK_SUCCESS;
-  size_t data_size = 0;
-
-  VkShaderStatisticsInfoAMD stats;
-  data_size = sizeof(stats);
-
-  // Vertex shader
-  status = dfn.vkGetShaderInfoAMD(device, pipeline, VK_SHADER_STAGE_VERTEX_BIT,
-                                  VK_SHADER_INFO_TYPE_STATISTICS_AMD,
-                                  &data_size, &stats);
-  if (status == VK_SUCCESS) {
-    XELOGI("AMD Vertex Shader Statistics:");
-    DumpShaderStatisticsAMD(stats);
-  }
-
-  // Fragment shader
-  status = dfn.vkGetShaderInfoAMD(
-      device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
-      VK_SHADER_INFO_TYPE_STATISTICS_AMD, &data_size, &stats);
-  if (status == VK_SUCCESS) {
-    XELOGI("AMD Fragment Shader Statistics:");
-    DumpShaderStatisticsAMD(stats);
-  }
-
-  // TODO(DrChat): Eventually dump the disasm...
-}
-
-void VulkanPipelineCache::DumpShaderDisasmNV(
-    const VkGraphicsPipelineCreateInfo& pipeline_info) {
-  // !! HACK !!: This only works on NVidia drivers. Dumps shader disasm.
-  // This code is super ugly. Update this when NVidia includes an official
-  // way to dump shader disassembly.
-
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-
-  VkPipelineCacheCreateInfo pipeline_cache_info;
-  VkPipelineCache dummy_pipeline_cache;
-  pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
-  pipeline_cache_info.pNext = nullptr;
-  pipeline_cache_info.flags = 0;
-  pipeline_cache_info.initialDataSize = 0;
-  pipeline_cache_info.pInitialData = nullptr;
-  auto status = dfn.vkCreatePipelineCache(device, &pipeline_cache_info, nullptr,
-                                          &dummy_pipeline_cache);
-  CheckResult(status, "vkCreatePipelineCache");
-
-  // Create a pipeline on the dummy cache and dump it.
-  VkPipeline dummy_pipeline;
-  status =
-      dfn.vkCreateGraphicsPipelines(device, dummy_pipeline_cache, 1,
-                                    &pipeline_info, nullptr, &dummy_pipeline);
-
-  std::vector<uint8_t> pipeline_data;
-  size_t data_size = 0;
-  status = dfn.vkGetPipelineCacheData(device, dummy_pipeline_cache, &data_size,
-                                      nullptr);
-  if (status == VK_SUCCESS) {
-    pipeline_data.resize(data_size);
-    dfn.vkGetPipelineCacheData(device, dummy_pipeline_cache, &data_size,
-                               pipeline_data.data());
-
-    // Scan the data for the disassembly.
-    std::string disasm_vp, disasm_fp;
-
-    const char* disasm_start_vp = nullptr;
-    const char* disasm_start_fp = nullptr;
-    size_t search_offset = 0;
-    const char* search_start =
-        reinterpret_cast<const char*>(pipeline_data.data());
-    while (true) {
-      auto p = reinterpret_cast<const char*>(
-          memchr(pipeline_data.data() + search_offset, '!',
-                 pipeline_data.size() - search_offset));
-      if (!p) {
-        break;
-      }
-      if (!strncmp(p, "!!NV", 4)) {
-        if (!strncmp(p + 4, "vp", 2)) {
-          disasm_start_vp = p;
-        } else if (!strncmp(p + 4, "fp", 2)) {
-          disasm_start_fp = p;
-        }
-
-        if (disasm_start_fp && disasm_start_vp) {
-          // Found all we needed.
+  // Set up the texture binding layout.
+  if (shader.EnterBindingLayoutUserUIDSetup()) {
+    // Obtain the unique IDs of the binding layout if there are any texture
+    // bindings, for invalidation in the command processor.
+    size_t texture_binding_layout_uid = kLayoutUIDEmpty;
+    const std::vector<VulkanShader::TextureBinding>& texture_bindings =
+        shader.GetTextureBindingsAfterTranslation();
+    size_t texture_binding_count = texture_bindings.size();
+    if (texture_binding_count) {
+      size_t texture_binding_layout_bytes =
+          texture_binding_count * sizeof(*texture_bindings.data());
+      uint64_t texture_binding_layout_hash =
+          XXH3_64bits(texture_bindings.data(), texture_binding_layout_bytes);
+      auto found_range =
+          texture_binding_layout_map_.equal_range(texture_binding_layout_hash);
+      for (auto it = found_range.first; it != found_range.second; ++it) {
+        if (it->second.vector_span_length == texture_binding_count &&
+            !std::memcmp(
+                texture_binding_layouts_.data() + it->second.vector_span_offset,
+                texture_bindings.data(), texture_binding_layout_bytes)) {
+          texture_binding_layout_uid = it->second.uid;
           break;
         }
       }
-      search_offset = p - search_start;
-      ++search_offset;
+      if (texture_binding_layout_uid == kLayoutUIDEmpty) {
+        static_assert(
+            kLayoutUIDEmpty == 0,
+            "Layout UID is size + 1 because it's assumed that 0 is the UID for "
+            "an empty layout");
+        texture_binding_layout_uid = texture_binding_layout_map_.size() + 1;
+        LayoutUID new_uid;
+        new_uid.uid = texture_binding_layout_uid;
+        new_uid.vector_span_offset = texture_binding_layouts_.size();
+        new_uid.vector_span_length = texture_binding_count;
+        texture_binding_layouts_.resize(new_uid.vector_span_offset +
+                                        texture_binding_count);
+        std::memcpy(
+            texture_binding_layouts_.data() + new_uid.vector_span_offset,
+            texture_bindings.data(), texture_binding_layout_bytes);
+        texture_binding_layout_map_.emplace(texture_binding_layout_hash,
+                                            new_uid);
+      }
     }
-    if (disasm_start_vp) {
-      disasm_vp = std::string(disasm_start_vp);
+    shader.SetTextureBindingLayoutUserUID(texture_binding_layout_uid);
 
-      // For some reason there's question marks all over the code.
-      disasm_vp.erase(std::remove(disasm_vp.begin(), disasm_vp.end(), '?'),
-                      disasm_vp.end());
-    } else {
-      disasm_vp = std::string("Shader disassembly not available.");
-    }
-
-    if (disasm_start_fp) {
-      disasm_fp = std::string(disasm_start_fp);
-
-      // For some reason there's question marks all over the code.
-      disasm_fp.erase(std::remove(disasm_fp.begin(), disasm_fp.end(), '?'),
-                      disasm_fp.end());
-    } else {
-      disasm_fp = std::string("Shader disassembly not available.");
-    }
-
-    XELOGI("{}\n=====================================\n{}\n", disasm_vp,
-           disasm_fp);
+    // Use the sampler count for samplers because it's the only thing that must
+    // be the same for layouts to be compatible in this case
+    // (instruction-specified parameters are used as overrides for creating
+    // actual samplers).
+    static_assert(
+        kLayoutUIDEmpty == 0,
+        "Empty layout UID is assumed to be 0 because for bindful samplers, the "
+        "UID is their count");
+    shader.SetSamplerBindingLayoutUserUID(
+        shader.GetSamplerBindingsAfterTranslation().size());
+  }
-  dfn.vkDestroyPipeline(device, dummy_pipeline, nullptr);
-  dfn.vkDestroyPipelineCache(device, dummy_pipeline_cache, nullptr);
+  return true;
 }
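// [Illustrative sketch, not part of the patch.] The UID assignment above is a
// content-addressed deduplication: byte-identical binding layouts get the
// same small integer, so later "did the layout change?" checks in the command
// processor are a single integer comparison instead of a deep compare. The
// same idea in isolation (Binding and LayoutUIDPool are hypothetical
// stand-ins for the real per-shader binding structures):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

#include "third_party/xxhash/xxhash.h"

struct Binding {
  uint32_t fetch_constant;
  uint32_t type;
};

class LayoutUIDPool {
 public:
  static constexpr size_t kEmptyUID = 0;  // Reserved for zero bindings.

  size_t GetUID(const std::vector<Binding>& bindings) {
    if (bindings.empty()) {
      return kEmptyUID;
    }
    size_t bytes = bindings.size() * sizeof(Binding);
    uint64_t hash = XXH3_64bits(bindings.data(), bytes);
    // Hash collisions are possible, so verify with memcmp over the stored
    // span before trusting a match.
    auto range = map_.equal_range(hash);
    for (auto it = range.first; it != range.second; ++it) {
      const Span& span = it->second;
      if (span.length == bindings.size() &&
          !std::memcmp(storage_.data() + span.offset, bindings.data(), bytes)) {
        return span.uid;
      }
    }
    Span span;
    span.uid = map_.size() + 1;  // kEmptyUID is 0, so real UIDs start at 1.
    span.offset = storage_.size();
    span.length = bindings.size();
    storage_.insert(storage_.end(), bindings.begin(), bindings.end());
    map_.emplace(hash, span);
    return span.uid;
  }

 private:
  struct Span {
    size_t uid;
    size_t offset;
    size_t length;
  };
  std::vector<Binding> storage_;  // All deduplicated layouts, back to back.
  std::unordered_multimap<uint64_t, Span> map_;
};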
 
-VkShaderModule VulkanPipelineCache::GetGeometryShader(
-    xenos::PrimitiveType primitive_type, bool is_line_mode) {
-  switch (primitive_type) {
-    case xenos::PrimitiveType::kLineList:
-    case xenos::PrimitiveType::kLineLoop:
-    case xenos::PrimitiveType::kLineStrip:
-    case xenos::PrimitiveType::kTriangleList:
-    case xenos::PrimitiveType::kTriangleFan:
-    case xenos::PrimitiveType::kTriangleStrip:
-      // Supported directly - no need to emulate.
-      return nullptr;
-    case xenos::PrimitiveType::kPointList:
-      return geometry_shaders_.point_list;
-    case xenos::PrimitiveType::kTriangleWithWFlags:
-      assert_always("Unknown geometry type");
-      return nullptr;
-    case xenos::PrimitiveType::kRectangleList:
-      return geometry_shaders_.rect_list;
-    case xenos::PrimitiveType::kQuadList:
-      return is_line_mode ? geometry_shaders_.line_quad_list
-                          : geometry_shaders_.quad_list;
-    case xenos::PrimitiveType::kQuadStrip:
-      // TODO(benvanik): quad strip geometry shader.
-      assert_always("Quad strips not implemented");
-      return nullptr;
-    case xenos::PrimitiveType::kTrianglePatch:
-    case xenos::PrimitiveType::kQuadPatch:
-      assert_always("Tessellation is not implemented");
-      return nullptr;
-    default:
-      assert_unhandled_case(primitive_type);
-      return nullptr;
-  }
-}
-
-bool VulkanPipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
-                                          bool full_update) {
-#if FINE_GRAINED_DRAW_SCOPES
-  SCOPE_profile_cpu_f("gpu");
-#endif  // FINE_GRAINED_DRAW_SCOPES
-
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  auto& regs = set_dynamic_state_registers_;
-
-  bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset,
-                                               XE_GPU_REG_PA_SC_WINDOW_OFFSET);
-  window_offset_dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                                           XE_GPU_REG_PA_SU_SC_MODE_CNTL);
-
-  // Window parameters.
-  // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
-  // See r200UpdateWindow:
-  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
-  int16_t window_offset_x = regs.pa_sc_window_offset & 0x7FFF;
-  int16_t window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF;
-  if (window_offset_x & 0x4000) {
-    window_offset_x |= 0x8000;
-  }
-  if (window_offset_y & 0x4000) {
-    window_offset_y |= 0x8000;
-  }
-
-  // VK_DYNAMIC_STATE_SCISSOR
-  bool scissor_state_dirty = full_update || window_offset_dirty;
-  scissor_state_dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
-                                           XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
-  scissor_state_dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
-                                           XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
-  if (scissor_state_dirty) {
-    int32_t ws_x = regs.pa_sc_window_scissor_tl & 0x7FFF;
-    int32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
-    int32_t ws_w = (regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x;
-    int32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
-    if (!(regs.pa_sc_window_scissor_tl & 0x80000000)) {
-      // !WINDOW_OFFSET_DISABLE
-      ws_x += window_offset_x;
-      ws_y += window_offset_y;
-    }
-
-    int32_t adj_x = ws_x - std::max(ws_x, 0);
-    int32_t adj_y = ws_y - std::max(ws_y, 0);
-
-    VkRect2D scissor_rect;
-    scissor_rect.offset.x = ws_x - adj_x;
-    scissor_rect.offset.y = ws_y - adj_y;
-    scissor_rect.extent.width = std::max(ws_w + adj_x, 0);
-    scissor_rect.extent.height = std::max(ws_h + adj_y, 0);
-    dfn.vkCmdSetScissor(command_buffer, 0, 1, &scissor_rect);
-  }
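// [Illustrative sketch, not part of the patch.] The window offset handling in
// the deleted SetDynamicState above sign-extends a 15-bit register field into
// an int16_t by propagating bit 14 (the 15-bit sign bit) upward. The same
// trick in isolation:

#include <cstdint>

int16_t SignExtend15(uint32_t packed) {
  uint32_t field = packed & 0x7FFF;  // Extract the 15-bit field.
  if (field & 0x4000) {
    field |= 0xFFFF8000u;  // Propagate the sign through the upper bits.
  }
  return int16_t(int32_t(field));
}
// Examples: SignExtend15(0x7FFF) == -1, SignExtend15(0x0040) == 64.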
-
-  // VK_DYNAMIC_STATE_VIEWPORT
-  bool viewport_state_dirty = full_update || window_offset_dirty;
-  viewport_state_dirty |=
-      SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
-  viewport_state_dirty |=
-      SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
-  viewport_state_dirty |=
-      SetShadowRegister(&regs.pa_su_sc_vtx_cntl, XE_GPU_REG_PA_SU_VTX_CNTL);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
-                                            XE_GPU_REG_PA_CL_VPORT_XOFFSET);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
-                                            XE_GPU_REG_PA_CL_VPORT_YOFFSET);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
-                                            XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
-                                            XE_GPU_REG_PA_CL_VPORT_XSCALE);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
-                                            XE_GPU_REG_PA_CL_VPORT_YSCALE);
-  viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
-                                            XE_GPU_REG_PA_CL_VPORT_ZSCALE);
-  // RB_SURFACE_INFO
-  auto surface_msaa =
-      static_cast<xenos::MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
-
-  // Apply a multiplier to emulate MSAA.
-  float window_width_scalar = 1;
-  float window_height_scalar = 1;
-  switch (surface_msaa) {
-    case xenos::MsaaSamples::k1X:
-      break;
-    case xenos::MsaaSamples::k2X:
-      window_height_scalar = 2;
-      break;
-    case xenos::MsaaSamples::k4X:
-      window_width_scalar = window_height_scalar = 2;
-      break;
-  }
-
-  // Whether each of the viewport settings are enabled.
-  // https://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
-  bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0;
-  bool vport_xoffset_enable = (regs.pa_cl_vte_cntl & (1 << 1)) > 0;
-  bool vport_yscale_enable = (regs.pa_cl_vte_cntl & (1 << 2)) > 0;
-  bool vport_yoffset_enable = (regs.pa_cl_vte_cntl & (1 << 3)) > 0;
-  bool vport_zscale_enable = (regs.pa_cl_vte_cntl & (1 << 4)) > 0;
-  bool vport_zoffset_enable = (regs.pa_cl_vte_cntl & (1 << 5)) > 0;
-  assert_true(vport_xscale_enable == vport_yscale_enable ==
-              vport_zscale_enable == vport_xoffset_enable ==
-              vport_yoffset_enable == vport_zoffset_enable);
-
-  int16_t vtx_window_offset_x =
-      (regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_x : 0;
-  int16_t vtx_window_offset_y =
-      (regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_y : 0;
-
-  float vpw, vph, vpx, vpy;
-  if (vport_xscale_enable) {
-    float vox = vport_xoffset_enable ? regs.pa_cl_vport_xoffset : 0;
-    float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0;
-    float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1;
-    float vsy = vport_yscale_enable ?
regs.pa_cl_vport_yscale : 1; - - window_width_scalar = window_height_scalar = 1; - vpw = 2 * window_width_scalar * vsx; - vph = -2 * window_height_scalar * vsy; - vpx = window_width_scalar * vox - vpw / 2 + vtx_window_offset_x; - vpy = window_height_scalar * voy - vph / 2 + vtx_window_offset_y; - } else { - // TODO(DrChat): This should be the width/height of the target picture - vpw = 2560.0f; - vph = 2560.0f; - vpx = vtx_window_offset_x; - vpy = vtx_window_offset_y; - } - - if (viewport_state_dirty) { - VkViewport viewport_rect; - std::memset(&viewport_rect, 0, sizeof(VkViewport)); - viewport_rect.x = vpx; - viewport_rect.y = vpy; - viewport_rect.width = vpw; - viewport_rect.height = vph; - - float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; - float vsz = vport_zscale_enable ? regs.pa_cl_vport_zscale : 1; - viewport_rect.minDepth = voz; - viewport_rect.maxDepth = voz + vsz; - assert_true(viewport_rect.minDepth >= 0 && viewport_rect.minDepth <= 1); - assert_true(viewport_rect.maxDepth >= -1 && viewport_rect.maxDepth <= 1); - - dfn.vkCmdSetViewport(command_buffer, 0, 1, &viewport_rect); - } - - // VK_DYNAMIC_STATE_DEPTH_BIAS - // No separate front/back bias in Vulkan - using what's more expected to work. - // No need to reset to 0 if not enabled in the pipeline - recheck conditions. - float depth_bias_scales[2] = {0}, depth_bias_offsets[2] = {0}; - auto cull_mode = regs.pa_su_sc_mode_cntl & 3; - if (cull_mode != 1) { - // Front faces are not culled. - depth_bias_scales[0] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - depth_bias_offsets[0] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - } - if (cull_mode != 2) { - // Back faces are not culled. - depth_bias_scales[1] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - depth_bias_offsets[1] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; - } - if (depth_bias_scales[0] != 0.0f || depth_bias_scales[1] != 0.0f || - depth_bias_offsets[0] != 0.0f || depth_bias_offsets[1] != 0.0f) { - float depth_bias_scale, depth_bias_offset; - // Prefer front if not culled and offset for both is enabled. - // However, if none are culled, and there's no front offset, use back offset - // (since there was an intention to enable depth offset at all). - // As SetRenderState sets for both sides, this should be very rare anyway. - // TODO(Triang3l): Verify the intentions if this happens in real games. - if (depth_bias_scales[0] != 0.0f || depth_bias_offsets[0] != 0.0f) { - depth_bias_scale = depth_bias_scales[0]; - depth_bias_offset = depth_bias_offsets[0]; - } else { - depth_bias_scale = depth_bias_scales[1]; - depth_bias_offset = depth_bias_offsets[1]; - } - // Convert to Vulkan units based on the values in 415607E6: - // r_polygonOffsetScale is -1 there, but 32 in the register. - // r_polygonOffsetBias is -1 also, but passing 2/65536. - // 1/65536 and 2 scales are applied separately, however, and for shadow maps - // 0.5/65536 is passed (while sm_polygonOffsetBias is 0.5), and with 32768 - // it would be 0.25, which seems too small. So using 65536, assuming it's a - // common scale value (which also looks less arbitrary than 32768). - // TODO(Triang3l): Investigate, also considering the depth format (kD24FS8). 
- // Possibly refer to: - // https://www.winehq.org/pipermail/wine-patches/2015-July/141200.html - float depth_bias_scale_vulkan = depth_bias_scale * (1.0f / 32.0f); - float depth_bias_offset_vulkan = depth_bias_offset * 65536.0f; - if (full_update || - regs.pa_su_poly_offset_scale != depth_bias_scale_vulkan || - regs.pa_su_poly_offset_offset != depth_bias_offset_vulkan) { - regs.pa_su_poly_offset_scale = depth_bias_scale_vulkan; - regs.pa_su_poly_offset_offset = depth_bias_offset_vulkan; - dfn.vkCmdSetDepthBias(command_buffer, depth_bias_offset_vulkan, 0.0f, - depth_bias_scale_vulkan); - } - } else if (full_update) { - regs.pa_su_poly_offset_scale = 0.0f; - regs.pa_su_poly_offset_offset = 0.0f; - dfn.vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); - } - - // VK_DYNAMIC_STATE_BLEND_CONSTANTS - bool blend_constant_state_dirty = full_update; - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); - if (blend_constant_state_dirty) { - dfn.vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); - } - - bool stencil_state_dirty = full_update; - stencil_state_dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - if (stencil_state_dirty) { - uint32_t stencil_ref = (regs.rb_stencilrefmask & 0xFF); - uint32_t stencil_read_mask = (regs.rb_stencilrefmask >> 8) & 0xFF; - uint32_t stencil_write_mask = (regs.rb_stencilrefmask >> 16) & 0xFF; - - // VK_DYNAMIC_STATE_STENCIL_REFERENCE - dfn.vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_ref); - - // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK - dfn.vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_read_mask); - - // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK - dfn.vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_write_mask); - } - - bool push_constants_dirty = full_update || viewport_state_dirty; - push_constants_dirty |= SetShadowRegister(®s.sq_program_cntl.value, - XE_GPU_REG_SQ_PROGRAM_CNTL); - push_constants_dirty |= - SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - push_constants_dirty |= - SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); - push_constants_dirty |= - SetShadowRegister(®s.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); - push_constants_dirty |= - SetShadowRegister(®s.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE); - if (push_constants_dirty) { - // Normal vertex shaders only, for now. 
-    assert_true(regs.sq_program_cntl.vs_export_mode ==
-                    xenos::VertexShaderExportMode::kPosition1Vector ||
-                regs.sq_program_cntl.vs_export_mode ==
-                    xenos::VertexShaderExportMode::kPosition2VectorsSprite ||
-                regs.sq_program_cntl.vs_export_mode ==
-                    xenos::VertexShaderExportMode::kMultipass);
-    assert_false(regs.sq_program_cntl.gen_index_vtx);
-
-    SpirvPushConstants push_constants = {};
-
-    // Done in VS, no need to flush state.
-    if (vport_xscale_enable) {
-      push_constants.window_scale[0] = 1.0f;
-      push_constants.window_scale[1] = -1.0f;
-      push_constants.window_scale[2] = 0.f;
-      push_constants.window_scale[3] = 0.f;
-    } else {
-      // 1 / unscaled viewport w/h
-      push_constants.window_scale[0] = window_width_scalar / 1280.f;
-      push_constants.window_scale[1] = window_height_scalar / 1280.f;
-      push_constants.window_scale[2] = (-1280.f / window_width_scalar) + 0.5f;
-      push_constants.window_scale[3] = (-1280.f / window_height_scalar) + 0.5f;
-    }
-
-    // https://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
-    // VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
-    //            = false: multiply the X, Y coordinates by 1/W0.
-    // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
-    //           = false: multiply the Z coordinate by 1/W0.
-    // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
-    //              get 1/W0.
-    float vtx_xy_fmt = (regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f;
-    float vtx_z_fmt = (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f;
-    float vtx_w0_fmt = (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f;
-    push_constants.vtx_fmt[0] = vtx_xy_fmt;
-    push_constants.vtx_fmt[1] = vtx_xy_fmt;
-    push_constants.vtx_fmt[2] = vtx_z_fmt;
-    push_constants.vtx_fmt[3] = vtx_w0_fmt;
-
-    // Point size
-    push_constants.point_size[0] =
-        static_cast<float>((regs.pa_su_point_size & 0xffff0000) >> 16) / 8.0f;
-    push_constants.point_size[1] =
-        static_cast<float>((regs.pa_su_point_size & 0x0000ffff)) / 8.0f;
-
-    reg::RB_COLOR_INFO color_info[4] = {
-        regs.rb_color_info,
-        regs.rb_color1_info,
-        regs.rb_color2_info,
-        regs.rb_color3_info,
+void VulkanPipelineCache::WritePipelineRenderTargetDescription(
+    reg::RB_BLENDCONTROL blend_control, uint32_t write_mask,
+    PipelineRenderTarget& render_target_out) const {
+  if (write_mask) {
+    assert_zero(write_mask & ~uint32_t(0b1111));
+    // 32 because of 0x1F mask, for safety (all unknown to zero).
+    static const PipelineBlendFactor kBlendFactorMap[32] = {
+        /*  0 */ PipelineBlendFactor::kZero,
+        /*  1 */ PipelineBlendFactor::kOne,
+        /*  2 */ PipelineBlendFactor::kZero,  // ?
+        /*  3 */ PipelineBlendFactor::kZero,  // ?
+        /*  4 */ PipelineBlendFactor::kSrcColor,
+        /*  5 */ PipelineBlendFactor::kOneMinusSrcColor,
+        /*  6 */ PipelineBlendFactor::kSrcAlpha,
+        /*  7 */ PipelineBlendFactor::kOneMinusSrcAlpha,
+        /*  8 */ PipelineBlendFactor::kDstColor,
+        /*  9 */ PipelineBlendFactor::kOneMinusDstColor,
+        /* 10 */ PipelineBlendFactor::kDstAlpha,
+        /* 11 */ PipelineBlendFactor::kOneMinusDstAlpha,
+        /* 12 */ PipelineBlendFactor::kConstantColor,
+        /* 13 */ PipelineBlendFactor::kOneMinusConstantColor,
+        /* 14 */ PipelineBlendFactor::kConstantAlpha,
+        /* 15 */ PipelineBlendFactor::kOneMinusConstantAlpha,
+        /* 16 */ PipelineBlendFactor::kSrcAlphaSaturate,
     };
-    for (int i = 0; i < 4; i++) {
-      push_constants.color_exp_bias[i] =
-          static_cast<float>(1 << color_info[i].color_exp_bias);
+    render_target_out.src_color_blend_factor =
+        kBlendFactorMap[uint32_t(blend_control.color_srcblend)];
+    render_target_out.dst_color_blend_factor =
+        kBlendFactorMap[uint32_t(blend_control.color_destblend)];
+    render_target_out.color_blend_op = blend_control.color_comb_fcn;
+    render_target_out.src_alpha_blend_factor =
+        kBlendFactorMap[uint32_t(blend_control.alpha_srcblend)];
+    render_target_out.dst_alpha_blend_factor =
+        kBlendFactorMap[uint32_t(blend_control.alpha_destblend)];
+    render_target_out.alpha_blend_op = blend_control.alpha_comb_fcn;
+    const ui::vulkan::VulkanProvider& provider =
+        command_processor_.GetVulkanProvider();
+    const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+        device_portability_subset_features =
+            provider.device_portability_subset_features();
+    if (device_portability_subset_features &&
+        !device_portability_subset_features->constantAlphaColorBlendFactors) {
+      if (blend_control.color_srcblend == xenos::BlendFactor::kConstantAlpha) {
+        render_target_out.src_color_blend_factor =
+            PipelineBlendFactor::kConstantColor;
+      } else if (blend_control.color_srcblend ==
+                 xenos::BlendFactor::kOneMinusConstantAlpha) {
+        render_target_out.src_color_blend_factor =
+            PipelineBlendFactor::kOneMinusConstantColor;
+      }
+      if (blend_control.color_destblend == xenos::BlendFactor::kConstantAlpha) {
+        render_target_out.dst_color_blend_factor =
+            PipelineBlendFactor::kConstantColor;
+      } else if (blend_control.color_destblend ==
+                 xenos::BlendFactor::kOneMinusConstantAlpha) {
+        render_target_out.dst_color_blend_factor =
+            PipelineBlendFactor::kOneMinusConstantColor;
+      }
+    }
-
-    // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
-    // Emulated in shader.
-    // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
-    // ALPHATESTENABLE
-    push_constants.alpha_test[0] =
-        (regs.rb_colorcontrol & 0x8) != 0 ? 1.0f : 0.0f;
-    // ALPHAFUNC
-    push_constants.alpha_test[1] =
-        static_cast<float>(regs.rb_colorcontrol & 0x7);
-    // ALPHAREF
-    push_constants.alpha_test[2] = regs.rb_alpha_ref;
-
-    // Whether to populate a register in the pixel shader with frag coord.
-    int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF;
-    push_constants.ps_param_gen =
-        regs.sq_program_cntl.param_gen ? ps_param_gen : -1;
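// [Illustrative sketch, not part of the patch.] The portability-subset block
// above demotes CONSTANT_ALPHA blend factors to CONSTANT_COLOR on devices
// (such as some MoltenVK targets) that report
// constantAlphaColorBlendFactors as unsupported. An assumption in this
// sketch: the substitution is only exact when the RGB blend constants are
// also set equal to the alpha blend constant wherever blend constants are
// uploaded.

VkBlendFactor DemoteConstantAlpha(VkBlendFactor factor,
                                  bool constant_alpha_supported) {
  if (constant_alpha_supported) {
    return factor;
  }
  switch (factor) {
    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return VK_BLEND_FACTOR_CONSTANT_COLOR;
    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;
    default:
      return factor;  // All other factors are unaffected by the feature bit.
  }
}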
-
-    dfn.vkCmdPushConstants(command_buffer, pipeline_layout_,
-                           VK_SHADER_STAGE_VERTEX_BIT |
-                               VK_SHADER_STAGE_GEOMETRY_BIT |
-                               VK_SHADER_STAGE_FRAGMENT_BIT,
-                           0, kSpirvPushConstantsSize, &push_constants);
+  } else {
+    render_target_out.src_color_blend_factor = PipelineBlendFactor::kOne;
+    render_target_out.dst_color_blend_factor = PipelineBlendFactor::kZero;
+    render_target_out.color_blend_op = xenos::BlendOp::kAdd;
+    render_target_out.src_alpha_blend_factor = PipelineBlendFactor::kOne;
+    render_target_out.dst_alpha_blend_factor = PipelineBlendFactor::kZero;
+    render_target_out.alpha_blend_op = xenos::BlendOp::kAdd;
   }
-
-  if (full_update) {
-    // VK_DYNAMIC_STATE_LINE_WIDTH
-    dfn.vkCmdSetLineWidth(command_buffer, 1.0f);
-
-    // VK_DYNAMIC_STATE_DEPTH_BOUNDS
-    dfn.vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);
-  }
-
-  return true;
+  render_target_out.color_write_mask = write_mask;
 }
 
-bool VulkanPipelineCache::SetShadowRegister(uint32_t* dest,
-                                            uint32_t register_name) {
-  uint32_t value = register_file_->values[register_name].u32;
-  if (*dest == value) {
-    return false;
-  }
-  *dest = value;
-  return true;
-}
+bool VulkanPipelineCache::GetCurrentStateDescription(
+    const VulkanShader::VulkanTranslation* vertex_shader,
+    const VulkanShader::VulkanTranslation* pixel_shader,
+    const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
+    reg::RB_DEPTHCONTROL normalized_depth_control,
+    uint32_t normalized_color_mask,
+    VulkanRenderTargetCache::RenderPassKey render_pass_key,
+    PipelineDescription& description_out) const {
+  description_out.Reset();
 
-bool VulkanPipelineCache::SetShadowRegister(float* dest,
-                                            uint32_t register_name) {
-  float value = register_file_->values[register_name].f32;
-  if (*dest == value) {
-    return false;
-  }
-  *dest = value;
-  return true;
-}
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const VkPhysicalDeviceFeatures& device_features = provider.device_features();
+  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+      device_portability_subset_features =
+          provider.device_portability_subset_features();
 
-bool VulkanPipelineCache::SetShadowRegisterArray(uint32_t* dest, uint32_t num,
-                                                 uint32_t register_name) {
-  bool dirty = false;
-  for (uint32_t i = 0; i < num; i++) {
-    uint32_t value = register_file_->values[register_name + i].u32;
-    if (dest[i] == value) {
-      continue;
-    }
+  const RegisterFile& regs = register_file_;
+  auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
 
-    dest[i] = value;
-    dirty |= true;
-  }
-
-  return dirty;
-}
-
-VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateState(
-    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
-    xenos::PrimitiveType primitive_type) {
-  bool mismatch = false;
-
-  // Reset hash so we can build it up.
-  XXH3_64bits_reset(&hash_state_);
-
-#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
-  {                                                          \
-    if (status == UpdateStatus::kError) {                    \
-      XELOGE(error_message);                                 \
-      return status;                                         \
-    } else if (status == UpdateStatus::kMismatch) {          \
-      mismatch = true;                                       \
-    }                                                        \
-  }
-
-  UpdateStatus status;
-  status = UpdateRenderTargetState();
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update render target state");
-  status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
-  status = UpdateVertexInputState(vertex_shader);
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update vertex input state");
-  status = UpdateInputAssemblyState(primitive_type);
-  CHECK_UPDATE_STATUS(status, mismatch,
-                      "Unable to update input assembly state");
-  status = UpdateViewportState();
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
-  status = UpdateRasterizationState(primitive_type);
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterization state");
-  status = UpdateMultisampleState();
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update multisample state");
-  status = UpdateDepthStencilState();
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
-  status = UpdateColorBlendState();
-  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update color blend state");
-
-  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
-}
-
-VulkanPipelineCache::UpdateStatus
-VulkanPipelineCache::UpdateRenderTargetState() {
-  auto& regs = update_render_targets_regs_;
-  bool dirty = false;
-
-  // Check the render target formats
-  struct {
-    reg::RB_COLOR_INFO rb_color_info;
-    reg::RB_DEPTH_INFO rb_depth_info;
-    reg::RB_COLOR_INFO rb_color1_info;
-    reg::RB_COLOR_INFO rb_color2_info;
-    reg::RB_COLOR_INFO rb_color3_info;
-  }* cur_regs = reinterpret_cast<decltype(cur_regs)>(
-      &register_file_->values[XE_GPU_REG_RB_COLOR_INFO].u32);
-
-  dirty |=
-      regs.rb_color_info.color_format != cur_regs->rb_color_info.color_format;
-  dirty |=
-      regs.rb_depth_info.depth_format != cur_regs->rb_depth_info.depth_format;
-  dirty |=
-      regs.rb_color1_info.color_format != cur_regs->rb_color1_info.color_format;
-  dirty |=
-      regs.rb_color2_info.color_format != cur_regs->rb_color2_info.color_format;
-  dirty |=
-      regs.rb_color3_info.color_format != cur_regs->rb_color3_info.color_format;
-
-  // And copy the regs over.
-  regs.rb_color_info.color_format = cur_regs->rb_color_info.color_format;
-  regs.rb_depth_info.depth_format = cur_regs->rb_depth_info.depth_format;
-  regs.rb_color1_info.color_format = cur_regs->rb_color1_info.color_format;
-  regs.rb_color2_info.color_format = cur_regs->rb_color2_info.color_format;
-  regs.rb_color3_info.color_format = cur_regs->rb_color3_info.color_format;
-  XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  return UpdateStatus::kMismatch;
-}
-
-VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateShaderStages(
-    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
-    xenos::PrimitiveType primitive_type) {
-  auto& regs = update_shader_stages_regs_;
-
-  // These are the constant base addresses/ranges for shaders.
-  // We have these hardcoded right now cause nothing seems to differ.
-  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
-                  0x000FF000 ||
-              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
-  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
-                  0x000FF100 ||
-              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
-
-  bool dirty = false;
-  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
-  dirty |= SetShadowRegister(&regs.sq_program_cntl.value,
-                             XE_GPU_REG_SQ_PROGRAM_CNTL);
-  dirty |= regs.vertex_shader != vertex_shader;
-  dirty |= regs.pixel_shader != pixel_shader;
-  dirty |= regs.primitive_type != primitive_type;
-  regs.vertex_shader = vertex_shader;
-  regs.pixel_shader = pixel_shader;
-  regs.primitive_type = primitive_type;
-  XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
-  if (!dirty) {
-    return UpdateStatus::kCompatible;
-  }
-
-  VulkanShader::VulkanTranslation* vertex_shader_translation =
-      static_cast<VulkanShader::VulkanTranslation*>(
-          vertex_shader->GetOrCreateTranslation(
-              shader_translator_->GetDefaultVertexShaderModification(
-                  vertex_shader->GetDynamicAddressableRegisterCount(
-                      regs.sq_program_cntl.vs_num_reg))));
-  if (!vertex_shader_translation->is_translated() &&
-      !TranslateShader(*vertex_shader_translation)) {
-    XELOGE("Failed to translate the vertex shader!");
-    return UpdateStatus::kError;
-  }
-
-  VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr;
+  description_out.vertex_shader_hash =
+      vertex_shader->shader().ucode_data_hash();
+  description_out.vertex_shader_modification = vertex_shader->modification();
   if (pixel_shader) {
-    pixel_shader_translation = static_cast<VulkanShader::VulkanTranslation*>(
-        pixel_shader->GetOrCreateTranslation(
-            shader_translator_->GetDefaultPixelShaderModification(
-                pixel_shader->GetDynamicAddressableRegisterCount(
-                    regs.sq_program_cntl.ps_num_reg))));
-    if (!pixel_shader_translation->is_translated() &&
-        !TranslateShader(*pixel_shader_translation)) {
-      XELOGE("Failed to translate the pixel shader!");
-      return UpdateStatus::kError;
-    }
+    description_out.pixel_shader_hash =
+        pixel_shader->shader().ucode_data_hash();
+    description_out.pixel_shader_modification = pixel_shader->modification();
   }
+  description_out.render_pass_key = render_pass_key;
 
-  update_shader_stages_stage_count_ = 0;
-
-  auto& vertex_pipeline_stage =
-      update_shader_stages_info_[update_shader_stages_stage_count_++];
-  vertex_pipeline_stage.sType =
-      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-  vertex_pipeline_stage.pNext = nullptr;
-  vertex_pipeline_stage.flags = 0;
-  vertex_pipeline_stage.stage = VK_SHADER_STAGE_VERTEX_BIT;
-  vertex_pipeline_stage.module = vertex_shader_translation->shader_module();
-  vertex_pipeline_stage.pName = "main";
-  vertex_pipeline_stage.pSpecializationInfo = nullptr;
-
-  bool is_line_mode = false;
-  if (((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0) {
-    uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7;
-    if (front_poly_mode == 1) {
-      is_line_mode = true;
-    }
-  }
-  auto geometry_shader = GetGeometryShader(primitive_type, is_line_mode);
-  if (geometry_shader) {
-    auto& geometry_pipeline_stage =
-        update_shader_stages_info_[update_shader_stages_stage_count_++];
-    geometry_pipeline_stage.sType =
-        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-    geometry_pipeline_stage.pNext = nullptr;
-    geometry_pipeline_stage.flags = 0;
-    geometry_pipeline_stage.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
-    geometry_pipeline_stage.module = geometry_shader;
-    geometry_pipeline_stage.pName = "main";
-    geometry_pipeline_stage.pSpecializationInfo = nullptr;
-  }
-
- auto& pixel_pipeline_stage = - update_shader_stages_info_[update_shader_stages_stage_count_++]; - pixel_pipeline_stage.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - pixel_pipeline_stage.pNext = nullptr; - pixel_pipeline_stage.flags = 0; - pixel_pipeline_stage.stage = VK_SHADER_STAGE_FRAGMENT_BIT; - pixel_pipeline_stage.module = pixel_shader_translation - ? pixel_shader_translation->shader_module() - : dummy_pixel_shader_; - pixel_pipeline_stage.pName = "main"; - pixel_pipeline_stage.pSpecializationInfo = nullptr; - - return UpdateStatus::kMismatch; -} - -VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateVertexInputState( - VulkanShader* vertex_shader) { - auto& regs = update_vertex_input_state_regs_; - auto& state_info = update_vertex_input_state_info_; - - bool dirty = false; - dirty |= vertex_shader != regs.vertex_shader; - regs.vertex_shader = vertex_shader; - XXH3_64bits_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - // We don't use vertex inputs. - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - state_info.vertexBindingDescriptionCount = 0; - state_info.vertexAttributeDescriptionCount = 0; - state_info.pVertexBindingDescriptions = nullptr; - state_info.pVertexAttributeDescriptions = nullptr; - - return UpdateStatus::kCompatible; -} - -VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateInputAssemblyState( - xenos::PrimitiveType primitive_type) { - auto& regs = update_input_assembly_state_regs_; - auto& state_info = update_input_assembly_state_info_; - - bool dirty = false; - dirty |= primitive_type != regs.primitive_type; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, - XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - regs.primitive_type = primitive_type; - XXH3_64bits_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - switch (primitive_type) { + // TODO(Triang3l): Implement primitive types currently using geometry shaders + // without them. 
+  PipelineGeometryShader geometry_shader = PipelineGeometryShader::kNone;
+  PipelinePrimitiveTopology primitive_topology;
+  switch (primitive_processing_result.host_primitive_type) {
     case xenos::PrimitiveType::kPointList:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+      primitive_topology = PipelinePrimitiveTopology::kPointList;
       break;
     case xenos::PrimitiveType::kLineList:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
+      primitive_topology = PipelinePrimitiveTopology::kLineList;
       break;
     case xenos::PrimitiveType::kLineStrip:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
-      break;
-    case xenos::PrimitiveType::kLineLoop:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+      primitive_topology = PipelinePrimitiveTopology::kLineStrip;
       break;
     case xenos::PrimitiveType::kTriangleList:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
-      break;
-    case xenos::PrimitiveType::kTriangleStrip:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+      primitive_topology = PipelinePrimitiveTopology::kTriangleList;
       break;
     case xenos::PrimitiveType::kTriangleFan:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+      // The check should be performed at primitive processing time.
+      assert_true(!device_portability_subset_features ||
+                  device_portability_subset_features->triangleFans);
+      primitive_topology = PipelinePrimitiveTopology::kTriangleFan;
+      break;
+    case xenos::PrimitiveType::kTriangleStrip:
+      primitive_topology = PipelinePrimitiveTopology::kTriangleStrip;
       break;
     case xenos::PrimitiveType::kRectangleList:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+      geometry_shader = PipelineGeometryShader::kRectangleList;
+      primitive_topology = PipelinePrimitiveTopology::kTriangleList;
       break;
     case xenos::PrimitiveType::kQuadList:
-      state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
+      geometry_shader = PipelineGeometryShader::kQuadList;
+      primitive_topology = PipelinePrimitiveTopology::kLineListWithAdjacency;
       break;
     default:
-    case xenos::PrimitiveType::kTriangleWithWFlags:
-      XELOGE("unsupported primitive type {}", primitive_type);
-      assert_unhandled_case(primitive_type);
-      return UpdateStatus::kError;
+      // TODO(Triang3l): All primitive types and tessellation.
+      return false;
   }
+  description_out.geometry_shader = geometry_shader;
+  description_out.primitive_topology = primitive_topology;
+  description_out.primitive_restart =
+      primitive_processing_result.host_primitive_reset_enabled;
 
-  // TODO(benvanik): anything we can do about this? Vulkan seems to only support
-  // first.
-  assert_zero(regs.pa_su_sc_mode_cntl & (1 << 19));
-  // if (regs.pa_su_sc_mode_cntl & (1 << 19)) {
-  //   glProvokingVertex(GL_LAST_VERTEX_CONVENTION);
-  // } else {
-  //   glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
-  // }
+  description_out.depth_clamp_enable =
+      regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable;
 
-  // Primitive restart index is handled in the buffer cache.
-  if (regs.pa_su_sc_mode_cntl & (1 << 21)) {
-    state_info.primitiveRestartEnable = VK_TRUE;
+  // TODO(Triang3l): Tessellation.
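// [Illustrative sketch, not part of the patch.] The switch above maps Xenos
// primitive types onto host topologies, routing the two types without direct
// Vulkan equivalents through emulation geometry shaders. The same mapping
// shape in isolation, reusing this file's enum names and keeping only the
// cases shown above:

struct TopologyMapping {
  PipelinePrimitiveTopology topology;
  PipelineGeometryShader geometry_shader;
};

bool MapPrimitiveType(xenos::PrimitiveType type, TopologyMapping& out) {
  switch (type) {
    case xenos::PrimitiveType::kPointList:
      out = {PipelinePrimitiveTopology::kPointList,
             PipelineGeometryShader::kNone};
      return true;
    case xenos::PrimitiveType::kRectangleList:
      // Three vertices per rectangle; the fourth corner is derived in the
      // geometry shader, so plain triangle lists serve as the input topology.
      out = {PipelinePrimitiveTopology::kTriangleList,
             PipelineGeometryShader::kRectangleList};
      return true;
    case xenos::PrimitiveType::kQuadList:
      // Four vertices per quad map onto "line list with adjacency" (also four
      // vertices per primitive) purely as a container for the geometry shader.
      out = {PipelinePrimitiveTopology::kLineListWithAdjacency,
             PipelineGeometryShader::kQuadList};
      return true;
    default:
      return false;  // Remaining types are omitted in this sketch.
  }
}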
+ bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); + if (primitive_polygonal) { + // Vulkan only allows the polygon mode to be set for both faces - pick the + // most special one (more likely to represent the developer's deliberate + // intentions - fill is very generic, wireframe is common in debug, points + // are for pretty unusual things, but closer to debug purposes too - on the + // Xenos, points have the lowest register value and triangles have the + // highest) based on which faces are not culled. + bool cull_front = pa_su_sc_mode_cntl.cull_front; + bool cull_back = pa_su_sc_mode_cntl.cull_back; + description_out.cull_front = cull_front; + description_out.cull_back = cull_back; + if (device_features.fillModeNonSolid) { + xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles; + if (!cull_front) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_front_ptype); + } + if (!cull_back) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); + } + if (pa_su_sc_mode_cntl.poly_mode != xenos::PolygonModeEnable::kDualMode) { + polygon_type = xenos::PolygonType::kTriangles; + } + switch (polygon_type) { + case xenos::PolygonType::kPoints: + // When points are not supported, use lines instead, preserving + // debug-like purpose. + description_out.polygon_mode = + (!device_portability_subset_features || + device_portability_subset_features->pointPolygons) + ? PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kLines: + description_out.polygon_mode = PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kTriangles: + description_out.polygon_mode = PipelinePolygonMode::kFill; + break; + default: + assert_unhandled_case(polygon_type); + return false; + } + } else { + description_out.polygon_mode = PipelinePolygonMode::kFill; + } + description_out.front_face_clockwise = pa_su_sc_mode_cntl.face != 0; } else { - state_info.primitiveRestartEnable = VK_FALSE; + description_out.polygon_mode = PipelinePolygonMode::kFill; } - return UpdateStatus::kMismatch; -} + // TODO(Triang3l): Skip depth / stencil and color state for the fragment + // shader interlock RB implementation. -VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateViewportState() { - auto& state_info = update_viewport_state_info_; - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - state_info.viewportCount = 1; - state_info.scissorCount = 1; - - // Ignored; set dynamically. 
-  state_info.pViewports = nullptr;
-  state_info.pScissors = nullptr;
-
-  return UpdateStatus::kCompatible;
-}
-
-VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateRasterizationState(
-    xenos::PrimitiveType primitive_type) {
-  auto& regs = update_rasterization_state_regs_;
-  auto& state_info = update_rasterization_state_info_;
-
-  bool dirty = false;
-  dirty |= regs.primitive_type != primitive_type;
-  dirty |= SetShadowRegister(&regs.pa_cl_clip_cntl, XE_GPU_REG_PA_CL_CLIP_CNTL);
-  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
-  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
-                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
-  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
-                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
-  dirty |= SetShadowRegister(&regs.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY);
-  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
-                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
-  regs.primitive_type = primitive_type;
-
-  // Vulkan doesn't support separate depth biases for different sides.
-  // SetRenderState also accepts only one argument, so they should be rare.
-  // The culling mode must match the one in SetDynamicState, so not applying
-  // the primitive type exceptions to this (very unlikely to happen anyway).
-  bool depth_bias_enable = false;
-  uint32_t cull_mode = regs.pa_su_sc_mode_cntl & 0x3;
-  if (cull_mode != 1) {
-    float depth_bias_scale =
-        register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
-    float depth_bias_offset =
-        register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
-    depth_bias_enable = (depth_bias_scale != 0.0f && depth_bias_offset != 0.0f);
-  }
-  if (!depth_bias_enable && cull_mode != 2) {
-    float depth_bias_scale =
-        register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
-    float depth_bias_offset =
-        register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
-    depth_bias_enable = (depth_bias_scale != 0.0f && depth_bias_offset != 0.0f);
-  }
-  if (regs.pa_su_poly_offset_enable !=
-      static_cast<uint32_t>(depth_bias_enable)) {
-    regs.pa_su_poly_offset_enable = static_cast<uint32_t>(depth_bias_enable);
-    dirty = true;
+  if (render_pass_key.depth_and_color_used & 1) {
+    if (normalized_depth_control.z_enable) {
+      description_out.depth_write_enable =
+          normalized_depth_control.z_write_enable;
+      description_out.depth_compare_op = normalized_depth_control.zfunc;
+    } else {
+      description_out.depth_compare_op = xenos::CompareFunction::kAlways;
+    }
+    if (normalized_depth_control.stencil_enable) {
+      description_out.stencil_test_enable = 1;
+      description_out.stencil_front_fail_op =
+          normalized_depth_control.stencilfail;
+      description_out.stencil_front_pass_op =
+          normalized_depth_control.stencilzpass;
+      description_out.stencil_front_depth_fail_op =
+          normalized_depth_control.stencilzfail;
+      description_out.stencil_front_compare_op =
+          normalized_depth_control.stencilfunc;
+      if (primitive_polygonal && normalized_depth_control.backface_enable) {
+        description_out.stencil_back_fail_op =
+            normalized_depth_control.stencilfail_bf;
+        description_out.stencil_back_pass_op =
+            normalized_depth_control.stencilzpass_bf;
+        description_out.stencil_back_depth_fail_op =
+            normalized_depth_control.stencilzfail_bf;
+        description_out.stencil_back_compare_op =
+            normalized_depth_control.stencilfunc_bf;
+      } else {
+        description_out.stencil_back_fail_op =
+            description_out.stencil_front_fail_op;
+        description_out.stencil_back_pass_op =
+            description_out.stencil_front_pass_op;
+
description_out.stencil_back_depth_fail_op = + description_out.stencil_front_depth_fail_op; + description_out.stencil_back_compare_op = + description_out.stencil_front_compare_op; + } + } } - XXH3_64bits_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - // ZCLIP_NEAR_DISABLE - // state_info.depthClampEnable = !(regs.pa_cl_clip_cntl & (1 << 26)); - // RASTERIZER_DISABLE - // state_info.rasterizerDiscardEnable = !!(regs.pa_cl_clip_cntl & (1 << 22)); - - // CLIP_DISABLE - state_info.depthClampEnable = !!(regs.pa_cl_clip_cntl & (1 << 16)); - state_info.rasterizerDiscardEnable = VK_FALSE; - - bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; - if (poly_mode) { - uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; - uint32_t back_poly_mode = (regs.pa_su_sc_mode_cntl >> 8) & 0x7; - // Vulkan only supports both matching. - assert_true(front_poly_mode == back_poly_mode); - static const VkPolygonMode kFillModes[3] = { - VK_POLYGON_MODE_POINT, - VK_POLYGON_MODE_LINE, - VK_POLYGON_MODE_FILL, - }; - state_info.polygonMode = kFillModes[front_poly_mode]; - } else { - state_info.polygonMode = VK_POLYGON_MODE_FILL; - } - - switch (cull_mode) { - case 0: - state_info.cullMode = VK_CULL_MODE_NONE; - break; - case 1: - state_info.cullMode = VK_CULL_MODE_FRONT_BIT; - break; - case 2: - state_info.cullMode = VK_CULL_MODE_BACK_BIT; - break; - case 3: - // Cull both sides? - assert_always(); - break; - } - if (regs.pa_su_sc_mode_cntl & 0x4) { - state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; - } else { - state_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - } - if (primitive_type == xenos::PrimitiveType::kRectangleList) { - // Rectangle lists aren't culled. There may be other things they skip too. - state_info.cullMode = VK_CULL_MODE_NONE; - } else if (primitive_type == xenos::PrimitiveType::kPointList) { - // Face culling doesn't apply to point primitives. - state_info.cullMode = VK_CULL_MODE_NONE; - } - - state_info.depthBiasEnable = depth_bias_enable ? VK_TRUE : VK_FALSE; - - // Ignored; set dynamically: - state_info.depthBiasConstantFactor = 0; - state_info.depthBiasClamp = 0; - state_info.depthBiasSlopeFactor = 0; - state_info.lineWidth = 1.0f; - - return UpdateStatus::kMismatch; -} - -VulkanPipelineCache::UpdateStatus -VulkanPipelineCache::UpdateMultisampleState() { - auto& regs = update_multisample_state_regs_; - auto& state_info = update_multisample_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG); - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - XXH3_64bits_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES (0x7) - // PA_SC_AA_MASK (0xFFFF) - // PA_SU_SC_MODE_CNTL MSAA_ENABLE (0x10000) - // If set, all samples will be sampled at set locations. Otherwise, they're - // all sampled from the pixel center. 
- if (cvars::vulkan_native_msaa) { - auto msaa_num_samples = - static_cast<xenos::MsaaSamples>((regs.rb_surface_info >> 16) & 0x3); - switch (msaa_num_samples) { - case xenos::MsaaSamples::k1X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_num_samples); - break; + // Color blending and write masks (filled only for the attachments present in + // the render pass object). + uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1; + if (device_features.independentBlend) { + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + WritePipelineRenderTargetDescription( + regs.Get<reg::RB_BLENDCONTROL>( + reg::RB_BLENDCONTROL::rt_register_indices[color_rt_index]), + (normalized_color_mask >> (color_rt_index * 4)) & 0b1111, + description_out.render_targets[color_rt_index]); } } else { - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + // Take the blend control for the first render target that the guest wants + // to write to (consider it the most important) and use it for all render + // targets, if any. + // TODO(Triang3l): Implement an option for independent blending via multiple + // draw calls with different pipelines maybe? Though independent blending + // support is fairly widespread, with Adreno 4xx apparently being a notable + // exception. + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t render_pass_first_color_rt_index; + if (xe::bit_scan_forward(render_pass_color_rts_remaining, + &render_pass_first_color_rt_index)) { + render_pass_color_rts_remaining &= + ~(uint32_t(1) << render_pass_first_color_rt_index); + PipelineRenderTarget& render_pass_first_color_rt = + description_out.render_targets[render_pass_first_color_rt_index]; + uint32_t common_blend_rt_index; + if (xe::bit_scan_forward(normalized_color_mask, &common_blend_rt_index)) { + common_blend_rt_index >>= 2; + // If a common write mask will be used for multiple render targets, use + // the original RB_COLOR_MASK instead of the normalized color mask, as + // the normalized color mask has non-existent components forced to be + // written (their previous contents don't need to be preserved), while + // the number of components may vary between render targets. The + // attachments in the pass that must not be written to at all will be + // excluded via a shader modification. + WritePipelineRenderTargetDescription( + regs.Get<reg::RB_BLENDCONTROL>( + reg::RB_BLENDCONTROL::rt_register_indices + [common_blend_rt_index]), + (((normalized_color_mask & + ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) + ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + : normalized_color_mask) >> + (4 * common_blend_rt_index)) & + 0b1111, + render_pass_first_color_rt); + } else { + // No render targets are written to, though the render pass still may + // contain color attachments - set them to not written and not blending. 
+ render_pass_first_color_rt.src_color_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_color_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.color_blend_op = xenos::BlendOp::kAdd; + render_pass_first_color_rt.src_alpha_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_alpha_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.alpha_blend_op = xenos::BlendOp::kAdd; + } + // Reuse the same blending settings for all render targets in the pass, + // for description consistency. + uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + description_out.render_targets[color_rt_index] = + render_pass_first_color_rt; + } + } } - XXH3_64bits_update(&hash_state_, &regs, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; + return true; } -VulkanPipelineCache::UpdateStatus -VulkanPipelineCache::UpdateDepthStencilState() { - auto& regs = update_depth_stencil_state_regs_; - auto& state_info = update_depth_stencil_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - XXH3_64bits_update(&hash_state_, &regs, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible +bool VulkanPipelineCache::ArePipelineRequirementsMet( + const PipelineDescription& description) const { + VkShaderStageFlags vertex_shader_stage = + Shader::IsHostVertexShaderTypeDomain( + SpirvShaderTranslator::Modification( + description.vertex_shader_modification) + .vertex.host_vertex_shader_type) + ? 
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT + : VK_SHADER_STAGE_VERTEX_BIT; + if (!(guest_shader_vertex_stages_ & vertex_shader_stage)) { + return false; } - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); - static const VkCompareOp compare_func_map[] = { - /* 0 */ VK_COMPARE_OP_NEVER, - /* 1 */ VK_COMPARE_OP_LESS, - /* 2 */ VK_COMPARE_OP_EQUAL, - /* 3 */ VK_COMPARE_OP_LESS_OR_EQUAL, - /* 4 */ VK_COMPARE_OP_GREATER, - /* 5 */ VK_COMPARE_OP_NOT_EQUAL, - /* 6 */ VK_COMPARE_OP_GREATER_OR_EQUAL, - /* 7 */ VK_COMPARE_OP_ALWAYS, - }; - static const VkStencilOp stencil_op_map[] = { - /* 0 */ VK_STENCIL_OP_KEEP, - /* 1 */ VK_STENCIL_OP_ZERO, - /* 2 */ VK_STENCIL_OP_REPLACE, - /* 3 */ VK_STENCIL_OP_INCREMENT_AND_CLAMP, - /* 4 */ VK_STENCIL_OP_DECREMENT_AND_CLAMP, - /* 5 */ VK_STENCIL_OP_INVERT, - /* 6 */ VK_STENCIL_OP_INCREMENT_AND_WRAP, - /* 7 */ VK_STENCIL_OP_DECREMENT_AND_WRAP, - }; + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features) { + if (description.primitive_topology == + PipelinePrimitiveTopology::kTriangleFan && + !device_portability_subset_features->triangleFans) { + return false; + } - // Depth state - // TODO: EARLY_Z_ENABLE (needs to be enabled in shaders) - state_info.depthWriteEnable = !!(regs.rb_depthcontrol & 0x4); - state_info.depthTestEnable = !!(regs.rb_depthcontrol & 0x2); - state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1); + if (description.polygon_mode == PipelinePolygonMode::kPoint && + !device_portability_subset_features->pointPolygons) { + return false; + } - state_info.depthCompareOp = - compare_func_map[(regs.rb_depthcontrol >> 4) & 0x7]; - state_info.depthBoundsTestEnable = VK_FALSE; + if (!device_portability_subset_features->constantAlphaColorBlendFactors) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.src_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha) { + return false; + } + } + } + } - // Stencil state - state_info.front.compareOp = - compare_func_map[(regs.rb_depthcontrol >> 8) & 0x7]; - state_info.front.failOp = stencil_op_map[(regs.rb_depthcontrol >> 11) & 0x7]; - state_info.front.passOp = stencil_op_map[(regs.rb_depthcontrol >> 14) & 0x7]; - state_info.front.depthFailOp = - stencil_op_map[(regs.rb_depthcontrol >> 17) & 0x7]; + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - // BACKFACE_ENABLE - if (!!(regs.rb_depthcontrol & 0x80)) { - state_info.back.compareOp = - compare_func_map[(regs.rb_depthcontrol >> 20) & 0x7]; - state_info.back.failOp = stencil_op_map[(regs.rb_depthcontrol >> 23) & 0x7]; - state_info.back.passOp = stencil_op_map[(regs.rb_depthcontrol >> 26) & 0x7]; - state_info.back.depthFailOp = - stencil_op_map[(regs.rb_depthcontrol >> 29) & 0x7]; + if 
(!device_features.geometryShader && + description.geometry_shader != PipelineGeometryShader::kNone) { + return false; + } + + if (!device_features.fillModeNonSolid && + description.polygon_mode != PipelinePolygonMode::kFill) { + return false; + } + + if (!device_features.independentBlend) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t first_color_rt_index; + if (xe::bit_scan_forward(color_rts_remaining, &first_color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << first_color_rt_index); + const PipelineRenderTarget& first_color_rt = + description.render_targets[first_color_rt_index]; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != + first_color_rt.src_color_blend_factor || + color_rt.dst_color_blend_factor != + first_color_rt.dst_color_blend_factor || + color_rt.color_blend_op != first_color_rt.color_blend_op || + color_rt.src_alpha_blend_factor != + first_color_rt.src_alpha_blend_factor || + color_rt.dst_alpha_blend_factor != + first_color_rt.dst_alpha_blend_factor || + color_rt.alpha_blend_op != first_color_rt.alpha_blend_op || + color_rt.color_write_mask != first_color_rt.color_write_mask) { + return false; + } + } + } + } + + return true; +} + +bool VulkanPipelineCache::GetGeometryShaderKey( + PipelineGeometryShader geometry_shader_type, GeometryShaderKey& key_out) { + if (geometry_shader_type == PipelineGeometryShader::kNone) { + return false; + } + GeometryShaderKey key; + key.type = geometry_shader_type; + // TODO(Triang3l): Make the linkage parameters depend on the real needs of the + // vertex and the pixel shader. + key.interpolator_count = xenos::kMaxInterpolators; + key.user_clip_plane_count = /* 6 */ 0; + key.user_clip_plane_cull = 0; + key.has_vertex_kill_and = /* 1 */ 0; + key.has_point_size = /* 1 */ 0; + key.has_point_coordinates = /* 1 */ 0; + key_out = key; + return true; +} + +VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { + auto it = geometry_shaders_.find(key); + if (it != geometry_shaders_.end()) { + return it->second; + } + + std::vector<spv::Id> id_vector_temp; + std::vector<unsigned int> uint_vector_temp; + + spv::ExecutionMode input_primitive_execution_mode = spv::ExecutionMode(0); + uint32_t input_primitive_vertex_count = 0; + spv::ExecutionMode output_primitive_execution_mode = spv::ExecutionMode(0); + uint32_t output_max_vertices = 0; + switch (key.type) { + case PipelineGeometryShader::kRectangleList: + // Triangle to a strip of 2 triangles. + input_primitive_execution_mode = spv::ExecutionModeTriangles; + input_primitive_vertex_count = 3; + output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip; + output_max_vertices = 4; + break; + case PipelineGeometryShader::kQuadList: + // 4 vertices passed via a line list with adjacency to a strip of 2 + // triangles. + input_primitive_execution_mode = spv::ExecutionModeInputLinesAdjacency; + input_primitive_vertex_count = 4; + output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip; + output_max_vertices = 4; + break; + default: + assert_unhandled_case(key.type); + } + + uint32_t clip_distance_count = + key.user_clip_plane_cull ? 0 : key.user_clip_plane_count; + uint32_t cull_distance_count = + (key.user_clip_plane_cull ? 
key.user_clip_plane_count : 0) + + key.has_vertex_kill_and; + + spv::Builder builder(spv::Spv_1_0, + (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1, + nullptr); + spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450"); + builder.addCapability(spv::CapabilityGeometry); + if (clip_distance_count) { + builder.addCapability(spv::CapabilityClipDistance); + } + if (cull_distance_count) { + builder.addCapability(spv::CapabilityCullDistance); + } + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + builder.setSource(spv::SourceLanguageUnknown, 0); + + // TODO(Triang3l): Shader float controls (NaN preservation most importantly). + + std::vector<spv::Id> main_interface; + + spv::Id type_void = builder.makeVoidType(); + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_bool4 = builder.makeVectorType(type_bool, 4); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_float = builder.makeFloatType(32); + spv::Id type_float4 = builder.makeVectorType(type_float, 4); + spv::Id type_clip_distances = + clip_distance_count + ? builder.makeArrayType( + type_float, builder.makeUintConstant(clip_distance_count), 0) + : spv::NoType; + spv::Id type_cull_distances = + cull_distance_count + ? builder.makeArrayType( + type_float, builder.makeUintConstant(cull_distance_count), 0) + : spv::NoType; + spv::Id type_interpolators = + key.interpolator_count + ? builder.makeArrayType( + type_float4, builder.makeUintConstant(key.interpolator_count), + 0) + : spv::NoType; + spv::Id type_point_coordinates = key.has_point_coordinates + ? builder.makeVectorType(type_float, 2) + : spv::NoType; + + // Inputs and outputs - matching glslang order, in gl_PerVertex gl_in[], + // user-defined outputs, user-defined inputs, out gl_PerVertex. + // TODO(Triang3l): Point parameters from the system uniform buffer. + + spv::Id const_input_primitive_vertex_count = + builder.makeUintConstant(input_primitive_vertex_count); + + // in gl_PerVertex gl_in[]. + // gl_Position. + id_vector_temp.clear(); + uint32_t member_in_gl_per_vertex_position = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_float4); + spv::Id const_member_in_gl_per_vertex_position = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_position)); + // gl_ClipDistance. + uint32_t member_in_gl_per_vertex_clip_distance = UINT32_MAX; + spv::Id const_member_in_gl_per_vertex_clip_distance = spv::NoResult; + if (clip_distance_count) { + member_in_gl_per_vertex_clip_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_clip_distances); + const_member_in_gl_per_vertex_clip_distance = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_clip_distance)); + } + // gl_CullDistance. + uint32_t member_in_gl_per_vertex_cull_distance = UINT32_MAX; + if (cull_distance_count) { + member_in_gl_per_vertex_cull_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_cull_distances); + } + // Structure and array. 
+ spv::Id type_struct_in_gl_per_vertex = + builder.makeStructType(id_vector_temp, "gl_PerVertex"); + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_position, "gl_Position"); + builder.addMemberDecoration(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_position, + spv::DecorationBuiltIn, spv::BuiltInPosition); + if (clip_distance_count) { + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_clip_distance, + "gl_ClipDistance"); + builder.addMemberDecoration( + type_struct_in_gl_per_vertex, member_in_gl_per_vertex_clip_distance, + spv::DecorationBuiltIn, spv::BuiltInClipDistance); + } + if (cull_distance_count) { + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_cull_distance, + "gl_CullDistance"); + builder.addMemberDecoration( + type_struct_in_gl_per_vertex, member_in_gl_per_vertex_cull_distance, + spv::DecorationBuiltIn, spv::BuiltInCullDistance); + } + builder.addDecoration(type_struct_in_gl_per_vertex, spv::DecorationBlock); + spv::Id type_array_in_gl_per_vertex = builder.makeArrayType( + type_struct_in_gl_per_vertex, const_input_primitive_vertex_count, 0); + spv::Id in_gl_per_vertex = + builder.createVariable(spv::NoPrecision, spv::StorageClassInput, + type_array_in_gl_per_vertex, "gl_in"); + main_interface.push_back(in_gl_per_vertex); + + // Interpolators output. + spv::Id out_interpolators = spv::NoResult; + if (key.interpolator_count) { + out_interpolators = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_interpolators, "xe_out_interpolators"); + builder.addDecoration(out_interpolators, spv::DecorationLocation, 0); + builder.addDecoration(out_interpolators, spv::DecorationInvariant); + main_interface.push_back(out_interpolators); + } + + // Point coordinate output. + spv::Id out_point_coordinates = spv::NoResult; + if (key.has_point_coordinates) { + out_point_coordinates = builder.createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_point_coordinates, + "xe_out_point_coordinates"); + builder.addDecoration(out_point_coordinates, spv::DecorationLocation, + key.interpolator_count); + builder.addDecoration(out_point_coordinates, spv::DecorationInvariant); + main_interface.push_back(out_point_coordinates); + } + + // Interpolator input. + spv::Id in_interpolators = spv::NoResult; + if (key.interpolator_count) { + in_interpolators = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, + builder.makeArrayType(type_interpolators, + const_input_primitive_vertex_count, 0), + "xe_in_interpolators"); + builder.addDecoration(in_interpolators, spv::DecorationLocation, 0); + main_interface.push_back(in_interpolators); + } + + // Point size input. + spv::Id in_point_size = spv::NoResult; + if (key.has_point_size) { + in_point_size = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, + builder.makeArrayType(type_float, const_input_primitive_vertex_count, + 0), + "xe_in_point_size"); + builder.addDecoration(in_point_size, spv::DecorationLocation, + key.interpolator_count); + main_interface.push_back(in_point_size); + } + + // out gl_PerVertex. + // gl_Position. + id_vector_temp.clear(); + uint32_t member_out_gl_per_vertex_position = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_float4); + spv::Id const_member_out_gl_per_vertex_position = + builder.makeIntConstant(int32_t(member_out_gl_per_vertex_position)); + // gl_ClipDistance. 
+ uint32_t member_out_gl_per_vertex_clip_distance = UINT32_MAX; + spv::Id const_member_out_gl_per_vertex_clip_distance = spv::NoResult; + if (clip_distance_count) { + member_out_gl_per_vertex_clip_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_clip_distances); + const_member_out_gl_per_vertex_clip_distance = builder.makeIntConstant( + int32_t(member_out_gl_per_vertex_clip_distance)); + } + // Structure. + spv::Id type_struct_out_gl_per_vertex = + builder.makeStructType(id_vector_temp, "gl_PerVertex"); + builder.addMemberName(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, "gl_Position"); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, + spv::DecorationInvariant); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, + spv::DecorationBuiltIn, spv::BuiltInPosition); + if (clip_distance_count) { + builder.addMemberName(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_clip_distance, + "gl_ClipDistance"); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_clip_distance, + spv::DecorationInvariant); + builder.addMemberDecoration( + type_struct_out_gl_per_vertex, member_out_gl_per_vertex_clip_distance, + spv::DecorationBuiltIn, spv::BuiltInClipDistance); + } + builder.addDecoration(type_struct_out_gl_per_vertex, spv::DecorationBlock); + spv::Id out_gl_per_vertex = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_struct_out_gl_per_vertex, ""); + main_interface.push_back(out_gl_per_vertex); + + // Begin the main function. + std::vector<spv::Id> main_param_types; + std::vector<std::vector<spv::Decoration>> main_precisions; + spv::Block* main_entry; + spv::Function* main_function = + builder.makeFunctionEntry(spv::NoPrecision, type_void, "main", + main_param_types, main_precisions, &main_entry); + spv::Instruction* entry_point = + builder.addEntryPoint(spv::ExecutionModelGeometry, main_function, "main"); + for (spv::Id interface_id : main_interface) { + entry_point->addIdOperand(interface_id); + } + builder.addExecutionMode(main_function, input_primitive_execution_mode); + builder.addExecutionMode(main_function, spv::ExecutionModeInvocations, 1); + builder.addExecutionMode(main_function, output_primitive_execution_mode); + builder.addExecutionMode(main_function, spv::ExecutionModeOutputVertices, + int(output_max_vertices)); + + // Note that after every OpEmitVertex, all output variables are undefined. + + // Discard the whole primitive if any vertex has a NaN position (may also be + // set to NaN for emulation of vertex killing with the OR operator). 
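The loop that follows implements exactly that discard. As a rough CPU-side sketch of the predicate the generated SPIR-V evaluates per input vertex (illustrative only, not part of the diff; the real test is emitted as OpIsNan followed by OpAny on gl_in[i].gl_Position, with an OpReturn in the taken branch):

#include <cmath>

// Sketch: any NaN component in a clip-space position kills the primitive.
bool PositionKillsPrimitive(const float position[4]) {
  for (int component = 0; component < 4; ++component) {
    if (std::isnan(position[component])) {
      return true;
    }
  }
  return false;
}
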
+ for (uint32_t i = 0; i < input_primitive_vertex_count; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(i))); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id position_is_nan = builder.createUnaryOp( + spv::OpAny, type_bool, + builder.createUnaryOp( + spv::OpIsNan, type_bool4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision))); + spv::Block& discard_predecessor = *builder.getBuildPoint(); + spv::Block& discard_then_block = builder.makeNewBlock(); + spv::Block& discard_merge_block = builder.makeNewBlock(); + { + std::unique_ptr<spv::Instruction> selection_merge_op( + std::make_unique<spv::Instruction>(spv::OpSelectionMerge)); + selection_merge_op->addIdOperand(discard_merge_block.getId()); + selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + discard_predecessor.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr<spv::Instruction> branch_conditional_op( + std::make_unique<spv::Instruction>(spv::OpBranchConditional)); + branch_conditional_op->addIdOperand(position_is_nan); + branch_conditional_op->addIdOperand(discard_then_block.getId()); + branch_conditional_op->addIdOperand(discard_merge_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + discard_predecessor.addInstruction(std::move(branch_conditional_op)); + } + discard_then_block.addPredecessor(&discard_predecessor); + discard_merge_block.addPredecessor(&discard_predecessor); + builder.setBuildPoint(&discard_then_block); + builder.createNoResultOp(spv::OpReturn); + builder.setBuildPoint(&discard_merge_block); + } + + // Cull the whole primitive if any cull distance for all vertices in the + // primitive is < 0. + // TODO(Triang3l): For points, handle ps_ucp_mode (transform the host clip + // space to the guest one, calculate the distances to the user clip planes, + // cull using the distance from the center for modes 0, 1 and 2, cull and clip + // per-vertex for modes 2 and 3) - except for the vertex kill flag. 
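The block that follows folds the per-distance, per-vertex negativity tests into a single flat AND. A CPU-side sketch of the resulting predicate (an assumption inferred from the loop structure below, written to match the vertex-kill-with-AND emulation; the layout of the distances array is hypothetical):

#include <cstdint>

// Sketch: culled only when every tested cull distance of every vertex is
// negative; distances[j * cull_distance_count + i] is distance i of vertex j.
bool ShouldCullPrimitive(const float* distances, uint32_t vertex_count,
                         uint32_t cull_distance_count) {
  bool cull = vertex_count != 0 && cull_distance_count != 0;
  for (uint32_t i = 0; i < cull_distance_count; ++i) {
    for (uint32_t j = 0; j < vertex_count; ++j) {
      cull = cull && (distances[j * cull_distance_count + i] < 0.0f);
    }
  }
  return cull;
}
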
+ if (cull_distance_count) { + spv::Id const_member_in_gl_per_vertex_cull_distance = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_cull_distance)); + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + spv::Id cull_condition = spv::NoResult; + for (uint32_t i = 0; i < cull_distance_count; ++i) { + for (uint32_t j = 0; j < input_primitive_vertex_count; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(j))); + id_vector_temp.push_back(const_member_in_gl_per_vertex_cull_distance); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(i))); + spv::Id cull_distance_is_negative = builder.createBinOp( + spv::OpFOrdLessThan, type_bool, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + const_float_0); + if (cull_condition != spv::NoResult) { + cull_condition = + builder.createBinOp(spv::OpLogicalAnd, type_bool, cull_condition, + cull_distance_is_negative); + } else { + cull_condition = cull_distance_is_negative; + } + } + } + assert_true(cull_condition != spv::NoResult); + spv::Block& discard_predecessor = *builder.getBuildPoint(); + spv::Block& discard_then_block = builder.makeNewBlock(); + spv::Block& discard_merge_block = builder.makeNewBlock(); + { + std::unique_ptr<spv::Instruction> selection_merge_op( + std::make_unique<spv::Instruction>(spv::OpSelectionMerge)); + selection_merge_op->addIdOperand(discard_merge_block.getId()); + selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + discard_predecessor.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr<spv::Instruction> branch_conditional_op( + std::make_unique<spv::Instruction>(spv::OpBranchConditional)); + branch_conditional_op->addIdOperand(cull_condition); + branch_conditional_op->addIdOperand(discard_then_block.getId()); + branch_conditional_op->addIdOperand(discard_merge_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + discard_predecessor.addInstruction(std::move(branch_conditional_op)); + } + discard_then_block.addPredecessor(&discard_predecessor); + discard_merge_block.addPredecessor(&discard_predecessor); + builder.setBuildPoint(&discard_then_block); + builder.createNoResultOp(spv::OpReturn); + builder.setBuildPoint(&discard_merge_block); + } + + switch (key.type) { + case PipelineGeometryShader::kRectangleList: { + // Construct a strip with the fourth vertex generated by mirroring a + // vertex across the longest edge (the diagonal). + // + // Possible options: + // + // 0---1 + // | /| + // | / | - 12 is the longest edge, strip 0123 (most commonly used) + // |/ | v3 = v0 + (v1 - v0) + (v2 - v0), or v3 = -v0 + v1 + v2 + // 2--[3] + // + // 1---2 + // | /| + // | / | - 20 is the longest edge, strip 1203 + // |/ | + // 0--[3] + // + // 2---0 + // | /| + // | / | - 01 is the longest edge, strip 2013 + // |/ | + // 1--[3] + + spv::Id const_int_0 = builder.makeIntConstant(0); + spv::Id const_int_1 = builder.makeIntConstant(1); + spv::Id const_int_2 = builder.makeIntConstant(2); + spv::Id const_int_3 = builder.makeIntConstant(3); + + // Get squares of edge lengths to choose the longest edge. + // [0] - 12, [1] - 20, [2] - 01. 
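The SPIR-V emitted next evaluates the v3 = -v0 + v1 + v2 construction from the diagrams above. The same arithmetic in plain C++, per component (a hypothetical helper for illustration, not part of the diff):

// Sketch: mirror v0 across the midpoint of the longest edge to produce the
// fourth rectangle vertex; applied identically to positions, interpolators
// and clip distances.
float FourthRectangleVertexComponent(float v0, float v1, float v2) {
  return (v1 - v0) + v2;  // v3 = v0 + (v1 - v0) + (v2 - v0) = -v0 + v1 + v2
}
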
+ spv::Id edge_lengths[3]; + id_vector_temp.resize(3); + id_vector_temp[1] = const_member_in_gl_per_vertex_position; + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp[0] = builder.makeIntConstant(int32_t((1 + i) % 3)); + id_vector_temp[2] = const_int_0; + spv::Id edge_0_x = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[2] = const_int_1; + spv::Id edge_0_y = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = builder.makeIntConstant(int32_t((2 + i) % 3)); + id_vector_temp[2] = const_int_0; + spv::Id edge_1_x = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[2] = const_int_1; + spv::Id edge_1_y = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + spv::Id edge_x = + builder.createBinOp(spv::OpFSub, type_float, edge_1_x, edge_0_x); + spv::Id edge_y = + builder.createBinOp(spv::OpFSub, type_float, edge_1_y, edge_0_y); + edge_lengths[i] = builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, edge_x, edge_x), + builder.createBinOp(spv::OpFMul, type_float, edge_y, edge_y)); + } + + // Choose the index of the first vertex in the strip based on which edge + // is the longest, and calculate the indices of the other vertices. + spv::Id vertex_indices[3]; + // If 12 > 20 && 12 > 01, then 12 is the longest edge, and the strip is + // 0123. Otherwise, if 20 > 01, then 20 is the longest, and the strip is + // 1203, but if not, 01 is the longest, and the strip is 2013. + vertex_indices[0] = builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp( + spv::OpLogicalAnd, type_bool, + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[0], edge_lengths[1]), + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[0], edge_lengths[2])), + const_int_0, + builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[1], edge_lengths[2]), + const_int_1, const_int_2)); + for (uint32_t i = 1; i < 3; ++i) { + // vertex_indices[i] = (vertex_indices[0] + i) % 3 + spv::Id vertex_index_without_wrapping = + builder.createBinOp(spv::OpIAdd, type_int, vertex_indices[0], + builder.makeIntConstant(int32_t(i))); + vertex_indices[i] = builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpSLessThan, type_bool, + vertex_index_without_wrapping, const_int_3), + vertex_index_without_wrapping, + builder.createBinOp(spv::OpISub, type_int, + vertex_index_without_wrapping, const_int_3)); + } + + // Initialize the point coordinates output for safety if this shader type + // is used with has_point_coordinates for some reason. + spv::Id const_point_coordinates_zero = spv::NoResult; + if (key.has_point_coordinates) { + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_float_0); + id_vector_temp.push_back(const_float_0); + const_point_coordinates_zero = builder.makeCompositeConstant( + type_point_coordinates, id_vector_temp); + } + + // Emit the triangle in the strip that consists of the original vertices. + for (uint32_t i = 0; i < 3; ++i) { + spv::Id vertex_index = vertex_indices[i]; + // Interpolators. 
+ if (key.interpolator_count) { + id_vector_temp.clear(); + id_vector_temp.push_back(vertex_index); + builder.createStore( + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + out_interpolators); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + // Clip distances. + if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + spv::Id vertex_clip_distances = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back( + const_member_out_gl_per_vertex_clip_distance); + builder.createStore( + vertex_clip_distances, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + } + + // Construct the fourth vertex. + // Interpolators. + for (uint32_t i = 0; i < key.interpolator_count; ++i) { + spv::Id const_int_i = builder.makeIntConstant(int32_t(i)); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_int_i); + spv::Id vertex_interpolator_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_interpolators, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_interpolator_v01 = builder.createBinOp( + spv::OpFSub, type_float4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + vertex_interpolator_v0); + builder.addDecoration(vertex_interpolator_v01, + spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_interpolator_v3 = builder.createBinOp( + spv::OpFAdd, type_float4, vertex_interpolator_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_interpolator_v3, + spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.push_back(const_int_i); + builder.createStore( + vertex_interpolator_v3, + builder.createAccessChain(spv::StorageClassOutput, + out_interpolators, id_vector_temp)); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_position_v01 = builder.createBinOp( + spv::OpFSub, type_float4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + vertex_position_v0); + builder.addDecoration(vertex_position_v01, spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_position_v3 = builder.createBinOp( + spv::OpFAdd, type_float4, vertex_position_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_position_v3, spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position_v3, + builder.createAccessChain(spv::StorageClassOutput, out_gl_per_vertex, + id_vector_temp)); + // Clip distances. + for (uint32_t i = 0; i < clip_distance_count; ++i) { + spv::Id const_int_i = builder.makeIntConstant(int32_t(i)); + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + id_vector_temp.push_back(const_int_i); + spv::Id vertex_clip_distance_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_clip_distance_v01 = builder.createBinOp( + spv::OpFSub, type_float, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + vertex_clip_distance_v0); + builder.addDecoration(vertex_clip_distance_v01, + spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_clip_distance_v3 = builder.createBinOp( + spv::OpFAdd, type_float, vertex_clip_distance_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_clip_distance_v3, + spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + id_vector_temp.push_back(const_int_i); + builder.createStore( + vertex_clip_distance_v3, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + builder.createNoResultOp(spv::OpEndPrimitive); + } break; + + case PipelineGeometryShader::kQuadList: { + // Initialize the point coordinates output for safety if this shader type + // is used with has_point_coordinates for some reason. 
+ spv::Id const_point_coordinates_zero = spv::NoResult; + if (key.has_point_coordinates) { + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_float_0); + id_vector_temp.push_back(const_float_0); + const_point_coordinates_zero = builder.makeCompositeConstant( + type_point_coordinates, id_vector_temp); + } + + // Build the triangle strip from the original quad vertices in the + // 0, 1, 3, 2 order (like specified for GL_QUAD_STRIP). + // TODO(Triang3l): Find the correct decomposition of quads into triangles + // on the real hardware. + for (uint32_t i = 0; i < 4; ++i) { + spv::Id const_vertex_index = + builder.makeIntConstant(int32_t(i ^ (i >> 1))); + // Interpolators. + if (key.interpolator_count) { + id_vector_temp.clear(); + id_vector_temp.push_back(const_vertex_index); + builder.createStore( + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + out_interpolators); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + // Clip distances. + if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + spv::Id vertex_clip_distances = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back( + const_member_out_gl_per_vertex_clip_distance); + builder.createStore( + vertex_clip_distances, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + } + builder.createNoResultOp(spv::OpEndPrimitive); + } break; + + default: + assert_unhandled_case(key.type); + } + + // End the main function. + builder.leaveFunction(); + + // Serialize the shader code. + std::vector<unsigned int> shader_code; + builder.dump(shader_code); + + // Create the shader module, and store the handle even if creation fails, so + // that creation is not attempted again later. 
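The i ^ (i >> 1) in the quad loop above is a 2-bit Gray code that produces the 0, 1, 3, 2 emission order named in the comment; a standalone check (illustrative only, not part of the diff):

#include <cstdint>
#include <cstdio>

int main() {
  // Prints "0 1 3 2": the quad's vertex ring reordered for a triangle strip.
  for (uint32_t i = 0; i < 4; ++i) {
    std::printf("%u ", i ^ (i >> 1));
  }
  return 0;
}
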
+ const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + VkShaderModule shader_module = ui::vulkan::util::CreateShaderModule( + provider, reinterpret_cast<const uint32_t*>(shader_code.data()), + sizeof(uint32_t) * shader_code.size()); + if (shader_module == VK_NULL_HANDLE) { + XELOGE( + "VulkanPipelineCache: Failed to create the primitive type geometry " + "shader 0x{:08X}", + key.key); + } + geometry_shaders_.emplace(key, shader_module); + return shader_module; +} + +bool VulkanPipelineCache::EnsurePipelineCreated( + const PipelineCreationArguments& creation_arguments) { + if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) { + return true; + } + + // Ideally, this function should validate the description to prevent + // unsupported and potentially dangerous (crashing) behavior, because + // pipelines can also be created from the on-disk storage. + + if (creation_arguments.pixel_shader) { + XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}", + creation_arguments.vertex_shader->shader().ucode_data_hash(), + creation_arguments.pixel_shader->shader().ucode_data_hash()); } else { - // Back state is identical to front state. - std::memcpy(&state_info.back, &state_info.front, sizeof(VkStencilOpState)); + XELOGGPU("Creating graphics pipeline state with VS {:016X}", + creation_arguments.vertex_shader->shader().ucode_data_hash()); } - // Ignored; set dynamically. - state_info.minDepthBounds = 0; - state_info.maxDepthBounds = 0; - state_info.front.compareMask = 0; - state_info.front.writeMask = 0; - state_info.front.reference = 0; - state_info.back.compareMask = 0; - state_info.back.writeMask = 0; - state_info.back.reference = 0; - - return UpdateStatus::kMismatch; -} - -VulkanPipelineCache::UpdateStatus VulkanPipelineCache::UpdateColorBlendState() { - auto& regs = update_color_blend_state_regs_; - auto& state_info = update_color_blend_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= - SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0); - dirty |= - SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1); - dirty |= - SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2); - dirty |= - SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3); - dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - XXH3_64bits_update(&hash_state_, &regs, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible + const PipelineDescription& description = creation_arguments.pipeline->first; + if (!ArePipelineRequirementsMet(description)) { + assert_always( + "When creating a new pipeline, the description must not require " + "unsupported features, and when loading the pipeline storage, " + "pipelines with unsupported features must be filtered out"); + return false; } - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - state_info.logicOpEnable = VK_FALSE; - state_info.logicOp = VK_LOGIC_OP_NO_OP; + std::array<VkPipelineShaderStageCreateInfo, 3> shader_stages; + uint32_t shader_stage_count = 0; - auto enable_mode = static_cast<xenos::ModeControl>(regs.rb_modecontrol & 0x7); - - static const VkBlendFactor kBlendFactorMap[] = { - /* 0 */ VK_BLEND_FACTOR_ZERO, - /* 1 */ VK_BLEND_FACTOR_ONE, - /* 2 */ VK_BLEND_FACTOR_ZERO, // ? 
- /* 3 */ VK_BLEND_FACTOR_ZERO, // ? - /* 4 */ VK_BLEND_FACTOR_SRC_COLOR, - /* 5 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - /* 6 */ VK_BLEND_FACTOR_SRC_ALPHA, - /* 7 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - /* 8 */ VK_BLEND_FACTOR_DST_COLOR, - /* 9 */ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - /* 10 */ VK_BLEND_FACTOR_DST_ALPHA, - /* 11 */ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - /* 12 */ VK_BLEND_FACTOR_CONSTANT_COLOR, - /* 13 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - /* 14 */ VK_BLEND_FACTOR_CONSTANT_ALPHA, - /* 15 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - /* 16 */ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, - }; - static const VkBlendOp kBlendOpMap[] = { - /* 0 */ VK_BLEND_OP_ADD, - /* 1 */ VK_BLEND_OP_SUBTRACT, - /* 2 */ VK_BLEND_OP_MIN, - /* 3 */ VK_BLEND_OP_MAX, - /* 4 */ VK_BLEND_OP_REVERSE_SUBTRACT, - }; - auto& attachment_states = update_color_blend_attachment_states_; - for (int i = 0; i < 4; ++i) { - uint32_t blend_control = regs.rb_blendcontrol[i]; - auto& attachment_state = attachment_states[i]; - attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001; - // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND - attachment_state.srcColorBlendFactor = - kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; - // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND - attachment_state.dstColorBlendFactor = - kBlendFactorMap[(blend_control & 0x00001F00) >> 8]; - // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN - attachment_state.colorBlendOp = - kBlendOpMap[(blend_control & 0x000000E0) >> 5]; - // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND - attachment_state.srcAlphaBlendFactor = - kBlendFactorMap[(blend_control & 0x001F0000) >> 16]; - // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND - attachment_state.dstAlphaBlendFactor = - kBlendFactorMap[(blend_control & 0x1F000000) >> 24]; - // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN - attachment_state.alphaBlendOp = - kBlendOpMap[(blend_control & 0x00E00000) >> 21]; - // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE - // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. - uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; - attachment_state.colorWriteMask = - enable_mode == xenos::ModeControl::kColorDepth ? write_mask : 0; + // Vertex or tessellation evaluation shader. + assert_true(creation_arguments.vertex_shader->is_translated()); + if (!creation_arguments.vertex_shader->is_valid()) { + return false; + } + VkPipelineShaderStageCreateInfo& shader_stage_vertex = + shader_stages[shader_stage_count++]; + shader_stage_vertex.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_vertex.pNext = nullptr; + shader_stage_vertex.flags = 0; + shader_stage_vertex.stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stage_vertex.module = + creation_arguments.vertex_shader->shader_module(); + assert_true(shader_stage_vertex.module != VK_NULL_HANDLE); + shader_stage_vertex.pName = "main"; + shader_stage_vertex.pSpecializationInfo = nullptr; + // Geometry shader. + if (creation_arguments.geometry_shader != VK_NULL_HANDLE) { + VkPipelineShaderStageCreateInfo& shader_stage_geometry = + shader_stages[shader_stage_count++]; + shader_stage_geometry.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_geometry.pNext = nullptr; + shader_stage_geometry.flags = 0; + shader_stage_geometry.stage = VK_SHADER_STAGE_GEOMETRY_BIT; + shader_stage_geometry.module = creation_arguments.geometry_shader; + shader_stage_geometry.pName = "main"; + shader_stage_geometry.pSpecializationInfo = nullptr; + } + // Pixel shader. 
+ if (creation_arguments.pixel_shader) { + assert_true(creation_arguments.pixel_shader->is_translated()); + if (!creation_arguments.pixel_shader->is_valid()) { + return false; + } + VkPipelineShaderStageCreateInfo& shader_stage_fragment = + shader_stages[shader_stage_count++]; + shader_stage_fragment.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_fragment.pNext = nullptr; + shader_stage_fragment.flags = 0; + shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stage_fragment.module = + creation_arguments.pixel_shader->shader_module(); + assert_true(shader_stage_fragment.module != VK_NULL_HANDLE); + shader_stage_fragment.pName = "main"; + shader_stage_fragment.pSpecializationInfo = nullptr; } - state_info.attachmentCount = 4; - state_info.pAttachments = attachment_states; + VkPipelineVertexInputStateCreateInfo vertex_input_state = {}; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - // Ignored; set dynamically. - state_info.blendConstants[0] = 0.0f; - state_info.blendConstants[1] = 0.0f; - state_info.blendConstants[2] = 0.0f; - state_info.blendConstants[3] = 0.0f; + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + input_assembly_state.flags = 0; + switch (description.primitive_topology) { + case PipelinePrimitiveTopology::kPointList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } + break; + case PipelinePrimitiveTopology::kLineList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } + break; + case PipelinePrimitiveTopology::kLineStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } + break; + case PipelinePrimitiveTopology::kTriangleStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleFan: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + break; + case PipelinePrimitiveTopology::kLineListWithAdjacency: + input_assembly_state.topology = + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } + break; + case PipelinePrimitiveTopology::kPatchList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } + break; + default: + assert_unhandled_case(description.primitive_topology); + return false; + } + input_assembly_state.primitiveRestartEnable = + description.primitive_restart ? 
VK_TRUE : VK_FALSE; - return UpdateStatus::kMismatch; + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.depthClampEnable = + description.depth_clamp_enable ? VK_TRUE : VK_FALSE; + switch (description.polygon_mode) { + case PipelinePolygonMode::kFill: + rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; + break; + case PipelinePolygonMode::kLine: + rasterization_state.polygonMode = VK_POLYGON_MODE_LINE; + break; + case PipelinePolygonMode::kPoint: + rasterization_state.polygonMode = VK_POLYGON_MODE_POINT; + break; + default: + assert_unhandled_case(description.polygon_mode); + return false; + } + rasterization_state.cullMode = VK_CULL_MODE_NONE; + if (description.cull_front) { + rasterization_state.cullMode |= VK_CULL_MODE_FRONT_BIT; + } + if (description.cull_back) { + rasterization_state.cullMode |= VK_CULL_MODE_BACK_BIT; + } + rasterization_state.frontFace = description.front_face_clockwise + ? VK_FRONT_FACE_CLOCKWISE + : VK_FRONT_FACE_COUNTER_CLOCKWISE; + // Depth bias is dynamic (even toggling - pipeline creation is expensive). + // "If no depth attachment is present, r is undefined" in the depth bias + // formula, though Z has no effect on anything if a depth attachment is not + // used (the guest shader can't access Z), enabling only when there's a + // depth / stencil attachment for correctness. + // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB + // implementation. + rasterization_state.depthBiasEnable = + (description.render_pass_key.depth_and_color_used & 0b1) ? VK_TRUE + : VK_FALSE; + // TODO(Triang3l): Wide lines. + rasterization_state.lineWidth = 1.0f; + + VkSampleMask sample_mask = UINT32_MAX; + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + if (description.render_pass_key.msaa_samples == xenos::MsaaSamples::k2X && + !render_target_cache_.IsMsaa2xSupported( + description.render_pass_key.depth_and_color_used != 0)) { + // Using sample 0 as 0 and 3 as 1 for 2x instead (not exactly the same + // sample locations, but still top-left and bottom-right - however, this + // can be adjusted with custom sample locations). + multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + sample_mask = 0b1001; + // TODO(Triang3l): Research sample mask behavior without attachments (in + // Direct3D, it's completely ignored in this case). + multisample_state.pSampleMask = &sample_mask; + } else { + multisample_state.rasterizationSamples = VkSampleCountFlagBits( + uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); + } + + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; + depth_stencil_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_state.pNext = nullptr; + if (description.depth_write_enable || + description.depth_compare_op != xenos::CompareFunction::kAlways) { + depth_stencil_state.depthTestEnable = VK_TRUE; + depth_stencil_state.depthWriteEnable = + description.depth_write_enable ? 
VK_TRUE : VK_FALSE; + depth_stencil_state.depthCompareOp = VkCompareOp( + uint32_t(VK_COMPARE_OP_NEVER) + uint32_t(description.depth_compare_op)); + } + if (description.stencil_test_enable) { + depth_stencil_state.stencilTestEnable = VK_TRUE; + depth_stencil_state.front.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_fail_op)); + depth_stencil_state.front.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_pass_op)); + depth_stencil_state.front.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_depth_fail_op)); + depth_stencil_state.front.compareOp = + VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_front_compare_op)); + depth_stencil_state.back.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_fail_op)); + depth_stencil_state.back.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_pass_op)); + depth_stencil_state.back.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_depth_fail_op)); + depth_stencil_state.back.compareOp = + VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_back_compare_op)); + } + + VkPipelineColorBlendAttachmentState + color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; + uint32_t color_rts_used = + description.render_pass_key.depth_and_color_used >> 1; + { + static const VkBlendFactor kBlendFactorMap[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + }; + // 8 entries for safety since 3 bits from the guest are passed directly. 
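The padding of the table that follows comes from the comment above: the guest blend op is a 3-bit field indexed directly, so indices 5-7 can occur in malformed state and are given safe VK_BLEND_OP_ADD fallbacks. A hypothetical compile-time guard for that invariant (xe::countof is Xenia's array-size helper; the assertion itself is not in the diff):

static_assert(xe::countof(kBlendOpMap) == 8,
              "3-bit guest blend op field is used to index the map directly");
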
+ static const VkBlendOp kBlendOpMap[] = {VK_BLEND_OP_ADD, + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD}; + uint32_t color_rts_remaining = color_rts_used; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + VkPipelineColorBlendAttachmentState& color_blend_attachment = + color_blend_attachments[color_rt_index]; + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_color_blend_factor != PipelineBlendFactor::kZero || + color_rt.color_blend_op != xenos::BlendOp::kAdd || + color_rt.src_alpha_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_alpha_blend_factor != PipelineBlendFactor::kZero || + color_rt.alpha_blend_op != xenos::BlendOp::kAdd) { + color_blend_attachment.blendEnable = VK_TRUE; + color_blend_attachment.srcColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_color_blend_factor)]; + color_blend_attachment.dstColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_color_blend_factor)]; + color_blend_attachment.colorBlendOp = + kBlendOpMap[uint32_t(color_rt.color_blend_op)]; + color_blend_attachment.srcAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_alpha_blend_factor)]; + color_blend_attachment.dstAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_alpha_blend_factor)]; + color_blend_attachment.alphaBlendOp = + kBlendOpMap[uint32_t(color_rt.alpha_blend_op)]; + } + color_blend_attachment.colorWriteMask = + VkColorComponentFlags(color_rt.color_write_mask); + if (!device_features.independentBlend) { + // For non-independent blend, the pAttachments element for the first + // actually used color will be replicated into all. + break; + } + } + } + VkPipelineColorBlendStateCreateInfo color_blend_state = {}; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used); + color_blend_state.pAttachments = color_blend_attachments; + if (color_rts_used && !device_features.independentBlend) { + // "If the independent blending feature is not enabled, all elements of + // pAttachments must be identical." + uint32_t first_color_rt_index; + xe::bit_scan_forward(color_rts_used, &first_color_rt_index); + for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) { + if (i == first_color_rt_index) { + continue; + } + color_blend_attachments[i] = + color_blend_attachments[first_color_rt_index]; + } + } + + std::array<VkDynamicState, 7> dynamic_states; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = 0; + dynamic_state.pDynamicStates = dynamic_states.data(); + // Regardless of whether some of this state actually has any effect on the + // pipeline, marking all as dynamic because otherwise, binding any pipeline + // with such state not marked as dynamic will cause the dynamic state to be + // invalidated (again, even if it has no effect). 
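A hedged sketch of what that rationale buys the command processor: because every pipeline from this cache declares the same dynamic states, a pipeline bind never invalidates them, so state such as the viewport only needs re-recording when the guest actually changes it (this assumes the dfn dispatch table exposes these core Vulkan entry points, as it does for vkCreateGraphicsPipelines further below):

// Sketch: dynamic state set once survives pipeline binds within the cache.
void BindAndDraw(const ui::vulkan::VulkanProvider::DeviceFunctions& dfn,
                 VkCommandBuffer command_buffer, VkPipeline pipeline,
                 const VkViewport& viewport, const VkRect2D& scissor) {
  dfn.vkCmdSetViewport(command_buffer, 0, 1, &viewport);
  dfn.vkCmdSetScissor(command_buffer, 0, 1, &scissor);
  dfn.vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline);
  // No re-set is needed before binding another pipeline from the same cache.
}
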
+
+  std::array<VkDynamicState, 7> dynamic_states;
+  VkPipelineDynamicStateCreateInfo dynamic_state;
+  dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+  dynamic_state.pNext = nullptr;
+  dynamic_state.flags = 0;
+  dynamic_state.dynamicStateCount = 0;
+  dynamic_state.pDynamicStates = dynamic_states.data();
+  // Regardless of whether some of this state actually has any effect on the
+  // pipeline, marking all as dynamic because otherwise, binding any pipeline
+  // with such state not marked as dynamic will cause the dynamic state to be
+  // invalidated (again, even if it has no effect).
+  dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
+  dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
+  dynamic_states[dynamic_state.dynamicStateCount++] =
+      VK_DYNAMIC_STATE_DEPTH_BIAS;
+  dynamic_states[dynamic_state.dynamicStateCount++] =
+      VK_DYNAMIC_STATE_BLEND_CONSTANTS;
+  dynamic_states[dynamic_state.dynamicStateCount++] =
+      VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
+  dynamic_states[dynamic_state.dynamicStateCount++] =
+      VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
+  dynamic_states[dynamic_state.dynamicStateCount++] =
+      VK_DYNAMIC_STATE_STENCIL_REFERENCE;
+
+  VkGraphicsPipelineCreateInfo pipeline_create_info;
+  pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+  pipeline_create_info.pNext = nullptr;
+  pipeline_create_info.flags = 0;
+  pipeline_create_info.stageCount = shader_stage_count;
+  pipeline_create_info.pStages = shader_stages.data();
+  pipeline_create_info.pVertexInputState = &vertex_input_state;
+  pipeline_create_info.pInputAssemblyState = &input_assembly_state;
+  pipeline_create_info.pTessellationState = nullptr;
+  pipeline_create_info.pViewportState = &viewport_state;
+  pipeline_create_info.pRasterizationState = &rasterization_state;
+  pipeline_create_info.pMultisampleState = &multisample_state;
+  pipeline_create_info.pDepthStencilState = &depth_stencil_state;
+  pipeline_create_info.pColorBlendState = &color_blend_state;
+  pipeline_create_info.pDynamicState = &dynamic_state;
+  pipeline_create_info.layout =
+      creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout();
+  pipeline_create_info.renderPass = creation_arguments.render_pass;
+  pipeline_create_info.subpass = 0;
+  pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
+  pipeline_create_info.basePipelineIndex = -1;
+
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  VkPipeline pipeline;
+  if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1,
+                                    &pipeline_create_info, nullptr,
+                                    &pipeline) != VK_SUCCESS) {
+    // TODO(Triang3l): Move these error messages outside.
+    /* if (creation_arguments.pixel_shader) {
+      XELOGE(
+          "Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
+          creation_arguments.vertex_shader->shader().ucode_data_hash(),
+          creation_arguments.pixel_shader->shader().ucode_data_hash());
+    } else {
+      XELOGE("Failed to create graphics pipeline with VS {:016X}",
+             creation_arguments.vertex_shader->shader().ucode_data_hash());
+    } */
+    return false;
+  }
+  creation_arguments.pipeline->second.pipeline = pipeline;
+  return true;
+}
 
 }  // namespace vulkan
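Because every piece of viewport, scissor, depth-bias, blend-constant and stencil state above is declared dynamic, pipelines never need recompiling when only that state changes; the cost is that the command processor must re-issue the vkCmdSet* calls after binding. A rough sketch of the recording-side pattern this implies (function and parameter names are illustrative, not taken from the diff):

    #include <vulkan/vulkan.h>

    // With VK_DYNAMIC_STATE_* declared in the pipeline, this state comes from
    // commands, so one pipeline object serves many viewports and scissors.
    void BindAndSetDynamicState(VkCommandBuffer cmd, VkPipeline pipeline,
                                const VkViewport& viewport,
                                const VkRect2D& scissor,
                                uint32_t stencil_reference) {
      vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
      vkCmdSetViewport(cmd, 0, 1, &viewport);
      vkCmdSetScissor(cmd, 0, 1, &scissor);
      vkCmdSetStencilReference(cmd, VK_STENCIL_FACE_FRONT_AND_BACK,
                               stencil_reference);
    }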
diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h
index 77afd9a9b..141d756c8 100644
--- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h
+++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h
@@ -2,312 +2,322 @@
  ******************************************************************************
  * Xenia : Xbox 360 Emulator Research Project                                 *
  ******************************************************************************
- * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Copyright 2022 Ben Vanik. All rights reserved.                             *
  * Released under the BSD license - see LICENSE in the root for more details. *
  ******************************************************************************
  */
 
-#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
-#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
+#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
+#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
 
+#include <cstring>
+#include <functional>
+#include <memory>
+#include <mutex>
 #include <unordered_map>
+#include <utility>
 
-#include "xenia/base/string_buffer.h"
+#include "xenia/base/hash.h"
+#include "xenia/base/platform.h"
 #include "xenia/base/xxhash.h"
+#include "xenia/gpu/primitive_processor.h"
 #include "xenia/gpu/register_file.h"
+#include "xenia/gpu/registers.h"
 #include "xenia/gpu/spirv_shader_translator.h"
-#include "xenia/gpu/vulkan/render_cache.h"
+#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
 #include "xenia/gpu/vulkan/vulkan_shader.h"
 #include "xenia/gpu/xenos.h"
-#include "xenia/ui/spirv/spirv_disassembler.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"
 
 namespace xe {
 namespace gpu {
 namespace vulkan {
 
-// Configures and caches pipelines based on render state.
-// This is responsible for properly setting all state required for a draw
-// including shaders, various blend/etc options, and input configuration.
+class VulkanCommandProcessor;
+
+// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
+// implementations.
 class VulkanPipelineCache {
  public:
-  enum class UpdateStatus {
-    kCompatible,
-    kMismatch,
-    kError,
+  static constexpr size_t kLayoutUIDEmpty = 0;
+
+  class PipelineLayoutProvider {
+   public:
+    virtual ~PipelineLayoutProvider() {}
+    virtual VkPipelineLayout GetPipelineLayout() const = 0;
+
+   protected:
+    PipelineLayoutProvider() = default;
   };
 
-  VulkanPipelineCache(RegisterFile* register_file,
-                      const ui::vulkan::VulkanProvider& provider);
+  VulkanPipelineCache(VulkanCommandProcessor& command_processor,
+                      const RegisterFile& register_file,
+                      VulkanRenderTargetCache& render_target_cache,
+                      VkShaderStageFlags guest_shader_vertex_stages);
   ~VulkanPipelineCache();
 
-  VkResult Initialize(VkDescriptorSetLayout uniform_descriptor_set_layout,
-                      VkDescriptorSetLayout texture_descriptor_set_layout,
-                      VkDescriptorSetLayout vertex_descriptor_set_layout);
+  bool Initialize();
   void Shutdown();
 
-  // Loads a shader from the cache, possibly translating it.
   VulkanShader* LoadShader(xenos::ShaderType shader_type,
-                           uint32_t guest_address, const uint32_t* host_address,
-                           uint32_t dword_count);
+                           const uint32_t* host_address, uint32_t dword_count);
+  // Analyze shader microcode on the translator thread.
+  void AnalyzeShaderUcode(Shader& shader) {
+    shader.AnalyzeUcode(ucode_disasm_buffer_);
+  }
 
-  // Configures a pipeline using the current render state and the given render
-  // pass. If a previously available pipeline is available it will be used,
-  // otherwise a new one may be created. Any state that can be set dynamically
-  // in the command buffer is issued at this time.
-  // Returns whether the pipeline could be successfully created.
-  UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer,
-                                 const RenderState* render_state,
-                                 VulkanShader* vertex_shader,
-                                 VulkanShader* pixel_shader,
-                                 xenos::PrimitiveType primitive_type,
-                                 VkPipeline* pipeline_out);
+  // Retrieves the shader modification for the current state. The shader must
+  // have microcode analyzed.
+  SpirvShaderTranslator::Modification GetCurrentVertexShaderModification(
+      const Shader& shader,
+      Shader::HostVertexShaderType host_vertex_shader_type) const;
+  SpirvShaderTranslator::Modification GetCurrentPixelShaderModification(
+      const Shader& shader, uint32_t normalized_color_mask) const;
 
-  // Sets required dynamic state on the command buffer.
-  // Only state that has changed since the last call will be set unless
-  // full_update is true.
-  bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update);
-
-  // Pipeline layout shared by all pipelines.
-  VkPipelineLayout pipeline_layout() const { return pipeline_layout_; }
-
-  // Clears all cached content.
-  void ClearCache();
+  bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader,
+                               VulkanShader::VulkanTranslation* pixel_shader);
+  // TODO(Triang3l): Return a deferred creation handle.
+  bool ConfigurePipeline(
+      VulkanShader::VulkanTranslation* vertex_shader,
+      VulkanShader::VulkanTranslation* pixel_shader,
+      const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
+      reg::RB_DEPTHCONTROL normalized_depth_control,
+      uint32_t normalized_color_mask,
+      VulkanRenderTargetCache::RenderPassKey render_pass_key,
+      VkPipeline& pipeline_out,
+      const PipelineLayoutProvider*& pipeline_layout_out);
 
  private:
-  // Creates or retrieves an existing pipeline for the currently configured
-  // state.
-  VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
+  enum class PipelineGeometryShader : uint32_t {
+    kNone,
+    kRectangleList,
+    kQuadList,
+  };
 
-  bool TranslateShader(VulkanShader::VulkanTranslation& translation);
+  enum class PipelinePrimitiveTopology : uint32_t {
+    kPointList,
+    kLineList,
+    kLineStrip,
+    kTriangleList,
+    kTriangleStrip,
+    kTriangleFan,
+    kLineListWithAdjacency,
+    kPatchList,
+  };
 
-  void DumpShaderDisasmAMD(VkPipeline pipeline);
-  void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
+  enum class PipelinePolygonMode : uint32_t {
+    kFill,
+    kLine,
+    kPoint,
+  };
 
-  // Gets a geometry shader used to emulate the given primitive type.
-  // Returns nullptr if the primitive doesn't need to be emulated.
-  VkShaderModule GetGeometryShader(xenos::PrimitiveType primitive_type,
-                                   bool is_line_mode);
+  enum class PipelineBlendFactor : uint32_t {
+    kZero,
+    kOne,
+    kSrcColor,
+    kOneMinusSrcColor,
+    kDstColor,
+    kOneMinusDstColor,
+    kSrcAlpha,
+    kOneMinusSrcAlpha,
+    kDstAlpha,
+    kOneMinusDstAlpha,
+    kConstantColor,
+    kOneMinusConstantColor,
+    kConstantAlpha,
+    kOneMinusConstantAlpha,
+    kSrcAlphaSaturate,
+  };
 
-  RegisterFile* register_file_ = nullptr;
-  const ui::vulkan::VulkanProvider& provider_;
+  // Update PipelineDescription::kVersion if anything is changed!
+  XEPACKEDSTRUCT(PipelineRenderTarget, {
+    PipelineBlendFactor src_color_blend_factor : 4;  // 4
+    PipelineBlendFactor dst_color_blend_factor : 4;  // 8
+    xenos::BlendOp color_blend_op : 3;               // 11
+    PipelineBlendFactor src_alpha_blend_factor : 4;  // 15
+    PipelineBlendFactor dst_alpha_blend_factor : 4;  // 19
+    xenos::BlendOp alpha_blend_op : 3;               // 22
+    uint32_t color_write_mask : 4;                   // 26
+  });
 
-  // Temporary storage for AnalyzeUcode calls.
+  XEPACKEDSTRUCT(PipelineDescription, {
+    uint64_t vertex_shader_hash;
+    uint64_t vertex_shader_modification;
+    // 0 if no pixel shader.
+    uint64_t pixel_shader_hash;
+    uint64_t pixel_shader_modification;
+    VulkanRenderTargetCache::RenderPassKey render_pass_key;
+
+    // Shader stages.
+    PipelineGeometryShader geometry_shader : 2;  // 2
+    // Input assembly.
+    PipelinePrimitiveTopology primitive_topology : 3;  // 5
+    uint32_t primitive_restart : 1;                    // 6
+    // Rasterization.
+    uint32_t depth_clamp_enable : 1;       // 7
+    PipelinePolygonMode polygon_mode : 2;  // 9
+    uint32_t cull_front : 1;               // 10
+    uint32_t cull_back : 1;                // 11
+    uint32_t front_face_clockwise : 1;     // 12
+    // Depth / stencil.
+    uint32_t depth_write_enable : 1;                      // 13
+    xenos::CompareFunction depth_compare_op : 3;          // 16
+    uint32_t stencil_test_enable : 1;                     // 17
+    xenos::StencilOp stencil_front_fail_op : 3;           // 20
+    xenos::StencilOp stencil_front_pass_op : 3;           // 23
+    xenos::StencilOp stencil_front_depth_fail_op : 3;     // 26
+    xenos::CompareFunction stencil_front_compare_op : 3;  // 29
+    xenos::StencilOp stencil_back_fail_op : 3;            // 32
+
+    xenos::StencilOp stencil_back_pass_op : 3;            // 3
+    xenos::StencilOp stencil_back_depth_fail_op : 3;      // 6
+    xenos::CompareFunction stencil_back_compare_op : 3;   // 9
+
+    // Filled only for the attachments present in the render pass object.
+    PipelineRenderTarget render_targets[xenos::kMaxColorRenderTargets];
+
+    // Including all the padding, for a stable hash.
+    PipelineDescription() { Reset(); }
+    PipelineDescription(const PipelineDescription& description) {
+      std::memcpy(this, &description, sizeof(*this));
+    }
+    PipelineDescription& operator=(const PipelineDescription& description) {
+      std::memcpy(this, &description, sizeof(*this));
+      return *this;
+    }
+    bool operator==(const PipelineDescription& description) const {
+      return std::memcmp(this, &description, sizeof(*this)) == 0;
+    }
+    void Reset() { std::memset(this, 0, sizeof(*this)); }
+    uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); }
+    struct Hasher {
+      size_t operator()(const PipelineDescription& description) const {
+        return size_t(description.GetHash());
+      }
+    };
+  });
+
+  struct Pipeline {
+    VkPipeline pipeline = VK_NULL_HANDLE;
+    // The layouts are owned by the VulkanCommandProcessor, and must not be
+    // destroyed by it while the pipeline cache is active.
+    const PipelineLayoutProvider* pipeline_layout;
+    Pipeline(const PipelineLayoutProvider* pipeline_layout_provider)
+        : pipeline_layout(pipeline_layout_provider) {}
+  };
+
+  // Description that can be passed from the command processor thread to the
+  // creation threads, with everything needed from caches pre-looked-up.
+  struct PipelineCreationArguments {
+    std::pair<const PipelineDescription, Pipeline>* pipeline;
+    const VulkanShader::VulkanTranslation* vertex_shader;
+    const VulkanShader::VulkanTranslation* pixel_shader;
+    VkShaderModule geometry_shader;
+    VkRenderPass render_pass;
+  };
+
+  union GeometryShaderKey {
+    uint32_t key;
+    struct {
+      PipelineGeometryShader type : 2;
+      uint32_t interpolator_count : 5;
+      uint32_t user_clip_plane_count : 3;
+      uint32_t user_clip_plane_cull : 1;
+      uint32_t has_vertex_kill_and : 1;
+      uint32_t has_point_size : 1;
+      uint32_t has_point_coordinates : 1;
+    };
+
+    GeometryShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }
+
+    struct Hasher {
+      size_t operator()(const GeometryShaderKey& key) const {
+        return std::hash<uint32_t>{}(key.key);
+      }
+    };
+    bool operator==(const GeometryShaderKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const GeometryShaderKey& other_key) const {
+      return !(*this == other_key);
+    }
+  };
+
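PipelineDescription and GeometryShaderKey above both follow the same keying discipline: the whole object is zeroed first, padding included, so that XXH3 over the raw bytes and memcmp-based equality are well defined. A reduced sketch of the pattern, assuming only the xxhash dependency the header already pulls in (ExampleKey is hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <unordered_map>

    #include "xenia/base/xxhash.h"

    // A packed POD key that is zeroed before filling, so padding bytes never
    // leak indeterminate values into the hash or into memcmp.
    struct ExampleKey {
      uint32_t format;
      uint32_t flags;
      ExampleKey() { std::memset(this, 0, sizeof(*this)); }
      bool operator==(const ExampleKey& other) const {
        return std::memcmp(this, &other, sizeof(*this)) == 0;
      }
      struct Hasher {
        size_t operator()(const ExampleKey& key) const {
          return size_t(XXH3_64bits(&key, sizeof(key)));
        }
      };
    };

    std::unordered_map<ExampleKey, int, ExampleKey::Hasher> example_cache;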
+  // Can be called from multiple threads.
+  bool TranslateAnalyzedShader(SpirvShaderTranslator& translator,
+                               VulkanShader::VulkanTranslation& translation);
+
+  void WritePipelineRenderTargetDescription(
+      reg::RB_BLENDCONTROL blend_control, uint32_t write_mask,
+      PipelineRenderTarget& render_target_out) const;
+  bool GetCurrentStateDescription(
+      const VulkanShader::VulkanTranslation* vertex_shader,
+      const VulkanShader::VulkanTranslation* pixel_shader,
+      const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
+      reg::RB_DEPTHCONTROL normalized_depth_control,
+      uint32_t normalized_color_mask,
+      VulkanRenderTargetCache::RenderPassKey render_pass_key,
+      PipelineDescription& description_out) const;
+
+  // Whether the pipeline for the given description is supported by the device.
+  bool ArePipelineRequirementsMet(const PipelineDescription& description) const;
+
+  static bool GetGeometryShaderKey(PipelineGeometryShader geometry_shader_type,
+                                   GeometryShaderKey& key_out);
+  VkShaderModule GetGeometryShader(GeometryShaderKey key);
+
+  // Can be called from creation threads - all needed data must be fully set up
+  // at the point of the call: shaders must be translated, pipeline layout and
+  // render pass objects must be available.
+  bool EnsurePipelineCreated(
+      const PipelineCreationArguments& creation_arguments);
+
+  VulkanCommandProcessor& command_processor_;
+  const RegisterFile& register_file_;
+  VulkanRenderTargetCache& render_target_cache_;
+  VkShaderStageFlags guest_shader_vertex_stages_;
+
+  // Temporary storage for AnalyzeUcode calls on the processor thread.
   StringBuffer ucode_disasm_buffer_;
-  // Reusable shader translator.
-  std::unique_ptr<SpirvShaderTranslator> shader_translator_ = nullptr;
-  // Disassembler used to get the SPIRV disasm. Only used in debug.
-  xe::ui::spirv::SpirvDisassembler disassembler_;
-  // All loaded shaders mapped by their guest hash key.
-  std::unordered_map<uint64_t, VulkanShader*> shader_map_;
+  // Reusable shader translator on the command processor thread.
+  std::unique_ptr<SpirvShaderTranslator> shader_translator_;
 
-  // Vulkan pipeline cache, which in theory helps us out.
-  // This can be serialized to disk and reused, if we want.
-  VkPipelineCache pipeline_cache_ = nullptr;
-  // Layout used for all pipelines describing our uniforms, textures, and push
-  // constants.
-  VkPipelineLayout pipeline_layout_ = nullptr;
+  struct LayoutUID {
+    size_t uid;
+    size_t vector_span_offset;
+    size_t vector_span_length;
+  };
+  std::mutex layouts_mutex_;
+  // Texture binding layouts of different shaders, for obtaining layout UIDs.
+  std::vector<VulkanShader::TextureBinding> texture_binding_layouts_;
+  // Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
+  // are XXH3 hashes of layouts, values need manual collision resolution using
+  // layout_vector_offset:layout_length of texture_binding_layouts_.
+  std::unordered_multimap<uint64_t, LayoutUID,
+                          xe::hash::IdentityHasher<uint64_t>>
+      texture_binding_layout_map_;
 
-  // Shared geometry shaders.
-  struct {
-    VkShaderModule line_quad_list;
-    VkShaderModule point_list;
-    VkShaderModule quad_list;
-    VkShaderModule rect_list;
-  } geometry_shaders_;
+  // Ucode hash -> shader.
+  std::unordered_map<uint64_t, VulkanShader*,
+                     xe::hash::IdentityHasher<uint64_t>>
+      shaders_;
 
-  // Shared dummy pixel shader.
-  VkShaderModule dummy_pixel_shader_;
+  // Geometry shaders for Xenos primitive types not supported by Vulkan.
+  // Stores VK_NULL_HANDLE if failed to create.
+  std::unordered_map<GeometryShaderKey, VkShaderModule,
+                     GeometryShaderKey::Hasher>
+      geometry_shaders_;
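texture_binding_layout_map_ above implements a small interning scheme: the key is an XXH3 hash of a binding-layout span, and because a 64-bit hash can collide, each hit is confirmed by comparing the stored span before a UID is reused. A simplified sketch of that resolve-on-collision lookup (types reduced to plain integers; this is not the actual Xenia helper):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct SpanRef {
      size_t offset;
      size_t length;
    };

    std::vector<uint32_t> layout_storage;
    std::unordered_multimap<uint64_t, SpanRef> layout_map;

    // Returns the offset of an existing identical span, or stores a new one.
    // `hash` is assumed to be a strong 64-bit hash (XXH3) of `layout`.
    size_t DeduplicateLayout(uint64_t hash,
                             const std::vector<uint32_t>& layout) {
      auto range = layout_map.equal_range(hash);
      for (auto it = range.first; it != range.second; ++it) {
        const SpanRef& ref = it->second;
        if (ref.length == layout.size() &&
            std::equal(layout.begin(), layout.end(),
                       layout_storage.begin() + ref.offset)) {
          return ref.offset;  // Identical layout already stored - reuse it.
        }
      }
      size_t offset = layout_storage.size();
      layout_storage.insert(layout_storage.end(), layout.begin(),
                            layout.end());
      layout_map.emplace(hash, SpanRef{offset, layout.size()});
      return offset;
    }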
 
-  // Hash state used to incrementally produce pipeline hashes during update.
-  // By the time the full update pass has run the hash will represent the
-  // current state in a way that can uniquely identify the produced VkPipeline.
-  XXH3_state_t hash_state_;
-  // All previously generated pipelines mapped by hash.
-  std::unordered_map<uint64_t, VkPipeline> cached_pipelines_;
+  std::unordered_map<PipelineDescription, Pipeline,
+                     PipelineDescription::Hasher>
+      pipelines_;
 
-  // Previously used pipeline. This matches our current state settings
-  // and allows us to quickly(ish) reuse the pipeline if no registers have
-  // changed.
-  VkPipeline current_pipeline_ = nullptr;
-
- private:
-  UpdateStatus UpdateState(VulkanShader* vertex_shader,
-                           VulkanShader* pixel_shader,
-                           xenos::PrimitiveType primitive_type);
-
-  UpdateStatus UpdateRenderTargetState();
-  UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader,
-                                  VulkanShader* pixel_shader,
-                                  xenos::PrimitiveType primitive_type);
-  UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader);
-  UpdateStatus UpdateInputAssemblyState(xenos::PrimitiveType primitive_type);
-  UpdateStatus UpdateViewportState();
-  UpdateStatus UpdateRasterizationState(xenos::PrimitiveType primitive_type);
-  UpdateStatus UpdateMultisampleState();
-  UpdateStatus UpdateDepthStencilState();
-  UpdateStatus UpdateColorBlendState();
-
-  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
-  bool SetShadowRegister(float* dest, uint32_t register_name);
-  bool SetShadowRegisterArray(uint32_t* dest, uint32_t num,
-                              uint32_t register_name);
-
-  struct UpdateRenderTargetsRegisters {
-    uint32_t rb_modecontrol;
-    reg::RB_SURFACE_INFO rb_surface_info;
-    reg::RB_COLOR_INFO rb_color_info;
-    reg::RB_DEPTH_INFO rb_depth_info;
-    reg::RB_COLOR_INFO rb_color1_info;
-    reg::RB_COLOR_INFO rb_color2_info;
-    reg::RB_COLOR_INFO rb_color3_info;
-    uint32_t rb_color_mask;
-    uint32_t rb_depthcontrol;
-    uint32_t rb_stencilrefmask;
-
-    UpdateRenderTargetsRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_render_targets_regs_;
-
-  struct UpdateShaderStagesRegisters {
-    xenos::PrimitiveType primitive_type;
-    uint32_t pa_su_sc_mode_cntl;
-    reg::SQ_PROGRAM_CNTL sq_program_cntl;
-    VulkanShader* vertex_shader;
-    VulkanShader* pixel_shader;
-
-    UpdateShaderStagesRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_shader_stages_regs_;
-  VkPipelineShaderStageCreateInfo update_shader_stages_info_[3];
-  uint32_t update_shader_stages_stage_count_ = 0;
-
-  struct UpdateVertexInputStateRegisters {
-    VulkanShader* vertex_shader;
-
-    UpdateVertexInputStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_vertex_input_state_regs_;
-  VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_;
-  VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[32];
-  VkVertexInputAttributeDescription
-      update_vertex_input_state_attrib_descrs_[96];
-
-  struct UpdateInputAssemblyStateRegisters {
-    xenos::PrimitiveType primitive_type;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t multi_prim_ib_reset_index;
-
-    UpdateInputAssemblyStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_input_assembly_state_regs_;
-  VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_;
-
-  struct UpdateViewportStateRegisters {
-    // uint32_t pa_cl_clip_cntl;
-    uint32_t rb_surface_info;
-    uint32_t pa_cl_vte_cntl;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t pa_sc_window_offset;
-    uint32_t pa_sc_window_scissor_tl;
-    uint32_t pa_sc_window_scissor_br;
-    float pa_cl_vport_xoffset;
-    float pa_cl_vport_yoffset;
-    float pa_cl_vport_zoffset;
-    float pa_cl_vport_xscale;
-    float pa_cl_vport_yscale;
-    float pa_cl_vport_zscale;
-
-    UpdateViewportStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_viewport_state_regs_;
-  VkPipelineViewportStateCreateInfo update_viewport_state_info_;
-
-  struct UpdateRasterizationStateRegisters {
-    xenos::PrimitiveType primitive_type;
-    uint32_t pa_cl_clip_cntl;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t pa_sc_screen_scissor_tl;
-    uint32_t pa_sc_screen_scissor_br;
-    uint32_t pa_sc_viz_query;
-    uint32_t pa_su_poly_offset_enable;
-    uint32_t multi_prim_ib_reset_index;
-
-    UpdateRasterizationStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_rasterization_state_regs_;
-  VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;
-
-  struct UpdateMultisampleStateeRegisters {
-    uint32_t pa_sc_aa_config;
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t rb_surface_info;
-
-    UpdateMultisampleStateeRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_multisample_state_regs_;
-  VkPipelineMultisampleStateCreateInfo update_multisample_state_info_;
-
-  struct UpdateDepthStencilStateRegisters {
-    uint32_t rb_depthcontrol;
-    uint32_t rb_stencilrefmask;
-
-    UpdateDepthStencilStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_depth_stencil_state_regs_;
-  VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_;
-
-  struct UpdateColorBlendStateRegisters {
-    uint32_t rb_color_mask;
-    uint32_t rb_blendcontrol[4];
-    uint32_t rb_modecontrol;
-
-    UpdateColorBlendStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } update_color_blend_state_regs_;
-  VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_;
-  VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4];
-
-  struct SetDynamicStateRegisters {
-    uint32_t pa_sc_window_offset;
-
-    uint32_t pa_su_sc_mode_cntl;
-    uint32_t pa_sc_window_scissor_tl;
-    uint32_t pa_sc_window_scissor_br;
-
-    uint32_t rb_surface_info;
-    uint32_t pa_su_sc_vtx_cntl;
-    // Bias is in Vulkan units because depth format may potentially effect it.
-    float pa_su_poly_offset_scale;
-    float pa_su_poly_offset_offset;
-    uint32_t pa_cl_vte_cntl;
-    float pa_cl_vport_xoffset;
-    float pa_cl_vport_yoffset;
-    float pa_cl_vport_zoffset;
-    float pa_cl_vport_xscale;
-    float pa_cl_vport_yscale;
-    float pa_cl_vport_zscale;
-
-    float rb_blend_rgba[4];
-    uint32_t rb_stencilrefmask;
-
-    reg::SQ_PROGRAM_CNTL sq_program_cntl;
-    uint32_t sq_context_misc;
-    uint32_t rb_colorcontrol;
-    reg::RB_COLOR_INFO rb_color_info;
-    reg::RB_COLOR_INFO rb_color1_info;
-    reg::RB_COLOR_INFO rb_color2_info;
-    reg::RB_COLOR_INFO rb_color3_info;
-    float rb_alpha_ref;
-    uint32_t pa_su_point_size;
-
-    SetDynamicStateRegisters() { Reset(); }
-    void Reset() { std::memset(this, 0, sizeof(*this)); }
-  } set_dynamic_state_registers_;
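In place of all of the removed shadow-register machinery, lookups are now keyed by the full PipelineDescription, and the member declared just below remembers the last map entry that was hit. Since std::unordered_map never invalidates pointers to its elements on rehash, caching that node is safe. A generic sketch of the fast path (Key/Value stand in for PipelineDescription/Pipeline; simplified relative to the real ConfigurePipeline):

    #include <unordered_map>
    #include <utility>

    template <typename Key, typename Value, typename Hasher>
    Value* LookupWithLastHitCache(std::unordered_map<Key, Value, Hasher>& map,
                                  std::pair<const Key, Value>*& last_hit,
                                  const Key& key) {
      if (last_hit && last_hit->first == key) {
        return &last_hit->second;  // No hashing or probing needed.
      }
      auto it = map.find(key);
      if (it == map.end()) {
        return nullptr;  // Creation path omitted from this sketch.
      }
      last_hit = &*it;  // Node pointers are stable across rehashes.
      return &last_hit->second;
    }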
+  // Previously used pipeline, to avoid lookups if the state wasn't changed.
+  const std::pair<const PipelineDescription, Pipeline>* last_pipeline_ =
+      nullptr;
 };
 
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
 
-#endif  // XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
+#endif  // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc
new file mode 100644
index 000000000..058b6a5d1
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc
@@ -0,0 +1,229 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+
+#include "xenia/base/assert.h"
+#include "xenia/base/logging.h"
+#include "xenia/gpu/vulkan/deferred_command_buffer.h"
+#include "xenia/gpu/vulkan/vulkan_command_processor.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_util.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); }
+
+bool VulkanPrimitiveProcessor::Initialize() {
+  // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch.
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const VkPhysicalDeviceFeatures& device_features = provider.device_features();
+  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+      device_portability_subset_features =
+          provider.device_portability_subset_features();
+  if (!InitializeCommon(true,
+                        !device_portability_subset_features ||
+                            device_portability_subset_features->triangleFans,
+                        false, device_features.geometryShader)) {
+    Shutdown();
+    return false;
+  }
+  frame_index_buffer_pool_ =
+      std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
+          command_processor_.GetVulkanProvider(),
+          VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+          std::max(size_t(kMinRequiredConvertedIndexBufferSize),
+                   ui::GraphicsUploadBufferPool::kDefaultPageSize));
+  return true;
+}
+
+void VulkanPrimitiveProcessor::Shutdown(bool from_destructor) {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  frame_index_buffers_.clear();
+  frame_index_buffer_pool_.reset();
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         builtin_index_buffer_upload_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         builtin_index_buffer_upload_memory_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         builtin_index_buffer_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         builtin_index_buffer_memory_);
+
+  if (!from_destructor) {
+    ShutdownCommon();
+  }
+}
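CompletedSubmissionUpdated below is one instance of a general backend rule: a resource the GPU may still be reading can only be destroyed once the submission that used it is known to have completed. A condensed version of the pattern (PendingFree and this helper are illustrative, not Xenia API; the real code goes through the provider's DeviceFunctions table rather than the loader):

    #include <cstdint>
    #include <deque>

    #include <vulkan/vulkan.h>

    struct PendingFree {
      VkBuffer buffer;
      VkDeviceMemory memory;
      uint64_t submission_index;  // Submission that last used the resource.
    };

    void ProcessPendingFrees(std::deque<PendingFree>& frees,
                             uint64_t completed_submission, VkDevice device) {
      while (!frees.empty() &&
             frees.front().submission_index <= completed_submission) {
        vkDestroyBuffer(device, frees.front().buffer, nullptr);
        vkFreeMemory(device, frees.front().memory, nullptr);
        frees.pop_front();
      }
    }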
+
+void VulkanPrimitiveProcessor::CompletedSubmissionUpdated() {
+  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
+      command_processor_.GetCompletedSubmission() >=
+          builtin_index_buffer_upload_submission_) {
+    const ui::vulkan::VulkanProvider& provider =
+        command_processor_.GetVulkanProvider();
+    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+    VkDevice device = provider.device();
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_upload_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_upload_memory_);
+  }
+}
+
+void VulkanPrimitiveProcessor::BeginSubmission() {
+  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
+      builtin_index_buffer_upload_submission_ == UINT64_MAX) {
+    // No need to submit deferred barriers - builtin_index_buffer_ has never
+    // been used yet, and builtin_index_buffer_upload_ is written before
+    // submitting commands reading it.
+
+    command_processor_.EndRenderPass();
+
+    DeferredCommandBuffer& command_buffer =
+        command_processor_.deferred_command_buffer();
+
+    VkBufferCopy* copy_region = command_buffer.CmdCopyBufferEmplace(
+        builtin_index_buffer_upload_, builtin_index_buffer_, 1);
+    copy_region->srcOffset = 0;
+    copy_region->dstOffset = 0;
+    copy_region->size = builtin_index_buffer_size_;
+
+    command_processor_.PushBufferMemoryBarrier(
+        builtin_index_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+        VK_ACCESS_INDEX_READ_BIT);
+
+    builtin_index_buffer_upload_submission_ =
+        command_processor_.GetCurrentSubmission();
+  }
+}
+
+void VulkanPrimitiveProcessor::BeginFrame() {
+  frame_index_buffer_pool_->Reclaim(command_processor_.GetCompletedFrame());
+}
+
+void VulkanPrimitiveProcessor::EndSubmission() {
+  frame_index_buffer_pool_->FlushWrites();
+}
+
+void VulkanPrimitiveProcessor::EndFrame() {
+  ClearPerFrameCache();
+  frame_index_buffers_.clear();
+}
+
+bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
+    uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
+  assert_not_zero(index_count);
+  assert_true(builtin_index_buffer_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE);
+  assert_true(builtin_index_buffer_upload_memory_ == VK_NULL_HANDLE);
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count);
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, builtin_index_buffer_size_,
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+          ui::vulkan::util::MemoryPurpose::kDeviceLocal, builtin_index_buffer_,
+          builtin_index_buffer_memory_)) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to create the built-in index "
+        "buffer GPU resource with {} 16-bit indices",
+        index_count);
+    return false;
+  }
+  uint32_t upload_memory_type;
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, builtin_index_buffer_size_,
+          VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+          ui::vulkan::util::MemoryPurpose::kUpload,
+          builtin_index_buffer_upload_, builtin_index_buffer_upload_memory_,
+          &upload_memory_type)) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to create the built-in index "
+        "buffer upload resource with {} 16-bit indices",
+        index_count);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_memory_);
+    return false;
+  }
+
+  void* mapping;
+  if (dfn.vkMapMemory(device, builtin_index_buffer_upload_memory_, 0,
+                      VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) {
+    XELOGE(
+        "Vulkan primitive processor: Failed to map the built-in index buffer "
+        "upload resource with {} 16-bit indices",
+        index_count);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_upload_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_upload_memory_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                           builtin_index_buffer_);
+    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                           builtin_index_buffer_memory_);
+    return false;
+  }
+  fill_callback(reinterpret_cast<uint16_t*>(mapping));
+  ui::vulkan::util::FlushMappedMemoryRange(
+      provider, builtin_index_buffer_upload_memory_, upload_memory_type);
+  dfn.vkUnmapMemory(device, builtin_index_buffer_upload_memory_);
+
+  // Schedule uploading in the first submission.
+  builtin_index_buffer_upload_submission_ = UINT64_MAX;
+  return true;
+}
+
+void* VulkanPrimitiveProcessor::RequestHostConvertedIndexBufferForCurrentFrame(
+    xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
+    uint32_t coalignment_original_address, size_t& backend_handle_out) {
+  size_t index_size = format == xenos::IndexFormat::kInt16 ? sizeof(uint16_t)
+                                                           : sizeof(uint32_t);
+  VkBuffer buffer;
+  VkDeviceSize offset;
+  uint8_t* mapping = frame_index_buffer_pool_->Request(
+      command_processor_.GetCurrentFrame(),
+      index_size * index_count +
+          (coalign_for_simd ? XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE : 0),
+      index_size, buffer, offset);
+  if (!mapping) {
+    return nullptr;
+  }
+  if (coalign_for_simd) {
+    ptrdiff_t coalignment_offset =
+        GetSimdCoalignmentOffset(mapping, coalignment_original_address);
+    mapping += coalignment_offset;
+    offset = VkDeviceSize(offset + coalignment_offset);
+  }
+  backend_handle_out = frame_index_buffers_.size();
+  frame_index_buffers_.emplace_back(buffer, offset);
+  return mapping;
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
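RequestHostConvertedIndexBufferForCurrentFrame above over-allocates by one SIMD width when co-alignment is requested, then shifts the returned pointer so its low bits match the low bits of the original guest address, letting vectorized conversion code use the same alignment on both sides. A sketch of how such an offset can be computed, assuming a power-of-two SIMD size (the real constant is XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE, and GetSimdCoalignmentOffset's exact definition lives outside this diff):

    #include <cstddef>
    #include <cstdint>

    constexpr uintptr_t kSimdSize = 16;  // Assumed power of two.

    // Returns an offset in [0, kSimdSize) such that (host_base + offset) has
    // the same low bits as guest_address modulo the SIMD width.
    ptrdiff_t GetCoalignmentOffset(const void* host_base,
                                   uint32_t guest_address) {
      uintptr_t host_low = uintptr_t(host_base) & (kSimdSize - 1);
      uintptr_t guest_low = uintptr_t(guest_address) & (kSimdSize - 1);
      return ptrdiff_t((guest_low - host_low) & (kSimdSize - 1));
    }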
diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.h b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h
new file mode 100644
index 000000000..50e729577
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h
@@ -0,0 +1,92 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
+#define XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
+
+#include <deque>
+#include <utility>
+
+#include "xenia/base/assert.h"
+#include "xenia/gpu/primitive_processor.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+class VulkanCommandProcessor;
+
+class VulkanPrimitiveProcessor final : public PrimitiveProcessor {
+ public:
+  VulkanPrimitiveProcessor(const RegisterFile& register_file, Memory& memory,
+                           TraceWriter& trace_writer,
+                           SharedMemory& shared_memory,
+                           VulkanCommandProcessor& command_processor)
+      : PrimitiveProcessor(register_file, memory, trace_writer, shared_memory),
+        command_processor_(command_processor) {}
+  ~VulkanPrimitiveProcessor();
+
+  bool Initialize();
+  void Shutdown(bool from_destructor = false);
+  void ClearCache() { frame_index_buffer_pool_->ClearCache(); }
+
+  void CompletedSubmissionUpdated();
+  void BeginSubmission();
+  void BeginFrame();
+  void EndSubmission();
+  void EndFrame();
+
+  std::pair<VkBuffer, VkDeviceSize> GetBuiltinIndexBuffer(size_t handle) const {
+    assert_not_null(builtin_index_buffer_);
+    return std::make_pair(
+        builtin_index_buffer_,
+        VkDeviceSize(GetBuiltinIndexBufferOffsetBytes(handle)));
+  }
+  std::pair<VkBuffer, VkDeviceSize> GetConvertedIndexBuffer(
+      size_t handle) const {
+    return frame_index_buffers_[handle];
+  }
+
+ protected:
+  bool InitializeBuiltin16BitIndexBuffer(
+      uint32_t index_count,
+      std::function<void(uint16_t*)> fill_callback) override;
+
+  void* RequestHostConvertedIndexBufferForCurrentFrame(
+      xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
+      uint32_t coalignment_original_address,
+      size_t& backend_handle_out) override;
+
+ private:
+  VulkanCommandProcessor& command_processor_;
+
+  VkDeviceSize builtin_index_buffer_size_ = 0;
+  VkBuffer builtin_index_buffer_ = VK_NULL_HANDLE;
+  VkDeviceMemory builtin_index_buffer_memory_ = VK_NULL_HANDLE;
+  // Temporary buffer copied in the beginning of the first submission for
+  // uploading to builtin_index_buffer_, destroyed when the submission in which
+  // it was uploaded is completed.
+  VkBuffer builtin_index_buffer_upload_ = VK_NULL_HANDLE;
+  VkDeviceMemory builtin_index_buffer_upload_memory_ = VK_NULL_HANDLE;
+  // UINT64_MAX means not uploaded yet and needs uploading in the first
+  // submission (if the upload buffer exists at all).
+  uint64_t builtin_index_buffer_upload_submission_ = UINT64_MAX;
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> frame_index_buffer_pool_;
+  // Indexed by the backend handles.
+  std::deque<std::pair<VkBuffer, VkDeviceSize>> frame_index_buffers_;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc
new file mode 100644
index 000000000..46e261ac5
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc
@@ -0,0 +1,5962 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2022 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+
+#include "third_party/glslang/SPIRV/GLSL.std.450.h"
+#include "third_party/glslang/SPIRV/SpvBuilder.h"
+#include "xenia/base/assert.h"
+#include "xenia/base/logging.h"
+#include "xenia/base/math.h"
+#include "xenia/gpu/draw_util.h"
+#include "xenia/gpu/registers.h"
+#include "xenia/gpu/spirv_shader_translator.h"
+#include "xenia/gpu/texture_cache.h"
+#include "xenia/gpu/vulkan/deferred_command_buffer.h"
+#include "xenia/gpu/vulkan/vulkan_command_processor.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/vulkan_util.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+// Generated with `xb buildshaders`.
+namespace shaders {
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_1xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_1x2xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_1x2xmsaa_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_4xmsaa_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_4xmsaa_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_128bpp_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_128bpp_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_16bpp_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_16bpp_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_32bpp_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_32bpp_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_64bpp_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_64bpp_scaled_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_8bpp_cs.h"
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_8bpp_scaled_cs.h"
+}  // namespace shaders
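Each of the headers included into the shaders namespace defines a uint32_t array of SPIR-V words plus its byte size; turning one into a usable pipeline stage is a single vkCreateShaderModule call. Roughly what the ui::vulkan::util helpers used below do (a sketch, not the exact Xenia helper, which goes through the provider's function table):

    #include <cstddef>
    #include <cstdint>

    #include <vulkan/vulkan.h>

    VkShaderModule CreateShaderModuleFromWords(VkDevice device,
                                               const uint32_t* code,
                                               size_t code_size_bytes) {
      VkShaderModuleCreateInfo create_info = {};
      create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
      create_info.codeSize = code_size_bytes;  // In bytes, a multiple of 4.
      create_info.pCode = code;
      VkShaderModule shader_module;
      if (vkCreateShaderModule(device, &create_info, nullptr,
                               &shader_module) != VK_SUCCESS) {
        return VK_NULL_HANDLE;
      }
      return shader_module;
    }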
+
+const VulkanRenderTargetCache::ResolveCopyShaderCode
+    VulkanRenderTargetCache::kResolveCopyShaders[size_t(
+        draw_util::ResolveCopyShaderIndex::kCount)] = {
+        {shaders::resolve_fast_32bpp_1x2xmsaa_cs,
+         sizeof(shaders::resolve_fast_32bpp_1x2xmsaa_cs),
+         shaders::resolve_fast_32bpp_1x2xmsaa_scaled_cs,
+         sizeof(shaders::resolve_fast_32bpp_1x2xmsaa_scaled_cs)},
+        {shaders::resolve_fast_32bpp_4xmsaa_cs,
+         sizeof(shaders::resolve_fast_32bpp_4xmsaa_cs),
+         shaders::resolve_fast_32bpp_4xmsaa_scaled_cs,
+         sizeof(shaders::resolve_fast_32bpp_4xmsaa_scaled_cs)},
+        {shaders::resolve_fast_64bpp_1x2xmsaa_cs,
+         sizeof(shaders::resolve_fast_64bpp_1x2xmsaa_cs),
+         shaders::resolve_fast_64bpp_1x2xmsaa_scaled_cs,
+         sizeof(shaders::resolve_fast_64bpp_1x2xmsaa_scaled_cs)},
+        {shaders::resolve_fast_64bpp_4xmsaa_cs,
+         sizeof(shaders::resolve_fast_64bpp_4xmsaa_cs),
+         shaders::resolve_fast_64bpp_4xmsaa_scaled_cs,
+         sizeof(shaders::resolve_fast_64bpp_4xmsaa_scaled_cs)},
+        {shaders::resolve_full_8bpp_cs, sizeof(shaders::resolve_full_8bpp_cs),
+         shaders::resolve_full_8bpp_scaled_cs,
+         sizeof(shaders::resolve_full_8bpp_scaled_cs)},
+        {shaders::resolve_full_16bpp_cs, sizeof(shaders::resolve_full_16bpp_cs),
+         shaders::resolve_full_16bpp_scaled_cs,
+         sizeof(shaders::resolve_full_16bpp_scaled_cs)},
+        {shaders::resolve_full_32bpp_cs, sizeof(shaders::resolve_full_32bpp_cs),
+         shaders::resolve_full_32bpp_scaled_cs,
+         sizeof(shaders::resolve_full_32bpp_scaled_cs)},
+        {shaders::resolve_full_64bpp_cs, sizeof(shaders::resolve_full_64bpp_cs),
+         shaders::resolve_full_64bpp_scaled_cs,
+         sizeof(shaders::resolve_full_64bpp_scaled_cs)},
+        {shaders::resolve_full_128bpp_cs,
+         sizeof(shaders::resolve_full_128bpp_cs),
+         shaders::resolve_full_128bpp_scaled_cs,
+         sizeof(shaders::resolve_full_128bpp_scaled_cs)},
+};
+
+const VulkanRenderTargetCache::TransferPipelineLayoutInfo
+    VulkanRenderTargetCache::kTransferPipelineLayoutInfos[size_t(
+        TransferPipelineLayoutIndex::kCount)] = {
+        // kColor
+        {kTransferUsedDescriptorSetColorTextureBit,
+         kTransferUsedPushConstantDwordAddressBit},
+        // kDepth
+        {kTransferUsedDescriptorSetDepthStencilTexturesBit,
+         kTransferUsedPushConstantDwordAddressBit},
+        // kColorToStencilBit
+        {kTransferUsedDescriptorSetColorTextureBit,
+         kTransferUsedPushConstantDwordAddressBit |
+             kTransferUsedPushConstantDwordStencilMaskBit},
+        // kDepthToStencilBit
+        {kTransferUsedDescriptorSetDepthStencilTexturesBit,
+         kTransferUsedPushConstantDwordAddressBit |
+             kTransferUsedPushConstantDwordStencilMaskBit},
+        // kColorAndHostDepthTexture
+        {kTransferUsedDescriptorSetHostDepthStencilTexturesBit |
+             kTransferUsedDescriptorSetColorTextureBit,
+         kTransferUsedPushConstantDwordHostDepthAddressBit |
+             kTransferUsedPushConstantDwordAddressBit},
+        // kColorAndHostDepthBuffer
+        {kTransferUsedDescriptorSetHostDepthBufferBit |
+             kTransferUsedDescriptorSetColorTextureBit,
+         kTransferUsedPushConstantDwordHostDepthAddressBit |
+             kTransferUsedPushConstantDwordAddressBit},
+        // kDepthAndHostDepthTexture
+        {kTransferUsedDescriptorSetHostDepthStencilTexturesBit |
+             kTransferUsedDescriptorSetDepthStencilTexturesBit,
+         kTransferUsedPushConstantDwordHostDepthAddressBit |
+             kTransferUsedPushConstantDwordAddressBit},
+        // kDepthAndHostDepthBuffer
+        {kTransferUsedDescriptorSetHostDepthBufferBit |
+             kTransferUsedDescriptorSetDepthStencilTexturesBit,
+         kTransferUsedPushConstantDwordHostDepthAddressBit |
+             kTransferUsedPushConstantDwordAddressBit},
+};
+
+const VulkanRenderTargetCache::TransferModeInfo
+    VulkanRenderTargetCache::kTransferModes[size_t(TransferMode::kCount)] = {
+        // kColorToDepth
+        {TransferOutput::kDepth, TransferPipelineLayoutIndex::kColor},
+        // kColorToColor
+        {TransferOutput::kColor, TransferPipelineLayoutIndex::kColor},
+        // kDepthToDepth
+        {TransferOutput::kDepth, TransferPipelineLayoutIndex::kDepth},
+        // kDepthToColor
+        {TransferOutput::kColor, TransferPipelineLayoutIndex::kDepth},
+        // kColorToStencilBit
+        {TransferOutput::kStencilBit,
+         TransferPipelineLayoutIndex::kColorToStencilBit},
+        // kDepthToStencilBit
+        {TransferOutput::kStencilBit,
+         TransferPipelineLayoutIndex::kDepthToStencilBit},
+        // kColorAndHostDepthToDepth
+        {TransferOutput::kDepth,
+         TransferPipelineLayoutIndex::kColorAndHostDepthTexture},
+        // kDepthAndHostDepthToDepth
+        {TransferOutput::kDepth,
+         TransferPipelineLayoutIndex::kDepthAndHostDepthTexture},
+        // kColorAndHostDepthCopyToDepth
+        {TransferOutput::kDepth,
+         TransferPipelineLayoutIndex::kColorAndHostDepthBuffer},
+        // kDepthAndHostDepthCopyToDepth
+        {TransferOutput::kDepth,
+         TransferPipelineLayoutIndex::kDepthAndHostDepthBuffer},
+};
+
+VulkanRenderTargetCache::VulkanRenderTargetCache(
+    const RegisterFile& register_file, const Memory& memory,
+    TraceWriter& trace_writer, uint32_t draw_resolution_scale_x,
+    uint32_t draw_resolution_scale_y, VulkanCommandProcessor& command_processor)
+    : RenderTargetCache(register_file, memory, &trace_writer,
+                        draw_resolution_scale_x, draw_resolution_scale_y),
+      command_processor_(command_processor),
+      trace_writer_(trace_writer) {}
+
+VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); }
+
+bool VulkanRenderTargetCache::Initialize() {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn();
+  VkPhysicalDevice physical_device = provider.physical_device();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  // Format support.
+  constexpr VkFormatFeatureFlags kUsedDepthFormatFeatures =
+      VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+      VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
+  VkFormatProperties depth_unorm24_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(
+      physical_device, VK_FORMAT_D24_UNORM_S8_UINT, &depth_unorm24_properties);
+  depth_unorm24_vulkan_format_supported_ =
+      (depth_unorm24_properties.optimalTilingFeatures &
+       kUsedDepthFormatFeatures) == kUsedDepthFormatFeatures;
+
+  // Descriptor set layouts.
+  VkDescriptorSetLayoutBinding descriptor_set_layout_bindings[2];
+  descriptor_set_layout_bindings[0].binding = 0;
+  descriptor_set_layout_bindings[0].descriptorType =
+      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+  descriptor_set_layout_bindings[0].descriptorCount = 1;
+  descriptor_set_layout_bindings[0].stageFlags =
+      VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT;
+  descriptor_set_layout_bindings[0].pImmutableSamplers = nullptr;
+  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
+  descriptor_set_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  descriptor_set_layout_create_info.pNext = nullptr;
+  descriptor_set_layout_create_info.flags = 0;
+  descriptor_set_layout_create_info.bindingCount = 1;
+  descriptor_set_layout_create_info.pBindings = descriptor_set_layout_bindings;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_storage_buffer_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the descriptor set layout "
+        "with one storage buffer");
+    Shutdown();
+    return false;
+  }
+  descriptor_set_layout_bindings[0].descriptorType =
+      VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_sampled_image_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the descriptor set layout "
+        "with one sampled image");
+    Shutdown();
+    return false;
+  }
+  descriptor_set_layout_bindings[1].binding = 1;
+  descriptor_set_layout_bindings[1].descriptorType =
+      VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+  descriptor_set_layout_bindings[1].descriptorCount = 1;
+  descriptor_set_layout_bindings[1].stageFlags =
+      descriptor_set_layout_bindings[0].stageFlags;
+  descriptor_set_layout_bindings[1].pImmutableSamplers = nullptr;
+  descriptor_set_layout_create_info.bindingCount = 2;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_sampled_image_x2_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the descriptor set layout "
+        "with two sampled images");
+    Shutdown();
+    return false;
+  }
+
+  // Descriptor set pools.
+  // The pool sizes are somewhat arbitrary.
+  VkDescriptorPoolSize descriptor_set_layout_size;
+  descriptor_set_layout_size.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+  descriptor_set_layout_size.descriptorCount = 1;
+  descriptor_set_pool_sampled_image_ =
+      std::make_unique<ui::vulkan::SingleLayoutDescriptorSetPool>(
+          provider, 256, 1, &descriptor_set_layout_size,
+          descriptor_set_layout_sampled_image_);
+  descriptor_set_layout_size.descriptorCount = 2;
+  descriptor_set_pool_sampled_image_x2_ =
+      std::make_unique<ui::vulkan::SingleLayoutDescriptorSetPool>(
+          provider, 256, 1, &descriptor_set_layout_size,
+          descriptor_set_layout_sampled_image_x2_);
+
+  // EDRAM contents reinterpretation buffer.
+  // 90 MB with 9x resolution scaling - within the minimum
+  // maxStorageBufferRange.
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider,
+          VkDeviceSize(xenos::kEdramSizeBytes *
+                       (draw_resolution_scale_x() * draw_resolution_scale_y())),
+          VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+          ui::vulkan::util::MemoryPurpose::kDeviceLocal, edram_buffer_,
+          edram_buffer_memory_)) {
+    XELOGE("VulkanRenderTargetCache: Failed to create the EDRAM buffer");
+    Shutdown();
+    return false;
+  }
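The size comment above can be checked arithmetically: kEdramSizeBytes is the 10 MiB Xenos EDRAM, and with 9x area scaling (assumed here to mean the maximum 3x3 draw resolution scale, based on the 9x figure in the comment) the buffer grows to 90 MiB, still below the 134217728-byte (128 MiB) minimum the Vulkan specification guarantees for maxStorageBufferRange. As compile-time arithmetic:

    #include <cstdint>

    constexpr uint64_t kEdramSizeBytes = 10 * 1024 * 1024;  // 10 MiB.
    constexpr uint64_t kMaxResolutionScaledSize = kEdramSizeBytes * 3 * 3;
    // 94371840 bytes (90 MiB) against the spec-minimum limit of 2^27 bytes.
    static_assert(kMaxResolutionScaledSize <= 134217728,
                  "Scaled EDRAM buffer must stay within maxStorageBufferRange");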
+  if (GetPath() == Path::kPixelShaderInterlock) {
+    // The first operation will likely be drawing.
+    edram_buffer_usage_ = EdramBufferUsage::kFragmentReadWrite;
+  } else {
+    // The first operation will likely be depth self-comparison.
+    edram_buffer_usage_ = EdramBufferUsage::kFragmentRead;
+  }
+  edram_buffer_modification_status_ =
+      EdramBufferModificationStatus::kUnmodified;
+  VkDescriptorPoolSize edram_storage_buffer_descriptor_pool_size;
+  edram_storage_buffer_descriptor_pool_size.type =
+      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+  edram_storage_buffer_descriptor_pool_size.descriptorCount = 1;
+  VkDescriptorPoolCreateInfo edram_storage_buffer_descriptor_pool_create_info;
+  edram_storage_buffer_descriptor_pool_create_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+  edram_storage_buffer_descriptor_pool_create_info.pNext = nullptr;
+  edram_storage_buffer_descriptor_pool_create_info.flags = 0;
+  edram_storage_buffer_descriptor_pool_create_info.maxSets = 1;
+  edram_storage_buffer_descriptor_pool_create_info.poolSizeCount = 1;
+  edram_storage_buffer_descriptor_pool_create_info.pPoolSizes =
+      &edram_storage_buffer_descriptor_pool_size;
+  if (dfn.vkCreateDescriptorPool(
+          device, &edram_storage_buffer_descriptor_pool_create_info, nullptr,
+          &edram_storage_buffer_descriptor_pool_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the EDRAM buffer storage "
+        "buffer descriptor pool");
+    Shutdown();
+    return false;
+  }
+  VkDescriptorSetAllocateInfo edram_storage_buffer_descriptor_set_allocate_info;
+  edram_storage_buffer_descriptor_set_allocate_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+  edram_storage_buffer_descriptor_set_allocate_info.pNext = nullptr;
+  edram_storage_buffer_descriptor_set_allocate_info.descriptorPool =
+      edram_storage_buffer_descriptor_pool_;
+  edram_storage_buffer_descriptor_set_allocate_info.descriptorSetCount = 1;
+  edram_storage_buffer_descriptor_set_allocate_info.pSetLayouts =
+      &descriptor_set_layout_storage_buffer_;
+  if (dfn.vkAllocateDescriptorSets(
+          device, &edram_storage_buffer_descriptor_set_allocate_info,
+          &edram_storage_buffer_descriptor_set_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to allocate the EDRAM buffer storage "
+        "buffer descriptor set");
+    Shutdown();
+    return false;
+  }
+  VkDescriptorBufferInfo edram_storage_buffer_descriptor_buffer_info;
+  edram_storage_buffer_descriptor_buffer_info.buffer = edram_buffer_;
+  edram_storage_buffer_descriptor_buffer_info.offset = 0;
+  edram_storage_buffer_descriptor_buffer_info.range = VK_WHOLE_SIZE;
+  VkWriteDescriptorSet edram_storage_buffer_descriptor_write;
+  edram_storage_buffer_descriptor_write.sType =
+      VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  edram_storage_buffer_descriptor_write.pNext = nullptr;
+  edram_storage_buffer_descriptor_write.dstSet =
+      edram_storage_buffer_descriptor_set_;
+  edram_storage_buffer_descriptor_write.dstBinding = 0;
+  edram_storage_buffer_descriptor_write.dstArrayElement = 0;
+  edram_storage_buffer_descriptor_write.descriptorCount = 1;
+  edram_storage_buffer_descriptor_write.descriptorType =
+      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+  edram_storage_buffer_descriptor_write.pImageInfo = nullptr;
+  edram_storage_buffer_descriptor_write.pBufferInfo =
+      &edram_storage_buffer_descriptor_buffer_info;
+  edram_storage_buffer_descriptor_write.pTexelBufferView = nullptr;
+  dfn.vkUpdateDescriptorSets(device, 1, &edram_storage_buffer_descriptor_write,
+                             0, nullptr);
+
+  bool draw_resolution_scaled = IsDrawResolutionScaled();
+
+  // Resolve copy pipeline layout.
+  VkDescriptorSetLayout
+      resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetCount] = {};
+  resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetEdram] =
+      descriptor_set_layout_storage_buffer_;
+  resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetDest] =
+      command_processor_.GetSingleTransientDescriptorLayout(
+          VulkanCommandProcessor::SingleTransientDescriptorLayout ::
+              kStorageBufferCompute);
+  VkPushConstantRange resolve_copy_push_constant_range;
+  resolve_copy_push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+  resolve_copy_push_constant_range.offset = 0;
+  // Potentially binding all of the shared memory at 1x resolution, but only
+  // portions with scaled resolution.
+  resolve_copy_push_constant_range.size =
+      draw_resolution_scaled
+          ? sizeof(draw_util::ResolveCopyShaderConstants::DestRelative)
+          : sizeof(draw_util::ResolveCopyShaderConstants);
+  VkPipelineLayoutCreateInfo resolve_copy_pipeline_layout_create_info;
+  resolve_copy_pipeline_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+  resolve_copy_pipeline_layout_create_info.pNext = nullptr;
+  resolve_copy_pipeline_layout_create_info.flags = 0;
+  resolve_copy_pipeline_layout_create_info.setLayoutCount =
+      kResolveCopyDescriptorSetCount;
+  resolve_copy_pipeline_layout_create_info.pSetLayouts =
+      resolve_copy_descriptor_set_layouts;
+  resolve_copy_pipeline_layout_create_info.pushConstantRangeCount = 1;
+  resolve_copy_pipeline_layout_create_info.pPushConstantRanges =
+      &resolve_copy_push_constant_range;
+  if (dfn.vkCreatePipelineLayout(
+          device, &resolve_copy_pipeline_layout_create_info, nullptr,
+          &resolve_copy_pipeline_layout_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the resolve copy pipeline "
+        "layout");
+    Shutdown();
+    return false;
+  }
+
+  // Resolve copy pipelines.
+  for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
+       ++i) {
+    const draw_util::ResolveCopyShaderInfo& resolve_copy_shader_info =
+        draw_util::resolve_copy_shader_info[i];
+    const ResolveCopyShaderCode& resolve_copy_shader_code =
+        kResolveCopyShaders[i];
+    // Partial verification that kResolveCopyShaders is up to date.
+    assert_true(resolve_copy_shader_code.unscaled &&
+                resolve_copy_shader_code.unscaled_size_bytes &&
+                resolve_copy_shader_code.scaled &&
+                resolve_copy_shader_code.scaled_size_bytes);
+    VkPipeline resolve_copy_pipeline = ui::vulkan::util::CreateComputePipeline(
+        provider, resolve_copy_pipeline_layout_,
+        draw_resolution_scaled ? resolve_copy_shader_code.scaled
+                               : resolve_copy_shader_code.unscaled,
+        draw_resolution_scaled ? resolve_copy_shader_code.scaled_size_bytes
+                               : resolve_copy_shader_code.unscaled_size_bytes);
+    if (resolve_copy_pipeline == VK_NULL_HANDLE) {
+      XELOGE(
+          "VulkanRenderTargetCache: Failed to create the resolve copy "
+          "pipeline {}",
+          resolve_copy_shader_info.debug_name);
+      Shutdown();
+      return false;
+    }
+    provider.SetDeviceObjectName(VK_OBJECT_TYPE_PIPELINE, resolve_copy_pipeline,
+                                 resolve_copy_shader_info.debug_name);
+    resolve_copy_pipelines_[i] = resolve_copy_pipeline;
+  }
+
+  // TODO(Triang3l): All paths (FSI).
+
+  depth_float24_round_ = cvars::depth_float24_round;
+
+  // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in
+  // transfers.
+  if (cvars::native_2x_msaa) {
+    const VkPhysicalDeviceLimits& device_limits =
+        provider.device_properties().limits;
+    // Multisampled integer sampled images are optional in Vulkan and in Xenia.
+    msaa_2x_attachments_supported_ =
+        (device_limits.framebufferColorSampleCounts &
+         device_limits.framebufferDepthSampleCounts &
+         device_limits.framebufferStencilSampleCounts &
+         device_limits.sampledImageColorSampleCounts &
+         device_limits.sampledImageDepthSampleCounts &
+         device_limits.sampledImageStencilSampleCounts &
+         VK_SAMPLE_COUNT_2_BIT) &&
+        (device_limits.sampledImageIntegerSampleCounts &
+         (VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) !=
+            VK_SAMPLE_COUNT_4_BIT;
+    msaa_2x_no_attachments_supported_ =
+        (device_limits.framebufferNoAttachmentsSampleCounts &
+         VK_SAMPLE_COUNT_2_BIT) != 0;
+  } else {
+    msaa_2x_attachments_supported_ = false;
+    msaa_2x_no_attachments_supported_ = false;
+  }
+
+  // Host depth storing pipeline layout.
+  VkDescriptorSetLayout host_depth_store_descriptor_set_layouts[] = {
+      // Destination EDRAM storage buffer.
+      descriptor_set_layout_storage_buffer_,
+      // Source depth / stencil texture (only depth is used).
+      descriptor_set_layout_sampled_image_x2_,
+  };
+  VkPushConstantRange host_depth_store_push_constant_range;
+  host_depth_store_push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+  host_depth_store_push_constant_range.offset = 0;
+  host_depth_store_push_constant_range.size = sizeof(HostDepthStoreConstants);
+  VkPipelineLayoutCreateInfo host_depth_store_pipeline_layout_create_info;
+  host_depth_store_pipeline_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+  host_depth_store_pipeline_layout_create_info.pNext = nullptr;
+  host_depth_store_pipeline_layout_create_info.flags = 0;
+  host_depth_store_pipeline_layout_create_info.setLayoutCount =
+      uint32_t(xe::countof(host_depth_store_descriptor_set_layouts));
+  host_depth_store_pipeline_layout_create_info.pSetLayouts =
+      host_depth_store_descriptor_set_layouts;
+  host_depth_store_pipeline_layout_create_info.pushConstantRangeCount = 1;
+  host_depth_store_pipeline_layout_create_info.pPushConstantRanges =
+      &host_depth_store_push_constant_range;
+  if (dfn.vkCreatePipelineLayout(
+          device, &host_depth_store_pipeline_layout_create_info, nullptr,
+          &host_depth_store_pipeline_layout_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the host depth storing "
+        "pipeline layout");
+    Shutdown();
+    return false;
+  }
+  const std::pair<const uint32_t*, size_t> host_depth_store_shaders[] = {
+      {shaders::host_depth_store_1xmsaa_cs,
+       sizeof(shaders::host_depth_store_1xmsaa_cs)},
+      {shaders::host_depth_store_2xmsaa_cs,
+       sizeof(shaders::host_depth_store_2xmsaa_cs)},
+      {shaders::host_depth_store_4xmsaa_cs,
+       sizeof(shaders::host_depth_store_4xmsaa_cs)},
+  };
+  for (size_t i = 0; i < xe::countof(host_depth_store_shaders); ++i) {
+    const std::pair<const uint32_t*, size_t> host_depth_store_shader =
+        host_depth_store_shaders[i];
+    VkPipeline host_depth_store_pipeline =
+        ui::vulkan::util::CreateComputePipeline(
+            provider, host_depth_store_pipeline_layout_,
+            host_depth_store_shader.first, host_depth_store_shader.second);
+    if (host_depth_store_pipeline == VK_NULL_HANDLE) {
+      XELOGE(
+          "VulkanRenderTargetCache: Failed to create the {}-sample host depth "
+          "storing pipeline",
+          uint32_t(1) << i);
+      Shutdown();
+      return false;
+    }
+    host_depth_store_pipelines_[i] = host_depth_store_pipeline;
+  }
+
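The native 2x MSAA decision earlier in Initialize() is a pure bitmask intersection: 2x is usable only if every framebuffer and sampled-image sample-count mask contains VK_SAMPLE_COUNT_2_BIT, with the extra integer-image clause rejecting devices whose integer sampling supports 4x but not 2x. The core of the check, extracted into a self-contained form (illustrative only):

    #include <vulkan/vulkan.h>

    bool SupportsNative2xMsaa(const VkPhysicalDeviceLimits& limits) {
      VkSampleCountFlags common = limits.framebufferColorSampleCounts &
                                  limits.framebufferDepthSampleCounts &
                                  limits.framebufferStencilSampleCounts &
                                  limits.sampledImageColorSampleCounts &
                                  limits.sampledImageDepthSampleCounts &
                                  limits.sampledImageStencilSampleCounts;
      if (!(common & VK_SAMPLE_COUNT_2_BIT)) {
        return false;
      }
      // Reject 2x when integer-format sampling would support 4x but not 2x -
      // ownership transfers of such render targets would have no usable count.
      VkSampleCountFlags integer_2x4x =
          limits.sampledImageIntegerSampleCounts &
          (VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
      return integer_2x4x != VK_SAMPLE_COUNT_4_BIT;
    }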
+ transfer_vertex_buffer_pool_ = + std::make_unique<ui::vulkan::VulkanUploadBufferPool>( + provider, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + std::max(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize, + sizeof(float) * 2 * 6 * + Transfer::kMaxCutoutBorderRectangles * + xenos::kEdramTileCount)); + + // Transfer vertex shader. + transfer_passthrough_vertex_shader_ = ui::vulkan::util::CreateShaderModule( + provider, shaders::passthrough_position_xy_vs, + sizeof(shaders::passthrough_position_xy_vs)); + if (transfer_passthrough_vertex_shader_ == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target ownership " + "transfer vertex shader"); + Shutdown(); + return false; + } + + // Transfer pipeline layouts. + VkDescriptorSetLayout transfer_pipeline_layout_descriptor_set_layouts + [kTransferUsedDescriptorSetCount]; + VkPushConstantRange transfer_pipeline_layout_push_constant_range; + transfer_pipeline_layout_push_constant_range.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + transfer_pipeline_layout_push_constant_range.offset = 0; + VkPipelineLayoutCreateInfo transfer_pipeline_layout_create_info; + transfer_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + transfer_pipeline_layout_create_info.pNext = nullptr; + transfer_pipeline_layout_create_info.flags = 0; + transfer_pipeline_layout_create_info.pSetLayouts = + transfer_pipeline_layout_descriptor_set_layouts; + transfer_pipeline_layout_create_info.pPushConstantRanges = + &transfer_pipeline_layout_push_constant_range; + for (size_t i = 0; i < size_t(TransferPipelineLayoutIndex::kCount); ++i) { + const TransferPipelineLayoutInfo& transfer_pipeline_layout_info = + kTransferPipelineLayoutInfos[i]; + transfer_pipeline_layout_create_info.setLayoutCount = 0; + uint32_t transfer_pipeline_layout_descriptor_sets_remaining = + transfer_pipeline_layout_info.used_descriptor_sets; + uint32_t transfer_pipeline_layout_descriptor_set_index; + while ( + xe::bit_scan_forward(transfer_pipeline_layout_descriptor_sets_remaining, + &transfer_pipeline_layout_descriptor_set_index)) { + transfer_pipeline_layout_descriptor_sets_remaining &= + ~(uint32_t(1) << transfer_pipeline_layout_descriptor_set_index); + VkDescriptorSetLayout transfer_pipeline_layout_descriptor_set_layout = + VK_NULL_HANDLE; + switch (TransferUsedDescriptorSet( + transfer_pipeline_layout_descriptor_set_index)) { + case kTransferUsedDescriptorSetHostDepthBuffer: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_storage_buffer_; + break; + case kTransferUsedDescriptorSetHostDepthStencilTextures: + case kTransferUsedDescriptorSetDepthStencilTextures: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_sampled_image_x2_; + break; + case kTransferUsedDescriptorSetColorTexture: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_sampled_image_; + break; + default: + assert_unhandled_case(TransferUsedDescriptorSet( + transfer_pipeline_layout_descriptor_set_index)); + } + transfer_pipeline_layout_descriptor_set_layouts + [transfer_pipeline_layout_create_info.setLayoutCount++] = + transfer_pipeline_layout_descriptor_set_layout; + } + transfer_pipeline_layout_push_constant_range.size = uint32_t( + sizeof(uint32_t) * + xe::bit_count(transfer_pipeline_layout_info.used_push_constant_dwords)); + transfer_pipeline_layout_create_info.pushConstantRangeCount = + transfer_pipeline_layout_info.used_push_constant_dwords ?
1 : 0; + if (dfn.vkCreatePipelineLayout( + device, &transfer_pipeline_layout_create_info, nullptr, + &transfer_pipeline_layouts_[i]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target " + "ownership transfer pipeline layout {}", + i); + Shutdown(); + return false; + } + } + + // Dump pipeline layouts. + VkDescriptorSetLayout + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetCount]; + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetEdram] = + descriptor_set_layout_storage_buffer_; + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] = + descriptor_set_layout_sampled_image_; + VkPushConstantRange dump_pipeline_layout_push_constant_range; + dump_pipeline_layout_push_constant_range.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + dump_pipeline_layout_push_constant_range.offset = 0; + dump_pipeline_layout_push_constant_range.size = + sizeof(uint32_t) * kDumpPushConstantCount; + VkPipelineLayoutCreateInfo dump_pipeline_layout_create_info; + dump_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + dump_pipeline_layout_create_info.pNext = nullptr; + dump_pipeline_layout_create_info.flags = 0; + dump_pipeline_layout_create_info.setLayoutCount = + uint32_t(xe::countof(dump_pipeline_layout_descriptor_set_layouts)); + dump_pipeline_layout_create_info.pSetLayouts = + dump_pipeline_layout_descriptor_set_layouts; + dump_pipeline_layout_create_info.pushConstantRangeCount = 1; + dump_pipeline_layout_create_info.pPushConstantRanges = + &dump_pipeline_layout_push_constant_range; + if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info, + nullptr, + &dump_pipeline_layout_color_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the color render target " + "dumping pipeline layout"); + Shutdown(); + return false; + } + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] = + descriptor_set_layout_sampled_image_x2_; + if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info, + nullptr, + &dump_pipeline_layout_depth_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the depth render target " + "dumping pipeline layout"); + Shutdown(); + return false; + } + + InitializeCommon(); + return true; +} + +void VulkanRenderTargetCache::Shutdown(bool from_destructor) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + for (const auto& dump_pipeline_pair : dump_pipelines_) { + // May be null to prevent recreation attempts. + if (dump_pipeline_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, dump_pipeline_pair.second, nullptr); + } + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + dump_pipeline_layout_depth_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + dump_pipeline_layout_color_); + + for (const auto& transfer_pipeline_array_pair : transfer_pipelines_) { + for (VkPipeline transfer_pipeline : transfer_pipeline_array_pair.second) { + // May be null to prevent recreation attempts. 
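+ // (a failed creation stores VK_NULL_HANDLE in the map so creation won't + // be reattempted on every lookup)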
+ if (transfer_pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, transfer_pipeline, nullptr); + } + } + } + transfer_pipelines_.clear(); + for (const auto& transfer_shader_pair : transfer_shaders_) { + if (transfer_shader_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, transfer_shader_pair.second, nullptr); + } + } + transfer_shaders_.clear(); + for (size_t i = 0; i < size_t(TransferPipelineLayoutIndex::kCount); ++i) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + transfer_pipeline_layouts_[i]); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, + transfer_passthrough_vertex_shader_); + transfer_vertex_buffer_pool_.reset(); + + for (size_t i = 0; i < xe::countof(host_depth_store_pipelines_); ++i) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, + host_depth_store_pipelines_[i]); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + host_depth_store_pipeline_layout_); + + last_update_framebuffer_ = VK_NULL_HANDLE; + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); + } + framebuffers_.clear(); + + last_update_render_pass_ = VK_NULL_HANDLE; + for (const auto& render_pass_pair : render_passes_) { + if (render_pass_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + } + render_passes_.clear(); + + for (VkPipeline& resolve_copy_pipeline : resolve_copy_pipelines_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, + resolve_copy_pipeline); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + resolve_copy_pipeline_layout_); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + edram_storage_buffer_descriptor_pool_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + edram_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + edram_buffer_memory_); + + descriptor_set_pool_sampled_image_x2_.reset(); + descriptor_set_pool_sampled_image_.reset(); + + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_sampled_image_x2_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_sampled_image_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_storage_buffer_); + + if (!from_destructor) { + ShutdownCommon(); + } +} + +void VulkanRenderTargetCache::ClearCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Framebuffer objects must be destroyed because they reference views of + // attachment images, which may be removed by the common ClearCache. 
+ last_update_framebuffer_ = VK_NULL_HANDLE; + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); + } + framebuffers_.clear(); + + last_update_render_pass_ = VK_NULL_HANDLE; + for (const auto& render_pass_pair : render_passes_) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + render_passes_.clear(); + + RenderTargetCache::ClearCache(); +} + +void VulkanRenderTargetCache::CompletedSubmissionUpdated() { + if (transfer_vertex_buffer_pool_) { + transfer_vertex_buffer_pool_->Reclaim( + command_processor_.GetCompletedSubmission()); + } +} + +void VulkanRenderTargetCache::EndSubmission() { + if (transfer_vertex_buffer_pool_) { + transfer_vertex_buffer_pool_->FlushWrites(); + } +} + +bool VulkanRenderTargetCache::Resolve(const Memory& memory, + VulkanSharedMemory& shared_memory, + VulkanTextureCache& texture_cache, + uint32_t& written_address_out, + uint32_t& written_length_out) { + written_address_out = 0; + written_length_out = 0; + + bool draw_resolution_scaled = IsDrawResolutionScaled(); + + draw_util::ResolveInfo resolve_info; + if (!draw_util::GetResolveInfo( + register_file(), memory, trace_writer_, draw_resolution_scale_x(), + draw_resolution_scale_y(), IsFixedRG16TruncatedToMinus1To1(), + IsFixedRGBA16TruncatedToMinus1To1(), resolve_info)) { + return false; + } + + // Nothing to copy/clear. + if (!resolve_info.coordinate_info.width_div_8 || + !resolve_info.coordinate_info.height_div_8) { + return true; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + // Copying. + bool copied = false; + if (resolve_info.copy_dest_extent_length) { + if (GetPath() == Path::kHostRenderTargets) { + // Dump the current contents of the render targets owning the affected + // range to edram_buffer_. + // TODO(Triang3l): Direct host render target -> shared memory resolve + // shaders for non-converting cases. + uint32_t dump_base; + uint32_t dump_row_length_used; + uint32_t dump_rows; + uint32_t dump_pitch; + resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used, + dump_rows, dump_pitch); + DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch); + } + + draw_util::ResolveCopyShaderConstants copy_shader_constants; + uint32_t copy_group_count_x, copy_group_count_y; + draw_util::ResolveCopyShaderIndex copy_shader = resolve_info.GetCopyShader( + draw_resolution_scale_x(), draw_resolution_scale_y(), + copy_shader_constants, copy_group_count_x, copy_group_count_y); + assert_true(copy_group_count_x && copy_group_count_y); + if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) { + const draw_util::ResolveCopyShaderInfo& copy_shader_info = + draw_util::resolve_copy_shader_info[size_t(copy_shader)]; + + // Make sure there is memory to write to. + bool copy_dest_committed; + // TODO(Triang3l): Resolution-scaled buffer committing. + copy_dest_committed = + shared_memory.RequestRange(resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length); + if (!copy_dest_committed) { + XELOGE( + "VulkanRenderTargetCache: Failed to obtain the resolve destination " + "memory region"); + } else { + // TODO(Triang3l): Switching between descriptors if exceeding + // maxStorageBufferRange. 
+ // TODO(Triang3l): Use a single 512 MB shared memory binding if + // possible. + VkDescriptorSet descriptor_set_dest = + command_processor_.AllocateSingleTransientDescriptor( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + if (descriptor_set_dest != VK_NULL_HANDLE) { + // Write the destination descriptor. + // TODO(Triang3l): Scaled resolve buffer binding. + VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info; + write_descriptor_set_dest_buffer_info.buffer = shared_memory.buffer(); + write_descriptor_set_dest_buffer_info.offset = + resolve_info.copy_dest_base; + write_descriptor_set_dest_buffer_info.range = + resolve_info.copy_dest_extent_start - + resolve_info.copy_dest_base + + resolve_info.copy_dest_extent_length; + VkWriteDescriptorSet write_descriptor_set_dest; + write_descriptor_set_dest.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_dest.pNext = nullptr; + write_descriptor_set_dest.dstSet = descriptor_set_dest; + write_descriptor_set_dest.dstBinding = 0; + write_descriptor_set_dest.dstArrayElement = 0; + write_descriptor_set_dest.descriptorCount = 1; + write_descriptor_set_dest.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_set_dest.pImageInfo = nullptr; + write_descriptor_set_dest.pBufferInfo = + &write_descriptor_set_dest_buffer_info; + write_descriptor_set_dest.pTexelBufferView = nullptr; + dfn.vkUpdateDescriptorSets(device, 1, &write_descriptor_set_dest, 0, + nullptr); + + // Submit the resolve. + // TODO(Triang3l): Transition the scaled resolve buffer. + shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite, + std::pair<uint32_t, uint32_t>( + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length)); + UseEdramBuffer(EdramBufferUsage::kComputeRead); + command_processor_.BindExternalComputePipeline( + resolve_copy_pipelines_[size_t(copy_shader)]); + VkDescriptorSet descriptor_sets[kResolveCopyDescriptorSetCount] = {}; + descriptor_sets[kResolveCopyDescriptorSetEdram] = + edram_storage_buffer_descriptor_set_; + descriptor_sets[kResolveCopyDescriptorSetDest] = descriptor_set_dest; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, resolve_copy_pipeline_layout_, 0, + uint32_t(xe::countof(descriptor_sets)), descriptor_sets, 0, + nullptr); + if (draw_resolution_scaled) { + command_buffer.CmdVkPushConstants( + resolve_copy_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(copy_shader_constants.dest_relative), + &copy_shader_constants.dest_relative); + } else { + // TODO(Triang3l): Proper dest_base in case of one 512 MB shared + // memory binding, or multiple shared memory bindings in case of + // splitting due to maxStorageBufferRange overflow. + copy_shader_constants.dest_base -= + uint32_t(write_descriptor_set_dest_buffer_info.offset); + command_buffer.CmdVkPushConstants( + resolve_copy_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(copy_shader_constants), &copy_shader_constants); + } + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y, + 1); + + // Invalidate textures and mark the range as scaled if needed. + texture_cache.MarkRangeAsResolved( + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length); + written_address_out = resolve_info.copy_dest_extent_start; + written_length_out = resolve_info.copy_dest_extent_length; + copied = true; + } + } + } + } else { + copied = true; + } + + // Clearing.
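+ // A clear is performed through the common host render target path: the + // render targets covering the cleared EDRAM range (at most one depth and + // one color) are filled with the guest clear values via + // PerformTransfersAndResolveClears, reusing the ownership transfer + // machinery.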
+ bool cleared = false; + bool clear_depth = resolve_info.IsClearingDepth(); + bool clear_color = resolve_info.IsClearingColor(); + if (clear_depth || clear_color) { + // TODO(Triang3l): Fragment shader interlock path EDRAM buffer clearing. + if (GetPath() == Path::kHostRenderTargets) { + Transfer::Rectangle clear_rectangle; + RenderTarget* clear_render_targets[2]; + // If PrepareHostRenderTargetsResolveClear returns false, may be just an + // empty region (success) or an error - don't care. + if (PrepareHostRenderTargetsResolveClear( + resolve_info, clear_rectangle, clear_render_targets[0], + clear_transfers_[0], clear_render_targets[1], + clear_transfers_[1])) { + uint64_t clear_values[2]; + clear_values[0] = resolve_info.rb_depth_clear; + clear_values[1] = resolve_info.rb_color_clear | + (uint64_t(resolve_info.rb_color_clear_lo) << 32); + PerformTransfersAndResolveClears(2, clear_render_targets, + clear_transfers_, clear_values, + &clear_rectangle); + } + cleared = true; + } + } else { + cleared = true; + } + + return copied && cleared; +} + +bool VulkanRenderTargetCache::Update( + bool is_rasterization_done, reg::RB_DEPTHCONTROL normalized_depth_control, + uint32_t normalized_color_mask, const Shader& vertex_shader) { + if (!RenderTargetCache::Update(is_rasterization_done, + normalized_depth_control, + normalized_color_mask, vertex_shader)) { + return false; + } + + // TODO(Triang3l): All paths (FSI). + + RenderTarget* const* depth_and_color_render_targets = + last_update_accumulated_render_targets(); + + PerformTransfersAndResolveClears(1 + xenos::kMaxColorRenderTargets, + depth_and_color_render_targets, + last_update_transfers()); + + auto rb_surface_info = register_file().Get<reg::RB_SURFACE_INFO>(); + uint32_t render_targets_are_srgb = + gamma_render_target_as_srgb_ + ? last_update_accumulated_color_targets_are_gamma() + : 0; + + RenderPassKey render_pass_key; + render_pass_key.msaa_samples = rb_surface_info.msaa_samples; + if (depth_and_color_render_targets[0]) { + render_pass_key.depth_and_color_used |= 1 << 0; + render_pass_key.depth_format = + depth_and_color_render_targets[0]->key().GetDepthFormat(); + } + if (depth_and_color_render_targets[1]) { + render_pass_key.depth_and_color_used |= 1 << 1; + render_pass_key.color_0_view_format = + (render_targets_are_srgb & (1 << 0)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[1]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[2]) { + render_pass_key.depth_and_color_used |= 1 << 2; + render_pass_key.color_1_view_format = + (render_targets_are_srgb & (1 << 1)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[2]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[3]) { + render_pass_key.depth_and_color_used |= 1 << 3; + render_pass_key.color_2_view_format = + (render_targets_are_srgb & (1 << 2)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[3]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[4]) { + render_pass_key.depth_and_color_used |= 1 << 4; + render_pass_key.color_3_view_format = + (render_targets_are_srgb & (1 << 3)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[4]->key().GetColorFormat(); + } + + const Framebuffer* framebuffer = last_update_framebuffer_; + VkRenderPass render_pass = last_update_render_pass_key_ == render_pass_key + ?
last_update_render_pass_ + : VK_NULL_HANDLE; + if (render_pass == VK_NULL_HANDLE) { + render_pass = GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + // Framebuffer for a different render pass needed now. + framebuffer = nullptr; + } + + uint32_t pitch_tiles_at_32bpp = + ((rb_surface_info.surface_pitch + << uint32_t(rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X)) + + (xenos::kEdramTileWidthSamples - 1)) / + xenos::kEdramTileWidthSamples; + if (framebuffer) { + if (last_update_framebuffer_pitch_tiles_at_32bpp_ != pitch_tiles_at_32bpp || + std::memcmp(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_))) { + framebuffer = nullptr; + } + } + if (!framebuffer) { + framebuffer = GetFramebuffer(render_pass_key, pitch_tiles_at_32bpp, + depth_and_color_render_targets); + if (!framebuffer) { + return false; + } + } + + // Successful update - write the new configuration. + last_update_render_pass_key_ = render_pass_key; + last_update_render_pass_ = render_pass; + last_update_framebuffer_pitch_tiles_at_32bpp_ = pitch_tiles_at_32bpp; + std::memcpy(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_)); + last_update_framebuffer_ = framebuffer; + + // Transition the used render targets. + for (uint32_t i = 0; i < 1 + xenos::kMaxColorRenderTargets; ++i) { + RenderTarget* rt = depth_and_color_render_targets[i]; + if (!rt) { + continue; + } + auto& vulkan_rt = *static_cast<VulkanRenderTarget*>(rt); + VkPipelineStageFlags rt_dst_stage_mask; + VkAccessFlags rt_dst_access_mask; + VkImageLayout rt_new_layout; + VulkanRenderTarget::GetDrawUsage(i == 0, &rt_dst_stage_mask, + &rt_dst_access_mask, &rt_new_layout); + command_processor_.PushImageMemoryBarrier( + vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + i ? VK_IMAGE_ASPECT_COLOR_BIT + : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)), + vulkan_rt.current_stage_mask(), rt_dst_stage_mask, + vulkan_rt.current_access_mask(), rt_dst_access_mask, + vulkan_rt.current_layout(), rt_new_layout); + vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout); + } + + return true; +} + +VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { + auto it = render_passes_.find(key.key); + if (it != render_passes_.end()) { + return it->second; + } + + VkSampleCountFlagBits samples; + switch (key.msaa_samples) { + case xenos::MsaaSamples::k1X: + samples = VK_SAMPLE_COUNT_1_BIT; + break; + case xenos::MsaaSamples::k2X: + samples = IsMsaa2xSupported(key.depth_and_color_used != 0) + ?
VK_SAMPLE_COUNT_2_BIT + : VK_SAMPLE_COUNT_4_BIT; + break; + case xenos::MsaaSamples::k4X: + samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + return VK_NULL_HANDLE; + } + + VkAttachmentDescription attachments[1 + xenos::kMaxColorRenderTargets]; + if (key.depth_and_color_used & 0b1) { + VkAttachmentDescription& attachment = attachments[0]; + attachment.flags = 0; + attachment.format = GetDepthVulkanFormat(key.depth_format); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.initialLayout = VulkanRenderTarget::kDepthDrawLayout; + attachment.finalLayout = VulkanRenderTarget::kDepthDrawLayout; + } + VkAttachmentReference color_attachments[xenos::kMaxColorRenderTargets]; + xenos::ColorRenderTargetFormat color_formats[] = { + key.color_0_view_format, + key.color_1_view_format, + key.color_2_view_format, + key.color_3_view_format, + }; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + VkAttachmentReference& color_attachment = color_attachments[i]; + color_attachment.layout = VulkanRenderTarget::kColorDrawLayout; + uint32_t attachment_bit = uint32_t(1) << (1 + i); + if (!(key.depth_and_color_used & attachment_bit)) { + color_attachment.attachment = VK_ATTACHMENT_UNUSED; + continue; + } + uint32_t attachment_index = + xe::bit_count(key.depth_and_color_used & (attachment_bit - 1)); + color_attachment.attachment = attachment_index; + VkAttachmentDescription& attachment = attachments[attachment_index]; + attachment.flags = 0; + xenos::ColorRenderTargetFormat color_format = color_formats[i]; + attachment.format = + key.color_rts_use_transfer_formats + ? GetColorOwnershipTransferVulkanFormat(color_format) + : GetColorVulkanFormat(color_format); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment.initialLayout = VulkanRenderTarget::kColorDrawLayout; + attachment.finalLayout = VulkanRenderTarget::kColorDrawLayout; + } + + VkAttachmentReference depth_stencil_attachment; + depth_stencil_attachment.attachment = + (key.depth_and_color_used & 0b1) ? 0 : VK_ATTACHMENT_UNUSED; + depth_stencil_attachment.layout = VulkanRenderTarget::kDepthDrawLayout; + + VkSubpassDescription subpass; + subpass.flags = 0; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = + 32 - xe::lzcnt(uint32_t(key.depth_and_color_used >> 1)); + subpass.pColorAttachments = color_attachments; + subpass.pResolveAttachments = nullptr; + subpass.pDepthStencilAttachment = + (key.depth_and_color_used & 0b1) ? 
&depth_stencil_attachment : nullptr; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkPipelineStageFlags dependency_stage_mask = 0; + VkAccessFlags dependency_access_mask = 0; + if (key.depth_and_color_used & 0b1) { + dependency_stage_mask |= VulkanRenderTarget::kDepthDrawStageMask; + dependency_access_mask |= VulkanRenderTarget::kDepthDrawAccessMask; + } + if (key.depth_and_color_used >> 1) { + dependency_stage_mask |= VulkanRenderTarget::kColorDrawStageMask; + dependency_access_mask |= VulkanRenderTarget::kColorDrawAccessMask; + } + VkSubpassDependency subpass_dependencies[2]; + subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[0].dstSubpass = 0; + subpass_dependencies[0].srcStageMask = dependency_stage_mask; + subpass_dependencies[0].dstStageMask = dependency_stage_mask; + subpass_dependencies[0].srcAccessMask = dependency_access_mask; + subpass_dependencies[0].dstAccessMask = dependency_access_mask; + subpass_dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + subpass_dependencies[1].srcSubpass = 0; + subpass_dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[1].srcStageMask = dependency_stage_mask; + subpass_dependencies[1].dstStageMask = dependency_stage_mask; + subpass_dependencies[1].srcAccessMask = dependency_access_mask; + subpass_dependencies[1].dstAccessMask = dependency_access_mask; + subpass_dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + + VkRenderPassCreateInfo render_pass_create_info; + render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.pNext = nullptr; + render_pass_create_info.flags = 0; + render_pass_create_info.attachmentCount = + xe::bit_count(key.depth_and_color_used); + render_pass_create_info.pAttachments = attachments; + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &subpass; + render_pass_create_info.dependencyCount = + key.depth_and_color_used ? uint32_t(xe::countof(subpass_dependencies)) + : 0; + render_pass_create_info.pDependencies = subpass_dependencies; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkRenderPass render_pass; + if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, + &render_pass) != VK_SUCCESS) { + XELOGE("VulkanRenderTargetCache: Failed to create a render pass"); + render_passes_.emplace(key.key, VK_NULL_HANDLE); + return VK_NULL_HANDLE; + } + render_passes_.emplace(key.key, render_pass); + return render_pass; +} + +VkFormat VulkanRenderTargetCache::GetDepthVulkanFormat( + xenos::DepthRenderTargetFormat format) const { + if (format == xenos::DepthRenderTargetFormat::kD24S8 && + depth_unorm24_vulkan_format_supported()) { + return VK_FORMAT_D24_UNORM_S8_UINT; + } + return VK_FORMAT_D32_SFLOAT_S8_UINT; +} + +VkFormat VulkanRenderTargetCache::GetColorVulkanFormat( + xenos::ColorRenderTargetFormat format) const { + switch (format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + return gamma_render_target_as_srgb_ ? 
VK_FORMAT_R8G8B8A8_SRGB + : VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16B16A16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_SFLOAT; + default: + assert_unhandled_case(format); + return VK_FORMAT_UNDEFINED; + } +} + +VkFormat VulkanRenderTargetCache::GetColorOwnershipTransferVulkanFormat( + xenos::ColorRenderTargetFormat format, bool* is_integer_out) const { + if (is_integer_out) { + *is_integer_out = true; + } + // Floating-point numbers have NaNs that need to be propagated without + // modifications to the bit representation, and SNORM has two representations + // of -1. + switch (format) { + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_UINT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_UINT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_UINT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_UINT; + default: + if (is_integer_out) { + *is_integer_out = false; + } + return GetColorVulkanFormat(format); + } +} + +VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() { + const ui::vulkan::VulkanProvider& provider = + render_target_cache_.command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key().is_depth + ?
*render_target_cache_.descriptor_set_pool_sampled_image_x2_ + : *render_target_cache_.descriptor_set_pool_sampled_image_; + descriptor_set_pool.Free(descriptor_set_index_transfer_source_); + if (view_color_transfer_separate_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_color_transfer_separate_, nullptr); + } + if (view_srgb_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb_, nullptr); + } + if (view_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_stencil_, nullptr); + } + if (view_depth_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_depth_stencil_, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color_, nullptr); + dfn.vkDestroyImage(device, image_, nullptr); + dfn.vkFreeMemory(device, memory_, nullptr); +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferWidth; +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferHeight; +} + +RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( + RenderTargetKey key) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Create the image. + + VkImageCreateInfo image_create_info; + image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_create_info.pNext = nullptr; + image_create_info.flags = 0; + image_create_info.imageType = VK_IMAGE_TYPE_2D; + image_create_info.extent.width = key.GetWidth() * draw_resolution_scale_x(); + image_create_info.extent.height = + GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples) * + draw_resolution_scale_y(); + image_create_info.extent.depth = 1; + image_create_info.mipLevels = 1; + image_create_info.arrayLayers = 1; + if (key.msaa_samples == xenos::MsaaSamples::k2X && + !msaa_2x_attachments_supported_) { + image_create_info.samples = VK_SAMPLE_COUNT_4_BIT; + } else { + image_create_info.samples = + VkSampleCountFlagBits(uint32_t(1) << uint32_t(key.msaa_samples)); + } + image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_create_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_create_info.queueFamilyIndexCount = 0; + image_create_info.pQueueFamilyIndices = nullptr; + image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkFormat transfer_format; + bool is_srgb_view_needed = false; + if (key.is_depth) { + image_create_info.format = GetDepthVulkanFormat(key.GetDepthFormat()); + transfer_format = image_create_info.format; + image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + xenos::ColorRenderTargetFormat color_format = key.GetColorFormat(); + image_create_info.format = GetColorVulkanFormat(color_format); + transfer_format = GetColorOwnershipTransferVulkanFormat(color_format); + is_srgb_view_needed = + gamma_render_target_as_srgb_ && + (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8 || + color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA); + if (image_create_info.format != transfer_format || is_srgb_view_needed) { + image_create_info.flags |= 
VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (image_create_info.format == VK_FORMAT_UNDEFINED) { + XELOGE("VulkanRenderTargetCache: Unknown {} render target format {}", + key.is_depth ? "depth" : "color", key.resource_format); + return nullptr; + } + VkImage image; + VkDeviceMemory memory; + if (!ui::vulkan::util::CreateDedicatedAllocationImage( + provider, image_create_info, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, image, memory)) { + XELOGE( + "VulkanRenderTarget: Failed to create a {}x{} {}xMSAA {} render target " + "image", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); + return nullptr; + } + + // Create the image views. + + VkImageViewCreateInfo view_create_info; + view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_create_info.pNext = nullptr; + view_create_info.flags = 0; + view_create_info.image = image; + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_create_info.format = image_create_info.format; + view_create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange( + key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT); + VkImageView view_depth_color; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_color) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTarget: Failed to create a {} view for a {}x{} {}xMSAA {} " + "render target", + key.is_depth ? 
"depth" : "color", image_create_info.extent.width, + image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + VkImageView view_depth_stencil = VK_NULL_HANDLE; + VkImageView view_stencil = VK_NULL_HANDLE; + VkImageView view_srgb = VK_NULL_HANDLE; + VkImageView view_color_transfer_separate = VK_NULL_HANDLE; + if (key.is_depth) { + view_create_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_stencil) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTarget: Failed to create a depth / stencil view for a " + "{}x{} {}xMSAA {} render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat())); + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_stencil) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTarget: Failed to create a stencil view for a {}x{} " + "{}xMSAA render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat())); + dfn.vkDestroyImageView(device, view_depth_stencil, nullptr); + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } else { + if (is_srgb_view_needed) { + view_create_info.format = VK_FORMAT_R8G8B8A8_SRGB; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_srgb) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTarget: Failed to create an sRGB view for a {}x{} " + "{}xMSAA render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetColorRenderTargetFormatName(key.GetColorFormat())); + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + if (transfer_format != image_create_info.format) { + view_create_info.format = transfer_format; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_color_transfer_separate) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTarget: Failed to create a transfer view for a {}x{} " + "{}xMSAA {} render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); + if (view_srgb != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + } + + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key.is_depth ? 
*descriptor_set_pool_sampled_image_x2_ + : *descriptor_set_pool_sampled_image_; + size_t descriptor_set_index_transfer_source = descriptor_set_pool.Allocate(); + if (descriptor_set_index_transfer_source == SIZE_MAX) { + XELOGE( + "VulkanRenderTargetCache: Failed to allocate sampled image descriptors " + "for a {} render target", + key.is_depth ? "depth/stencil" : "color"); + if (view_color_transfer_separate != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_color_transfer_separate, nullptr); + } + if (view_srgb != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + VkDescriptorSet descriptor_set_transfer_source = + descriptor_set_pool.Get(descriptor_set_index_transfer_source); + VkWriteDescriptorSet descriptor_set_write[2]; + VkDescriptorImageInfo descriptor_set_write_depth_color; + descriptor_set_write_depth_color.sampler = VK_NULL_HANDLE; + descriptor_set_write_depth_color.imageView = + view_color_transfer_separate != VK_NULL_HANDLE + ? view_color_transfer_separate + : view_depth_color; + descriptor_set_write_depth_color.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + descriptor_set_write[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write[0].pNext = nullptr; + descriptor_set_write[0].dstSet = descriptor_set_transfer_source; + descriptor_set_write[0].dstBinding = 0; + descriptor_set_write[0].dstArrayElement = 0; + descriptor_set_write[0].descriptorCount = 1; + descriptor_set_write[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_write[0].pImageInfo = &descriptor_set_write_depth_color; + descriptor_set_write[0].pBufferInfo = nullptr; + descriptor_set_write[0].pTexelBufferView = nullptr; + VkDescriptorImageInfo descriptor_set_write_stencil; + if (key.is_depth) { + descriptor_set_write_stencil.sampler = VK_NULL_HANDLE; + descriptor_set_write_stencil.imageView = view_stencil; + descriptor_set_write_stencil.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + descriptor_set_write[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write[1].pNext = nullptr; + descriptor_set_write[1].dstSet = descriptor_set_transfer_source; + descriptor_set_write[1].dstBinding = 1; + descriptor_set_write[1].dstArrayElement = 0; + descriptor_set_write[1].descriptorCount = 1; + descriptor_set_write[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_write[1].pImageInfo = &descriptor_set_write_stencil; + descriptor_set_write[1].pBufferInfo = nullptr; + descriptor_set_write[1].pTexelBufferView = nullptr; + } + dfn.vkUpdateDescriptorSets(device, key.is_depth ? 2 : 1, descriptor_set_write, + 0, nullptr); + + return new VulkanRenderTarget(key, *this, image, memory, view_depth_color, + view_depth_stencil, view_stencil, view_srgb, + view_color_transfer_separate, + descriptor_set_index_transfer_source); +} + +bool VulkanRenderTargetCache::IsHostDepthEncodingDifferent( + xenos::DepthRenderTargetFormat format) const { + // TODO(Triang3l): Conversion directly in shaders. 
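+ // The host encoding differs whenever the guest format can't be stored + // losslessly: 20e4 floating-point depth (kD24FS8) is always emulated with + // float32, and 24-bit unorm depth (kD24S8) has to be emulated with float32 + // as well when VK_FORMAT_D24_UNORM_S8_UINT is not supported (see + // GetDepthVulkanFormat).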
+ switch (format) { + case xenos::DepthRenderTargetFormat::kD24S8: + return !depth_unorm24_vulkan_format_supported(); + case xenos::DepthRenderTargetFormat::kD24FS8: + return true; + } + return false; +} + +void VulkanRenderTargetCache::GetEdramBufferUsageMasks( + EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out, + VkAccessFlags& access_mask_out) { + switch (usage) { + case EdramBufferUsage::kFragmentRead: + stage_mask_out = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT; + break; + case EdramBufferUsage::kFragmentReadWrite: + stage_mask_out = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + break; + case EdramBufferUsage::kComputeRead: + stage_mask_out = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT; + break; + case EdramBufferUsage::kComputeWrite: + stage_mask_out = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_WRITE_BIT; + break; + case EdramBufferUsage::kTransferRead: + stage_mask_out = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask_out = VK_ACCESS_TRANSFER_READ_BIT; + break; + case EdramBufferUsage::kTransferWrite: + stage_mask_out = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask_out = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + default: + assert_unhandled_case(usage); + } +} + +void VulkanRenderTargetCache::UseEdramBuffer(EdramBufferUsage new_usage) { + if (edram_buffer_usage_ == new_usage) { + return; + } + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkAccessFlags src_access_mask, dst_access_mask; + GetEdramBufferUsageMasks(edram_buffer_usage_, src_stage_mask, + src_access_mask); + GetEdramBufferUsageMasks(new_usage, dst_stage_mask, dst_access_mask); + if (command_processor_.PushBufferMemoryBarrier( + edram_buffer_, 0, VK_WHOLE_SIZE, src_stage_mask, dst_stage_mask, + src_access_mask, dst_access_mask)) { + // Resetting edram_buffer_modification_status_ only if the barrier has been + // truly inserted. + edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + } + edram_buffer_usage_ = new_usage; +} + +void VulkanRenderTargetCache::MarkEdramBufferModified( + EdramBufferModificationStatus modification_status) { + assert_true(modification_status != + EdramBufferModificationStatus::kUnmodified); + switch (edram_buffer_usage_) { + case EdramBufferUsage::kFragmentReadWrite: + // max because being modified via unordered access requires stricter + // synchronization than via fragment shader interlocks. + edram_buffer_modification_status_ = + std::max(edram_buffer_modification_status_, modification_status); + break; + case EdramBufferUsage::kComputeWrite: + assert_true(modification_status == + EdramBufferModificationStatus::kViaUnordered); + modification_status = EdramBufferModificationStatus::kViaUnordered; + break; + default: + assert_always( + "While changing the usage of the EDRAM buffer before marking it as " + "modified is handled safely (but will cause spurious marking as " + "modified after the changes have been implicitly committed by the " + "usage switch), normally that shouldn't be done and is an " + "indication of architectural mistakes. 
Alternatively, this may " + "indicate that the usage switch has been forgotten before writing, " + "which is a clearly invalid situation."); + } +} + +void VulkanRenderTargetCache::CommitEdramBufferShaderWrites( + EdramBufferModificationStatus commit_status) { + assert_true(commit_status != EdramBufferModificationStatus::kUnmodified); + if (edram_buffer_modification_status_ < commit_status) { + return; + } + VkPipelineStageFlags stage_mask; + VkAccessFlags access_mask; + GetEdramBufferUsageMasks(edram_buffer_usage_, stage_mask, access_mask); + assert_not_zero(access_mask & VK_ACCESS_SHADER_WRITE_BIT); + command_processor_.PushBufferMemoryBarrier( + edram_buffer_, 0, VK_WHOLE_SIZE, stage_mask, stage_mask, access_mask, + access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false); + edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + PixelShaderInterlockFullEdramBarrierPlaced(); +} + +const VulkanRenderTargetCache::Framebuffer* +VulkanRenderTargetCache::GetFramebuffer( + RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, + const RenderTarget* const* depth_and_color_render_targets) { + FramebufferKey key; + key.render_pass_key = render_pass_key; + key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; + if (render_pass_key.depth_and_color_used & (1 << 0)) { + key.depth_base_tiles = depth_and_color_render_targets[0]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 1)) { + key.color_0_base_tiles = + depth_and_color_render_targets[1]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 2)) { + key.color_1_base_tiles = + depth_and_color_render_targets[2]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 3)) { + key.color_2_base_tiles = + depth_and_color_render_targets[3]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 4)) { + key.color_3_base_tiles = + depth_and_color_render_targets[4]->key().base_tiles; + } + auto it = framebuffers_.find(key); + if (it != framebuffers_.end()) { + return &it->second; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkRenderPass render_pass = GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return nullptr; + } + + VkImageView attachments[1 + xenos::kMaxColorRenderTargets]; + uint32_t attachment_count = 0; + uint32_t depth_and_color_rts_remaining = render_pass_key.depth_and_color_used; + uint32_t rt_index; + while (xe::bit_scan_forward(depth_and_color_rts_remaining, &rt_index)) { + depth_and_color_rts_remaining &= ~(uint32_t(1) << rt_index); + const auto& vulkan_rt = *static_cast<const VulkanRenderTarget*>( + depth_and_color_render_targets[rt_index]); + VkImageView attachment; + if (rt_index) { + attachment = render_pass_key.color_rts_use_transfer_formats + ?
vulkan_rt.view_color_transfer() + : vulkan_rt.view_depth_color(); + } else { + attachment = vulkan_rt.view_depth_stencil(); + } + attachments[attachment_count++] = attachment; + } + + VkFramebufferCreateInfo framebuffer_create_info; + framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_create_info.pNext = nullptr; + framebuffer_create_info.flags = 0; + framebuffer_create_info.renderPass = render_pass; + framebuffer_create_info.attachmentCount = attachment_count; + framebuffer_create_info.pAttachments = attachments; + VkExtent2D host_extent; + if (pitch_tiles_at_32bpp) { + host_extent.width = RenderTargetKey::GetWidth(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + host_extent.height = GetRenderTargetHeight(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + } else { + assert_zero(render_pass_key.depth_and_color_used); + host_extent.width = 0; + host_extent.height = 0; + } + // Vulkan requires width and height greater than 0. + framebuffer_create_info.width = std::max(host_extent.width, uint32_t(1)); + framebuffer_create_info.height = std::max(host_extent.height, uint32_t(1)); + framebuffer_create_info.layers = 1; + VkFramebuffer framebuffer; + if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, + &framebuffer) != VK_SUCCESS) { + return nullptr; + } + // Created at a persistent location - safe to use pointers. + return &framebuffers_ + .emplace(std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple(framebuffer, host_extent)) + .first->second; +} + +VkShaderModule VulkanRenderTargetCache::GetTransferShader( + TransferShaderKey key) { + auto shader_it = transfer_shaders_.find(key); + if (shader_it != transfer_shaders_.end()) { + return shader_it->second; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + std::vector<spv::Id> id_vector_temp; + std::vector<unsigned int> uint_vector_temp; + + spv::Builder builder(spv::Spv_1_0, + (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1, + nullptr); + spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450"); + builder.addCapability(spv::CapabilityShader); + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + builder.setSource(spv::SourceLanguageUnknown, 0); + + spv::Id type_void = builder.makeVoidType(); + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_int2 = builder.makeVectorType(type_int, 2); + spv::Id type_uint = builder.makeUintType(32); + spv::Id type_uint2 = builder.makeVectorType(type_uint, 2); + spv::Id type_uint4 = builder.makeVectorType(type_uint, 4); + spv::Id type_float = builder.makeFloatType(32); + spv::Id type_float2 = builder.makeVectorType(type_float, 2); + spv::Id type_float4 = builder.makeVectorType(type_float, 4); + + const TransferModeInfo& mode = kTransferModes[size_t(key.mode)]; + const TransferPipelineLayoutInfo& pipeline_layout_info = + kTransferPipelineLayoutInfos[size_t(mode.pipeline_layout)]; + + // If not dest_is_color, it's depth, or stencil bit - 40-sample columns are + // swapped as opposed to color source.
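+ // (In the Xenos EDRAM layout, depth / stencil tiles store the two + // 40-sample-wide halves of each 80-sample row in the opposite order + // compared to color tiles, so transfers between depth and color sources + // and destinations have to swap the halves.)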
+ bool dest_is_color = (mode.output == TransferOutput::kColor); + xenos::ColorRenderTargetFormat dest_color_format = + xenos::ColorRenderTargetFormat(key.dest_resource_format); + xenos::DepthRenderTargetFormat dest_depth_format = + xenos::DepthRenderTargetFormat(key.dest_resource_format); + bool dest_is_64bpp = + dest_is_color && xenos::IsColorRenderTargetFormat64bpp(dest_color_format); + + xenos::ColorRenderTargetFormat source_color_format = + xenos::ColorRenderTargetFormat(key.source_resource_format); + xenos::DepthRenderTargetFormat source_depth_format = + xenos::DepthRenderTargetFormat(key.source_resource_format); + // If not source_is_color, it's depth / stencil - 40-sample columns are + // swapped as opposed to color destination. + bool source_is_color = (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) != 0; + bool source_is_64bpp; + uint32_t source_color_format_component_count; + uint32_t source_color_texture_component_mask; + bool source_color_is_uint; + spv::Id source_color_component_type; + if (source_is_color) { + assert_zero(pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit); + source_is_64bpp = + xenos::IsColorRenderTargetFormat64bpp(source_color_format); + source_color_format_component_count = + xenos::GetColorRenderTargetFormatComponentCount(source_color_format); + if (mode.output == TransferOutput::kStencilBit) { + if (source_is_64bpp && !dest_is_64bpp) { + // Need one component, but choosing from the two 32bpp halves of the + // 64bpp sample. + source_color_texture_component_mask = + 0b1 | (0b1 << (source_color_format_component_count >> 1)); + } else { + // Red is at least 8 bits per component in all formats. + source_color_texture_component_mask = 0b1; + } + } else { + source_color_texture_component_mask = + (uint32_t(1) << source_color_format_component_count) - 1; + } + GetColorOwnershipTransferVulkanFormat(source_color_format, + &source_color_is_uint); + source_color_component_type = source_color_is_uint ? type_uint : type_float; + } else { + source_is_64bpp = false; + source_color_format_component_count = 0; + source_color_texture_component_mask = 0; + source_color_is_uint = false; + source_color_component_type = spv::NoType; + } + + std::vector<spv::Id> main_interface; + + // Outputs. + bool shader_uses_stencil_reference_output = + mode.output == TransferOutput::kDepth && + provider.device_extensions().ext_shader_stencil_export; + bool dest_color_is_uint = false; + uint32_t dest_color_component_count = 0; + spv::Id type_fragment_data_component = spv::NoResult; + spv::Id type_fragment_data = spv::NoResult; + spv::Id output_fragment_data = spv::NoResult; + spv::Id output_fragment_depth = spv::NoResult; + spv::Id output_fragment_stencil_ref = spv::NoResult; + switch (mode.output) { + case TransferOutput::kColor: + GetColorOwnershipTransferVulkanFormat(dest_color_format, + &dest_color_is_uint); + dest_color_component_count = + xenos::GetColorRenderTargetFormatComponentCount(dest_color_format); + type_fragment_data_component = + dest_color_is_uint ? type_uint : type_float; + type_fragment_data = + dest_color_component_count > 1 + ?
builder.makeVectorType(type_fragment_data_component, + dest_color_component_count) + : type_fragment_data_component; + output_fragment_data = builder.createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_fragment_data, + "xe_transfer_fragment_data"); + builder.addDecoration(output_fragment_data, spv::DecorationLocation, + key.dest_color_rt_index); + main_interface.push_back(output_fragment_data); + break; + case TransferOutput::kDepth: + output_fragment_depth = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float, "gl_FragDepth"); + builder.addDecoration(output_fragment_depth, spv::DecorationBuiltIn, + spv::BuiltInFragDepth); + main_interface.push_back(output_fragment_depth); + if (shader_uses_stencil_reference_output) { + builder.addExtension("SPV_EXT_shader_stencil_export"); + builder.addCapability(spv::CapabilityStencilExportEXT); + output_fragment_stencil_ref = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_int, "gl_FragStencilRefARB"); + builder.addDecoration(output_fragment_stencil_ref, + spv::DecorationBuiltIn, + spv::BuiltInFragStencilRefEXT); + main_interface.push_back(output_fragment_stencil_ref); + } + break; + default: + break; + } + + // Bindings. + // Generating SPIR-V 1.0, no need to add bindings to the entry point's + // interface until SPIR-V 1.4. + // Color source. + bool source_is_multisampled = + key.source_msaa_samples != xenos::MsaaSamples::k1X; + spv::Id source_color_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) { + source_color_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(source_color_component_type, spv::Dim2D, false, + false, source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_color"); + builder.addDecoration( + source_color_texture, spv::DecorationDescriptorSet, + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetColorTextureBit - 1))); + builder.addDecoration(source_color_texture, spv::DecorationBinding, 0); + } + // Depth / stencil source. + spv::Id source_depth_texture = spv::NoResult; + spv::Id source_stencil_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + uint32_t source_depth_stencil_descriptor_set = + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetDepthStencilTexturesBit - 1)); + // Using `depth == false` in makeImageType because comparisons are not + // required, and other values of `depth` are causing issues in drivers. 
+ // https://github.com/microsoft/DirectXShaderCompiler/issues/1107 + if (mode.output != TransferOutput::kStencilBit) { + source_depth_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(type_float, spv::Dim2D, false, false, + source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_depth"); + builder.addDecoration(source_depth_texture, spv::DecorationDescriptorSet, + source_depth_stencil_descriptor_set); + builder.addDecoration(source_depth_texture, spv::DecorationBinding, 0); + } + if (mode.output != TransferOutput::kDepth || + shader_uses_stencil_reference_output) { + source_stencil_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(type_uint, spv::Dim2D, false, false, + source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_stencil"); + builder.addDecoration(source_stencil_texture, + spv::DecorationDescriptorSet, + source_depth_stencil_descriptor_set); + builder.addDecoration(source_stencil_texture, spv::DecorationBinding, 1); + } + } + // Host depth source buffer. + spv::Id host_depth_source_buffer = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthBufferBit) { + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeRuntimeArray(type_uint)); + // Storage buffers have std430 packing, no padding to 4-component vectors. + builder.addDecoration(id_vector_temp.back(), spv::DecorationArrayStride, + sizeof(uint32_t)); + spv::Id type_host_depth_source_buffer = + builder.makeStructType(id_vector_temp, "XeTransferHostDepthBuffer"); + builder.addMemberName(type_host_depth_source_buffer, 0, "host_depth"); + builder.addMemberDecoration(type_host_depth_source_buffer, 0, + spv::DecorationNonWritable); + builder.addMemberDecoration(type_host_depth_source_buffer, 0, + spv::DecorationOffset, 0); + // Block since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // BufferBlock. + builder.addDecoration(type_host_depth_source_buffer, + spv::DecorationBufferBlock); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // Uniform. + host_depth_source_buffer = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniform, + type_host_depth_source_buffer, "xe_transfer_host_depth_buffer"); + builder.addDecoration( + host_depth_source_buffer, spv::DecorationDescriptorSet, + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthBufferBit - 1))); + builder.addDecoration(host_depth_source_buffer, spv::DecorationBinding, 0); + } + // Host depth source texture (the depth / stencil descriptor set is reused, + // but stencil is not needed). + spv::Id host_depth_source_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + host_depth_source_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType( + type_float, spv::Dim2D, false, false, + key.host_depth_source_msaa_samples != xenos::MsaaSamples::k1X, 1, + spv::ImageFormatUnknown), + "xe_transfer_host_depth"); + builder.addDecoration( + host_depth_source_texture, spv::DecorationDescriptorSet, + xe::bit_count( + pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthStencilTexturesBit - 1))); + builder.addDecoration(host_depth_source_texture, spv::DecorationBinding, 0); + } + // Push constants. 
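+  // For reference (an inferred summary of the offsets assigned below, not
+  // generated code): with all three optional dwords used, the block is laid
+  // out as
+  //   offset 0: uint host_depth_address;
+  //   offset 4: uint address;
+  //   offset 8: uint stencil_mask;
+  // Unused members are skipped, and each present member is placed at
+  // sizeof(uint32_t) times the number of used dwords below its bit.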
+ id_vector_temp.clear(); + uint32_t push_constants_member_host_depth_address = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + push_constants_member_host_depth_address = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + uint32_t push_constants_member_address = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + push_constants_member_address = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + uint32_t push_constants_member_stencil_mask = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordStencilMaskBit) { + push_constants_member_stencil_mask = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + spv::Id push_constants = spv::NoResult; + if (!id_vector_temp.empty()) { + spv::Id type_push_constants = + builder.makeStructType(id_vector_temp, "XeTransferPushConstants"); + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + assert_true(push_constants_member_host_depth_address != UINT32_MAX); + builder.addMemberName(type_push_constants, + push_constants_member_host_depth_address, + "host_depth_address"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_host_depth_address, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count( + pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordHostDepthAddressBit - 1))); + } + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + assert_true(push_constants_member_address != UINT32_MAX); + builder.addMemberName(type_push_constants, push_constants_member_address, + "address"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_address, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count(pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordAddressBit - 1))); + } + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordStencilMaskBit) { + assert_true(push_constants_member_stencil_mask != UINT32_MAX); + builder.addMemberName(type_push_constants, + push_constants_member_stencil_mask, "stencil_mask"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_stencil_mask, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count( + pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordStencilMaskBit - 1))); + } + builder.addDecoration(type_push_constants, spv::DecorationBlock); + push_constants = builder.createVariable( + spv::NoPrecision, spv::StorageClassPushConstant, type_push_constants, + "xe_transfer_push_constants"); + } + + // Coordinate inputs. + spv::Id input_fragment_coord = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4, "gl_FragCoord"); + builder.addDecoration(input_fragment_coord, spv::DecorationBuiltIn, + spv::BuiltInFragCoord); + main_interface.push_back(input_fragment_coord); + spv::Id input_sample_id = spv::NoResult; + spv::Id spec_const_sample_id = spv::NoResult; + if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { + if (device_features.sampleRateShading) { + // One draw for all samples. 
+      builder.addCapability(spv::CapabilitySampleRateShading);
+      input_sample_id = builder.createVariable(
+          spv::NoPrecision, spv::StorageClassInput, type_int, "gl_SampleID");
+      builder.addDecoration(input_sample_id, spv::DecorationFlat);
+      builder.addDecoration(input_sample_id, spv::DecorationBuiltIn,
+                            spv::BuiltInSampleId);
+      main_interface.push_back(input_sample_id);
+    } else {
+      // One sample per draw, with different sample masks.
+      spec_const_sample_id = builder.makeUintConstant(0, true);
+      builder.addName(spec_const_sample_id, "xe_transfer_sample_id");
+      builder.addDecoration(spec_const_sample_id, spv::DecorationSpecId, 0);
+    }
+  }
+
+  // Begin the main function.
+  std::vector<spv::Id> main_param_types;
+  std::vector<std::vector<spv::Decoration>> main_precisions;
+  spv::Block* main_entry;
+  spv::Function* main_function =
+      builder.makeFunctionEntry(spv::NoPrecision, type_void, "main",
+                                main_param_types, main_precisions, &main_entry);
+
+  // Working with unsigned numbers for simplicity now, bitcasting to signed will
+  // be done at texture fetch.
+
+  uint32_t tile_width_samples =
+      xenos::kEdramTileWidthSamples * draw_resolution_scale_x();
+  uint32_t tile_height_samples =
+      xenos::kEdramTileHeightSamples * draw_resolution_scale_y();
+
+  // Split the destination pixel index into 32bpp tile and 32bpp-tile-relative
+  // pixel index.
+  // Note that division by non-power-of-two constants will include a 4-cycle
+  // 32*32 multiplication on AMD, even though so many bits are not needed for
+  // the pixel position - however, if an OpUnreachable path is inserted for the
+  // case when the position has upper bits set, for some reason, the code for it
+  // is not eliminated when compiling the shader for AMD via RenderDoc on
+  // Windows, as of June 2022.
+  uint_vector_temp.clear();
+  uint_vector_temp.reserve(2);
+  uint_vector_temp.push_back(0);
+  uint_vector_temp.push_back(1);
+  spv::Id dest_pixel_coord = builder.createUnaryOp(
+      spv::OpConvertFToU, type_uint2,
+      builder.createRvalueSwizzle(
+          spv::NoPrecision, type_float2,
+          builder.createLoad(input_fragment_coord, spv::NoPrecision),
+          uint_vector_temp));
+  spv::Id dest_pixel_x =
+      builder.createCompositeExtract(dest_pixel_coord, type_uint, 0);
+  spv::Id const_dest_tile_width_pixels = builder.makeUintConstant(
+      tile_width_samples >>
+      (uint32_t(dest_is_64bpp) +
+       uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X)));
+  spv::Id dest_tile_index_x = builder.createBinOp(
+      spv::OpUDiv, type_uint, dest_pixel_x, const_dest_tile_width_pixels);
+  spv::Id dest_tile_pixel_x = builder.createBinOp(
+      spv::OpUMod, type_uint, dest_pixel_x, const_dest_tile_width_pixels);
+  spv::Id dest_pixel_y =
+      builder.createCompositeExtract(dest_pixel_coord, type_uint, 1);
+  spv::Id const_dest_tile_height_pixels = builder.makeUintConstant(
+      tile_height_samples >>
+      uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X));
+  spv::Id dest_tile_index_y = builder.createBinOp(
+      spv::OpUDiv, type_uint, dest_pixel_y, const_dest_tile_height_pixels);
+  spv::Id dest_tile_pixel_y = builder.createBinOp(
+      spv::OpUMod, type_uint, dest_pixel_y, const_dest_tile_height_pixels);
+
+  assert_true(push_constants_member_address != UINT32_MAX);
+  id_vector_temp.clear();
+  id_vector_temp.push_back(
+      builder.makeIntConstant(int32_t(push_constants_member_address)));
+  spv::Id address_constant = builder.createLoad(
+      builder.createAccessChain(spv::StorageClassPushConstant, push_constants,
+                                id_vector_temp),
+      spv::NoPrecision);
+
+  // Calculate the 32bpp tile index from its X and Y parts.
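+  // An inferred summary of how the `address` push constant is consumed here
+  // and in the tile index transformation below:
+  //   bits [0, kEdramPitchTilesBits) - destination pitch in tiles;
+  //   bits [kEdramPitchTilesBits, 2 * kEdramPitchTilesBits) - source pitch in
+  //     tiles;
+  //   bits [2 * kEdramPitchTilesBits, 2 * kEdramPitchTilesBits +
+  //     kEdramBaseTilesBits) - signed source base tile offset relative to the
+  //     destination base.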
+ spv::Id dest_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, address_constant, + builder.makeUintConstant(0), + builder.makeUintConstant(xenos::kEdramPitchTilesBits)), + dest_tile_index_y), + dest_tile_index_x); + + // Load the destination sample index. + spv::Id dest_sample_id = spv::NoResult; + if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { + if (device_features.sampleRateShading) { + assert_true(input_sample_id != spv::NoResult); + dest_sample_id = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createLoad(input_sample_id, spv::NoPrecision)); + } else { + assert_true(spec_const_sample_id != spv::NoResult); + // Already uint. + dest_sample_id = spec_const_sample_id; + } + } + + // Transform the destination framebuffer pixel and sample coordinates into the + // source texture pixel and sample coordinates. + + // First sample bit at 4x with Vulkan standard locations - horizontal sample. + // Second sample bit at 4x with Vulkan standard locations - vertical sample. + // At 2x: + // - Native 2x: top is 1 in Vulkan, bottom is 0. + // - 2x as 4x: top is 0, bottom is 3. + + spv::Id source_sample_id = dest_sample_id; + spv::Id source_tile_pixel_x = dest_tile_pixel_x; + spv::Id source_tile_pixel_y = dest_tile_pixel_y; + spv::Id source_color_half = spv::NoResult; + if (!source_is_64bpp && dest_is_64bpp) { + // 32bpp -> 64bpp, need two samples of the source. + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 4x ->. + // Source has 32bpp halves in two adjacent samples. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 4x -> 4x. + // 1 destination horizontal sample = 2 source horizontal samples. + // D p0,0 s0,0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,0 s1,0 = S p1,0 s0,0 | S p1,0 s1,0 + // D p0,0 s0,1 = S p0,0 s0,1 | S p0,0 s1,1 + // D p0,0 s1,1 = S p1,0 s0,1 | S p1,0 s1,1 + // Thus destination horizontal sample -> source horizontal pixel, + // vertical samples are 1:1. + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1 << 1)); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_x = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // 32bpp -> 64bpp, 4x -> 2x. + // 1 destination horizontal pixel = 2 source horizontal samples. + // D p0,0 s0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,0 s1 = S p0,0 s0,1 | S p0,0 s1,1 + // D p1,0 s0 = S p1,0 s0,0 | S p1,0 s1,0 + // D p1,0 s1 = S p1,0 s0,1 | S p1,0 s1,1 + // Pixel index can be reused. Sample 1 (for native 2x) or 0 (for 2x as + // 4x) should become samples 01, sample 0 or 3 should become samples 23. + if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, + builder.createBinOp(spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1)), + builder.makeUintConstant(1)); + } else { + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1 << 1)); + } + } else { + // 32bpp -> 64bpp, 4x -> 1x. 
+ // 1 destination horizontal pixel = 2 source horizontal samples. + // D p0,0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,1 = S p0,0 s0,1 | S p0,0 s1,1 + // Horizontal pixel index can be reused. Vertical pixel 1 should + // become sample 2. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + source_tile_pixel_y = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_y, builder.makeUintConstant(1)); + } + } else { + // 32bpp -> 64bpp, 1x/2x ->. + // Source has 32bpp halves in two adjacent pixels. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 1x/2x -> 4x. + // The X part. + // 1 destination horizontal sample = 2 source horizontal pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(2))); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_tile_pixel_x = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + // Y is handled by common code. + } else { + // 32bpp -> 64bpp, 1x/2x -> 1x/2x. + // The X part. + // 1 destination horizontal pixel = 2 source horizontal pixels. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftLeftLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(1)); + // Y is handled by common code. + } + } + } else if (source_is_64bpp && !dest_is_64bpp) { + // 64bpp -> 32bpp, also the half to load. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 64bpp -> 32bpp, -> 4x. + // The needed half is in the destination horizontal sample index. + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // 64bpp -> 32bpp, 4x -> 4x. + // D p0,0 s0,0 = S s0,0 low + // D p0,0 s1,0 = S s0,0 high + // D p1,0 s0,0 = S s1,0 low + // D p1,0 s1,0 = S s1,0 high + // Vertical pixel and sample (second bit) addressing is the same. + // However, 1 horizontal destination pixel = 1 horizontal source sample. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + // 2 destination horizontal samples = 1 source horizontal sample, thus + // 2 destination horizontal pixels = 1 source horizontal pixel. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(1)); + } else { + // 64bpp -> 32bpp, 1x/2x -> 4x. + // 2 destination horizontal samples = 1 source horizontal pixel, thus + // 1 destination horizontal pixel = 1 source horizontal pixel. Can reuse + // horizontal pixel index. + // Y is handled by common code. + } + // Half from the destination horizontal sample index. + source_color_half = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1)); + } else { + // 64bpp -> 32bpp, -> 1x/2x. 
+      // The needed half is in the destination horizontal pixel index.
+      if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) {
+        // 64bpp -> 32bpp, 4x -> 1x/2x.
+        // (Destination horizontal pixel >> 1) & 1 = source horizontal sample
+        // (first bit).
+        source_sample_id = builder.createTriOp(
+            spv::OpBitFieldUExtract, type_uint, dest_tile_pixel_x,
+            builder.makeUintConstant(1), builder.makeUintConstant(1));
+        if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
+          // 64bpp -> 32bpp, 4x -> 2x.
+          // Destination vertical samples (1/0 in the first bit for native 2x or
+          // 0/1 in the second bit for 2x as 4x) = source vertical samples
+          // (second bit).
+          if (msaa_2x_attachments_supported_) {
+            id_vector_temp.clear();
+            id_vector_temp.reserve(4);
+            id_vector_temp.push_back(source_sample_id);
+            id_vector_temp.push_back(builder.createBinOp(
+                spv::OpBitwiseXor, type_uint, dest_sample_id,
+                builder.makeUintConstant(1)));
+            id_vector_temp.push_back(builder.makeUintConstant(1));
+            id_vector_temp.push_back(builder.makeUintConstant(1));
+            source_sample_id = builder.createOp(spv::OpBitFieldInsert,
+                                                type_uint, id_vector_temp);
+          } else {
+            id_vector_temp.clear();
+            id_vector_temp.reserve(4);
+            id_vector_temp.push_back(dest_sample_id);
+            id_vector_temp.push_back(source_sample_id);
+            id_vector_temp.push_back(builder.makeUintConstant(0));
+            id_vector_temp.push_back(builder.makeUintConstant(1));
+            source_sample_id = builder.createOp(spv::OpBitFieldInsert,
+                                                type_uint, id_vector_temp);
+          }
+        } else {
+          // 64bpp -> 32bpp, 4x -> 1x.
+          // 1 destination vertical pixel = 1 source vertical sample.
+          id_vector_temp.clear();
+          id_vector_temp.reserve(4);
+          id_vector_temp.push_back(source_sample_id);
+          id_vector_temp.push_back(source_tile_pixel_y);
+          id_vector_temp.push_back(builder.makeUintConstant(1));
+          id_vector_temp.push_back(builder.makeUintConstant(1));
+          source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                              id_vector_temp);
+          source_tile_pixel_y = builder.createBinOp(
+              spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y,
+              builder.makeUintConstant(1));
+        }
+        // 2 destination horizontal pixels = 1 source horizontal sample.
+        // 4 destination horizontal pixels = 1 source horizontal pixel.
+        source_tile_pixel_x =
+            builder.createBinOp(spv::OpShiftRightLogical, type_uint,
+                                dest_tile_pixel_x, builder.makeUintConstant(2));
+      } else {
+        // 64bpp -> 32bpp, 1x/2x -> 1x/2x.
+        // The X part.
+        // 2 destination horizontal pixels = 1 source horizontal pixel.
+        source_tile_pixel_x =
+            builder.createBinOp(spv::OpShiftRightLogical, type_uint,
+                                dest_tile_pixel_x, builder.makeUintConstant(1));
+        // Y is handled by common code.
+      }
+      // Half from the destination horizontal pixel index.
+      source_color_half =
+          builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x,
+                              builder.makeUintConstant(1));
+    }
+    assert_true(source_color_half != spv::NoResult);
+  } else {
+    // Same bit count.
+    if (key.source_msaa_samples != key.dest_msaa_samples) {
+      if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) {
+        // Same BPP, 4x -> 1x/2x.
+        if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
+          // Same BPP, 4x -> 2x.
+          // Horizontal pixels to samples. Vertical sample (1/0 in the first bit
+          // for native 2x or 0/1 in the second bit for 2x as 4x) to second
+          // sample bit.
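+          // For example, with native 2x attachments, destination
+          // (pixel x, sample s) maps to source sample
+          // ((x & 1) | ((s ^ 1) << 1)) at pixel (x >> 1), which is what the
+          // BitFieldInsert below computes.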
+ if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + } else { + // Same BPP, 4x -> 1x. + // Pixels to samples. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } + } else { + // Same BPP, 1x/2x -> 1x/2x/4x (as long as they're different). + // Only the X part - Y is handled by common code. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // Horizontal samples to pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_x = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + } + } + } + // Common source Y and sample index for 1x/2x AA sources, independent of bits + // per sample. + if (key.source_msaa_samples < xenos::MsaaSamples::k4X && + key.source_msaa_samples != key.dest_msaa_samples) { + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 1x/2x -> 4x. + if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { + // 2x -> 4x. + // Vertical samples (second bit) of 4x destination to vertical sample + // (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 2x source. + source_sample_id = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp(spv::OpBitwiseXor, type_uint, + source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } else { + // 1x -> 4x. 
+ // Vertical samples (second bit) to Y pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } else { + // 1x/2x -> different 1x/2x. + if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { + // 2x -> 1x. + // Vertical pixels of 2x destination to vertical samples (1, 0 for + // native 2x, or 0, 3 for 2x as 4x) of 1x source. + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp(spv::OpBitwiseXor, type_uint, + source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + source_tile_pixel_y = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_y, builder.makeUintConstant(1)); + } else { + // 1x -> 2x. + // Vertical samples (1/0 in the first bit for native 2x or 0/1 in the + // second bit for 2x as 4x) of 2x destination to vertical pixels of 1x + // source. + if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + } + } + } + + uint32_t source_pixel_width_dwords_log2 = + uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k4X) + + uint32_t(source_is_64bpp); + + if (source_is_color != dest_is_color) { + // Copying between color and depth / stencil - swap 40-32bpp-sample columns + // in the pixel index within the source 32bpp tile. 
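+    // For example, with no resolution scaling and a 1x 32bpp color source
+    // (80 pixels per 32bpp tile row, halves of 40), tile-relative pixel 5
+    // becomes pixel 45, and pixel 45 becomes pixel 5.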
+    uint32_t source_32bpp_tile_half_pixels =
+        tile_width_samples >> (1 + source_pixel_width_dwords_log2);
+    source_tile_pixel_x = builder.createUnaryOp(
+        spv::OpBitcast, type_uint,
+        builder.createBinOp(
+            spv::OpIAdd, type_int,
+            builder.createUnaryOp(spv::OpBitcast, type_int,
+                                  source_tile_pixel_x),
+            builder.createTriOp(
+                spv::OpSelect, type_int,
+                builder.createBinOp(
+                    spv::OpULessThan, builder.makeBoolType(),
+                    source_tile_pixel_x,
+                    builder.makeUintConstant(source_32bpp_tile_half_pixels)),
+                builder.makeIntConstant(int32_t(source_32bpp_tile_half_pixels)),
+                builder.makeIntConstant(
+                    -int32_t(source_32bpp_tile_half_pixels)))));
+  }
+
+  // Transform the destination 32bpp tile index into the source.
+  spv::Id source_tile_index = builder.createUnaryOp(
+      spv::OpBitcast, type_uint,
+      builder.createBinOp(
+          spv::OpIAdd, type_int,
+          builder.createUnaryOp(spv::OpBitcast, type_int, dest_tile_index),
+          builder.createTriOp(
+              spv::OpBitFieldSExtract, type_int,
+              builder.createUnaryOp(spv::OpBitcast, type_int, address_constant),
+              builder.makeUintConstant(xenos::kEdramPitchTilesBits * 2),
+              builder.makeUintConstant(xenos::kEdramBaseTilesBits))));
+  // Split the source 32bpp tile index into X and Y tile index within the source
+  // image.
+  spv::Id source_pitch_tiles = builder.createTriOp(
+      spv::OpBitFieldUExtract, type_uint, address_constant,
+      builder.makeUintConstant(xenos::kEdramPitchTilesBits),
+      builder.makeUintConstant(xenos::kEdramPitchTilesBits));
+  spv::Id source_tile_index_y = builder.createBinOp(
+      spv::OpUDiv, type_uint, source_tile_index, source_pitch_tiles);
+  spv::Id source_tile_index_x = builder.createBinOp(
+      spv::OpUMod, type_uint, source_tile_index, source_pitch_tiles);
+  // Finally calculate the source texture coordinates.
+  spv::Id source_pixel_x_int = builder.createUnaryOp(
+      spv::OpBitcast, type_int,
+      builder.createBinOp(
+          spv::OpIAdd, type_uint,
+          builder.createBinOp(
+              spv::OpIMul, type_uint,
+              builder.makeUintConstant(tile_width_samples >>
+                                       source_pixel_width_dwords_log2),
+              source_tile_index_x),
+          source_tile_pixel_x));
+  spv::Id source_pixel_y_int = builder.createUnaryOp(
+      spv::OpBitcast, type_int,
+      builder.createBinOp(
+          spv::OpIAdd, type_uint,
+          builder.createBinOp(
+              spv::OpIMul, type_uint,
+              builder.makeUintConstant(
+                  tile_height_samples >>
+                  uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k2X)),
+              source_tile_index_y),
+          source_tile_pixel_y));
+
+  // Load the source.
+
+  spv::Builder::TextureParameters source_texture_parameters = {};
+  id_vector_temp.clear();
+  id_vector_temp.reserve(2);
+  id_vector_temp.push_back(source_pixel_x_int);
+  id_vector_temp.push_back(source_pixel_y_int);
+  spv::Id source_coordinates[2] = {
+      builder.createCompositeConstruct(type_int2, id_vector_temp),
+  };
+  spv::Id source_sample_ids_int[2] = {};
+  if (key.source_msaa_samples != xenos::MsaaSamples::k1X) {
+    source_sample_ids_int[0] =
+        builder.createUnaryOp(spv::OpBitcast, type_int, source_sample_id);
+  } else {
+    source_texture_parameters.lod = builder.makeIntConstant(0);
+  }
+  // Go to the next sample or pixel along X if two dwords need to be loaded.
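+  // At 4x, the two 32bpp dwords are adjacent horizontal samples of the same
+  // pixel (sample | 1 selects the second one); at 1x/2x, they are horizontally
+  // adjacent pixels (x | 1) with the same sample index.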
+ bool source_load_is_two_32bpp_samples = !source_is_64bpp && dest_is_64bpp; + if (source_load_is_two_32bpp_samples) { + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + source_coordinates[1] = source_coordinates[0]; + source_sample_ids_int[1] = builder.createBinOp( + spv::OpBitwiseOr, type_int, source_sample_ids_int[0], + builder.makeIntConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(builder.createBinOp(spv::OpBitwiseOr, type_int, + source_pixel_x_int, + builder.makeIntConstant(1))); + id_vector_temp.push_back(source_pixel_y_int); + source_coordinates[1] = + builder.createCompositeConstruct(type_int2, id_vector_temp); + source_sample_ids_int[1] = source_sample_ids_int[0]; + } + } + spv::Id source_color[2][4] = {}; + if (source_color_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_color_texture, spv::NoPrecision); + assert_true(source_color_component_type != spv::NoType); + spv::Id source_color_vec4_type = + builder.makeVectorType(source_color_component_type, 4); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + spv::Id source_color_vec4 = builder.createTextureCall( + spv::NoPrecision, source_color_vec4_type, false, true, false, false, + false, source_texture_parameters, spv::ImageOperandsMaskNone); + uint32_t source_color_components_remaining = + source_color_texture_component_mask; + uint32_t source_color_component_index; + while (xe::bit_scan_forward(source_color_components_remaining, + &source_color_component_index)) { + source_color_components_remaining &= + ~(uint32_t(1) << source_color_component_index); + source_color[i][source_color_component_index] = + builder.createCompositeExtract(source_color_vec4, + source_color_component_type, + source_color_component_index); + } + } + } + spv::Id source_depth_float[2] = {}; + if (source_depth_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_depth_texture, spv::NoPrecision); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + source_depth_float[i] = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, type_float4, false, true, false, false, false, + source_texture_parameters, spv::ImageOperandsMaskNone), + type_float, 0); + } + } + spv::Id source_stencil[2] = {}; + if (source_stencil_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_stencil_texture, spv::NoPrecision); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + source_stencil[i] = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, type_uint4, false, true, false, false, false, + source_texture_parameters, spv::ImageOperandsMaskNone), + type_uint, 0); + } + } + + // Pick the needed 32bpp half of the 64bpp color. 
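+  // For instance, for a 4-component 64bpp source format such as
+  // k_16_16_16_16, half 1 selects components 2 and 3, while for the
+  // 2-component k_32_32_FLOAT, it selects component 1.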
+ if (source_is_64bpp && !dest_is_64bpp) { + uint32_t source_color_half_component_count = + source_color_format_component_count >> 1; + assert_true(source_color_half != spv::NoResult); + spv::Id source_color_is_second_half = + builder.createBinOp(spv::OpINotEqual, type_bool, source_color_half, + builder.makeUintConstant(0)); + if (mode.output == TransferOutput::kStencilBit) { + source_color[0][0] = builder.createTriOp( + spv::OpSelect, source_color_component_type, + source_color_is_second_half, + source_color[0][source_color_half_component_count], + source_color[0][0]); + } else { + for (uint32_t i = 0; i < source_color_half_component_count; ++i) { + source_color[0][i] = builder.createTriOp( + spv::OpSelect, source_color_component_type, + source_color_is_second_half, + source_color[0][source_color_half_component_count + i], + source_color[0][i]); + } + } + } + + if (output_fragment_stencil_ref != spv::NoResult && + source_stencil[0] != spv::NoResult) { + // For the depth -> depth case, write the stencil directly to the output. + assert_true(mode.output == TransferOutput::kDepth); + builder.createStore(source_stencil[0], output_fragment_stencil_ref); + } + + if (dest_is_64bpp) { + // Construct the 64bpp color from two 32-bit samples or one 64-bit sample. + // If `packed` (two uints) are created, use the generic path involving + // unpacking. + // Otherwise, the fragment data output must be written to directly by the + // reached control flow path. + spv::Id packed[2] = {}; + if (source_is_color) { + switch (source_color_format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale = builder.makeFloatConstant(255.0f); + spv::Id component_width = builder.makeUintConstant(8); + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][0], unorm_scale), + unorm_round_offset)); + for (uint32_t j = 1; j < 4; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][j], unorm_scale), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(8 * j)); + id_vector_temp.push_back(component_width); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f); + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][0], unorm_scale_rgb), + unorm_round_offset)); + for (uint32_t j = 1; j < 4; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + 
spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, source_color[i][j], + j == 3 ? unorm_scale_a : unorm_scale_rgb), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(10 * j)); + id_vector_temp.push_back(j == 3 ? width_a : width_rgb); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id float_0 = builder.makeFloatConstant(0.0f); + spv::Id float_1 = builder.makeFloatConstant(1.0f); + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id offset_a = builder.makeUintConstant(30); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 0; i < 2; ++i) { + // Float16 has a wider range for both color and alpha, also NaNs - + // clamp and convert. + packed[i] = SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[i][0], ext_inst_glsl_std_450); + for (uint32_t j = 1; j < 3; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back( + SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[i][j], ext_inst_glsl_std_450)); + id_vector_temp.push_back(builder.makeUintConstant(10 * j)); + id_vector_temp.push_back(width_rgb); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + // Saturate and convert the alpha. + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(source_color[i][3]); + id_vector_temp.push_back(float_0); + id_vector_temp.push_back(float_1); + spv::Id alpha_saturated = + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450NClamp, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + alpha_saturated, unorm_scale_a), + unorm_round_offset))); + id_vector_temp.push_back(offset_a); + id_vector_temp.push_back(width_a); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + // All 64bpp formats, and all 16 bits per component formats, are + // represented as integers in ownership transfer for safe handling of + // NaN encodings and -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no multisampled + // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a + // portability subset device or a 64bpp format where that wouldn't help + // anyway). 
+ case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + if (dest_color_format == + xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { + spv::Id component_offset_width = builder.makeUintConstant(16); + spv::Id color_16_in_32[2]; + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_color[i][0]); + id_vector_temp.push_back(source_color[i][1]); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + color_16_in_32[i] = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(color_16_in_32[0]); + id_vector_temp.push_back(color_16_in_32[1]); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[i >> 1][i & 1]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + if (dest_color_format == + xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { + spv::Id component_offset_width = builder.makeUintConstant(16); + spv::Id color_16_in_32[2]; + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_color[0][i << 1]); + id_vector_temp.push_back(source_color[0][(i << 1) + 1]); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + color_16_in_32[i] = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(color_16_in_32[0]); + id_vector_temp.push_back(color_16_in_32[1]); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } + } break; + // Float32 is transferred as uint32 to preserve NaN encodings. However, + // multisampled sampled image support is optional in Vulkan. + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = source_color[i][0]; + if (!source_color_is_uint) { + packed[i] = + builder.createUnaryOp(spv::OpBitcast, type_uint, packed[i]); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = source_color[0][i]; + if (!source_color_is_uint) { + packed[i] = + builder.createUnaryOp(spv::OpBitcast, type_uint, packed[i]); + } + } + } break; + } + } else { + assert_true(source_depth_texture != spv::NoResult); + assert_true(source_stencil_texture != spv::NoResult); + spv::Id depth_offset = builder.makeUintConstant(8); + spv::Id depth_width = builder.makeUintConstant(24); + for (uint32_t i = 0; i < 2; ++i) { + spv::Id depth24 = spv::NoResult; + switch (source_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. 
This seems to be the
+            // correct conversion; adding +0.5 and rounding towards zero
+            // results in red instead of black in the 4D5307E6 clear shader.
+            id_vector_temp.clear();
+            id_vector_temp.push_back(builder.createBinOp(
+                spv::OpFMul, type_float, source_depth_float[i],
+                builder.makeFloatConstant(float(0xFFFFFF))));
+            depth24 = builder.createUnaryOp(
+                spv::OpConvertFToU, type_uint,
+                builder.createBuiltinCall(type_float, ext_inst_glsl_std_450,
+                                          GLSLstd450RoundEven, id_vector_temp));
+          } break;
+          case xenos::DepthRenderTargetFormat::kD24FS8: {
+            depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
+                builder, source_depth_float[i], depth_float24_round(), true,
+                ext_inst_glsl_std_450);
+          } break;
+        }
+        // Merge depth and stencil.
+        id_vector_temp.clear();
+        id_vector_temp.reserve(4);
+        id_vector_temp.push_back(source_stencil[i]);
+        id_vector_temp.push_back(depth24);
+        id_vector_temp.push_back(depth_offset);
+        id_vector_temp.push_back(depth_width);
+        packed[i] =
+            builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp);
+      }
+    }
+    // Common path unless there was a specialized one - unpack two packed 32-bit
+    // parts.
+    if (packed[0] != spv::NoResult) {
+      assert_true(packed[1] != spv::NoResult);
+      if (dest_color_format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
+        id_vector_temp.clear();
+        id_vector_temp.reserve(2);
+        id_vector_temp.push_back(packed[0]);
+        id_vector_temp.push_back(packed[1]);
+        // Multisampled sampled images are optional in Vulkan, and image views
+        // of different formats can't be created separately for sampled image
+        // and color attachment usages, so no multisampled integer sampled image
+        // support implies no multisampled integer framebuffer attachment
+        // support in Xenia.
+        if (!dest_color_is_uint) {
+          for (spv::Id& float32 : id_vector_temp) {
+            float32 =
+                builder.createUnaryOp(spv::OpBitcast, type_float, float32);
+          }
+        }
+        builder.createStore(builder.createCompositeConstruct(type_fragment_data,
+                                                             id_vector_temp),
+                            output_fragment_data);
+      } else {
+        spv::Id const_uint_0 = builder.makeUintConstant(0);
+        spv::Id const_uint_16 = builder.makeUintConstant(16);
+        id_vector_temp.clear();
+        id_vector_temp.reserve(4);
+        for (uint32_t i = 0; i < 4; ++i) {
+          id_vector_temp.push_back(builder.createTriOp(
+              spv::OpBitFieldUExtract, type_uint, packed[i >> 1],
+              (i & 1) ? const_uint_16 : const_uint_0, const_uint_16));
+        }
+        // TODO(Triang3l): Handle the case when that's not true (no multisampled
+        // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a
+        // portability subset device or a 64bpp format where that wouldn't help
+        // anyway).
+        builder.createStore(builder.createCompositeConstruct(type_fragment_data,
+                                                             id_vector_temp),
+                            output_fragment_data);
+      }
+    }
+  } else {
+    // If `packed` is created, use the generic path involving unpacking.
+    // - For a color destination, the packed 32bpp color.
+    // - For a depth / stencil destination, stencil in 0:7, depth in 8:31
+    //   normally, or depth in 0:23 and zeros in 24:31 with packed_only_depth.
+    // - For a stencil bit, stencil in 0:7.
+    // Otherwise, the fragment data or fragment depth / stencil output must be
+    // written to directly by the reached control flow path.
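+    // For a kD24S8 source with the stencil needed, for instance, `packed` ends
+    // up as (depth24 << 8) | stencil8, which the mode.output switch below then
+    // unpacks for the destination.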
+    spv::Id packed = spv::NoResult;
+    bool packed_only_depth = false;
+    if (source_is_color) {
+      switch (source_color_format) {
+        case xenos::ColorRenderTargetFormat::k_8_8_8_8:
+        case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
+          if (dest_is_color &&
+              (dest_color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8 ||
+               dest_color_format ==
+                   xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {
+            // Same format - passthrough.
+            id_vector_temp.clear();
+            id_vector_temp.reserve(4);
+            for (uint32_t i = 0; i < 4; ++i) {
+              id_vector_temp.push_back(source_color[0][i]);
+            }
+            builder.createStore(builder.createCompositeConstruct(
+                                    type_fragment_data, id_vector_temp),
+                                output_fragment_data);
+          } else {
+            spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f);
+            spv::Id unorm_scale = builder.makeFloatConstant(255.0f);
+            uint32_t packed_component_offset = 0;
+            if (mode.output == TransferOutput::kDepth) {
+              // When only depth is needed, not stencil, skip the red component
+              // and put the depth from GBA directly in the lower bits.
+              packed_component_offset = 1;
+              packed_only_depth = true;
+              if (output_fragment_stencil_ref != spv::NoResult) {
+                builder.createStore(
+                    builder.createUnaryOp(
+                        spv::OpConvertFToU, type_uint,
+                        builder.createBinOp(
+                            spv::OpFAdd, type_float,
+                            builder.createBinOp(spv::OpFMul, type_float,
+                                                source_color[0][0],
+                                                unorm_scale),
+                            unorm_round_offset)),
+                    output_fragment_stencil_ref);
+              }
+            }
+            packed = builder.createUnaryOp(
+                spv::OpConvertFToU, type_uint,
+                builder.createBinOp(
+                    spv::OpFAdd, type_float,
+                    builder.createBinOp(
+                        spv::OpFMul, type_float,
+                        source_color[0][packed_component_offset], unorm_scale),
+                    unorm_round_offset));
+            if (mode.output != TransferOutput::kStencilBit) {
+              spv::Id component_width = builder.makeUintConstant(8);
+              for (uint32_t i = 1; i < 4 - packed_component_offset; ++i) {
+                id_vector_temp.clear();
+                id_vector_temp.reserve(4);
+                id_vector_temp.push_back(packed);
+                id_vector_temp.push_back(builder.createUnaryOp(
+                    spv::OpConvertFToU, type_uint,
+                    builder.createBinOp(
+                        spv::OpFAdd, type_float,
+                        builder.createBinOp(
+                            spv::OpFMul, type_float,
+                            source_color[0][packed_component_offset + i],
+                            unorm_scale),
+                        unorm_round_offset)));
+                id_vector_temp.push_back(builder.makeUintConstant(8 * i));
+                id_vector_temp.push_back(component_width);
+                packed = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                          id_vector_temp);
+              }
+            }
+          }
+        } break;
+        case xenos::ColorRenderTargetFormat::k_2_10_10_10:
+        case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
+          if (dest_is_color &&
+              (dest_color_format ==
+                   xenos::ColorRenderTargetFormat::k_2_10_10_10 ||
+               dest_color_format == xenos::ColorRenderTargetFormat::
+                                        k_2_10_10_10_AS_10_10_10_10)) {
+            id_vector_temp.clear();
+            id_vector_temp.reserve(4);
+            for (uint32_t i = 0; i < 4; ++i) {
+              id_vector_temp.push_back(source_color[0][i]);
+            }
+            builder.createStore(builder.createCompositeConstruct(
+                                    type_fragment_data, id_vector_temp),
+                                output_fragment_data);
+          } else {
+            spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f);
+            spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f);
+            packed = builder.createUnaryOp(
+                spv::OpConvertFToU, type_uint,
+                builder.createBinOp(
+                    spv::OpFAdd, type_float,
+                    builder.createBinOp(spv::OpFMul, type_float,
+                                        source_color[0][0], unorm_scale_rgb),
+                    unorm_round_offset));
+            if (mode.output != TransferOutput::kStencilBit) {
+              spv::Id width_rgb = builder.makeUintConstant(10);
+              spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f);
+              spv::Id width_a =
builder.makeUintConstant(2); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, source_color[0][i], + i == 3 ? unorm_scale_a : unorm_scale_rgb), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(i == 3 ? width_a : width_rgb); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + if (dest_is_color && + (dest_color_format == + xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT || + dest_color_format == xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16)) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + // Float16 has a wider range for both color and alpha, also NaNs - + // clamp and convert. + packed = SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[0][0], ext_inst_glsl_std_450); + if (mode.output != TransferOutput::kStencilBit) { + spv::Id width_rgb = builder.makeUintConstant(10); + for (uint32_t i = 1; i < 3; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back( + SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[0][i], ext_inst_glsl_std_450)); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(width_rgb); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + // Saturate and convert the alpha. + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(source_color[0][3]); + id_vector_temp.push_back(builder.makeFloatConstant(0.0f)); + id_vector_temp.push_back(builder.makeFloatConstant(1.0f)); + spv::Id alpha_saturated = + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450NClamp, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + alpha_saturated, + builder.makeFloatConstant(3.0f)), + builder.makeFloatConstant(0.5f)))); + id_vector_temp.push_back(builder.makeUintConstant(30)); + id_vector_temp.push_back(builder.makeUintConstant(2)); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + // All 64bpp formats, and all 16 bits per component formats, are + // represented as integers in ownership transfer for safe handling of + // NaN encodings and -32768 / -32767. 
+          // TODO(Triang3l): Handle the case when that's not true (no
+          // multisampled sampled images, no 16-bit UNORM, no cross-packing
+          // 32bpp aliasing on a portability subset device or a 64bpp format
+          // where that wouldn't help anyway).
+          if (dest_is_color &&
+              (dest_color_format == xenos::ColorRenderTargetFormat::k_16_16 ||
+               dest_color_format ==
+                   xenos::ColorRenderTargetFormat::k_16_16_FLOAT)) {
+            id_vector_temp.clear();
+            id_vector_temp.reserve(2);
+            for (uint32_t i = 0; i < 2; ++i) {
+              id_vector_temp.push_back(source_color[0][i]);
+            }
+            builder.createStore(builder.createCompositeConstruct(
+                                    type_fragment_data, id_vector_temp),
+                                output_fragment_data);
+          } else {
+            packed = source_color[0][0];
+            if (mode.output != TransferOutput::kStencilBit) {
+              spv::Id component_offset_width = builder.makeUintConstant(16);
+              id_vector_temp.clear();
+              id_vector_temp.reserve(4);
+              id_vector_temp.push_back(packed);
+              id_vector_temp.push_back(source_color[0][1]);
+              id_vector_temp.push_back(component_offset_width);
+              id_vector_temp.push_back(component_offset_width);
+              packed = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                        id_vector_temp);
+            }
+          }
+        } break;
+        // Float32 is transferred as uint32 to preserve NaN encodings. However,
+        // multisampled sampled image support is optional in Vulkan.
+        case xenos::ColorRenderTargetFormat::k_32_FLOAT:
+        case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
+          packed = source_color[0][0];
+          if (!source_color_is_uint) {
+            packed = builder.createUnaryOp(spv::OpBitcast, type_uint, packed);
+          }
+        } break;
+      }
+    } else if (source_depth_float[0] != spv::NoResult) {
+      if (mode.output == TransferOutput::kDepth &&
+          dest_depth_format == source_depth_format) {
+        builder.createStore(source_depth_float[0], output_fragment_depth);
+      } else {
+        switch (source_depth_format) {
+          case xenos::DepthRenderTargetFormat::kD24S8: {
+            // Round to the nearest even integer. This seems to be the correct
+            // conversion; adding +0.5 and rounding towards zero results in red
+            // instead of black in the 4D5307E6 clear shader.
+            id_vector_temp.clear();
+            id_vector_temp.push_back(builder.createBinOp(
+                spv::OpFMul, type_float, source_depth_float[0],
+                builder.makeFloatConstant(float(0xFFFFFF))));
+            packed = builder.createUnaryOp(
+                spv::OpConvertFToU, type_uint,
+                builder.createBuiltinCall(type_float, ext_inst_glsl_std_450,
+                                          GLSLstd450RoundEven, id_vector_temp));
+          } break;
+          case xenos::DepthRenderTargetFormat::kD24FS8: {
+            packed = SpirvShaderTranslator::PreClampedDepthTo20e4(
+                builder, source_depth_float[0], depth_float24_round(), true,
+                ext_inst_glsl_std_450);
+          } break;
+        }
+        if (mode.output == TransferOutput::kDepth) {
+          packed_only_depth = true;
+        } else {
+          // Merge depth and stencil.
+          id_vector_temp.clear();
+          id_vector_temp.reserve(4);
+          id_vector_temp.push_back(source_stencil[0]);
+          id_vector_temp.push_back(packed);
+          id_vector_temp.push_back(builder.makeUintConstant(8));
+          id_vector_temp.push_back(builder.makeUintConstant(24));
+          packed = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                    id_vector_temp);
+        }
+      }
+    }
+    switch (mode.output) {
+      case TransferOutput::kColor: {
+        // Unless a special path was taken, unpack the raw 32bpp value into the
+        // 32bpp color output.
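+        // For k_8_8_8_8, for example, each output component i becomes
+        // float(bitfieldExtract(packed, 8 * i, 8)) / 255.0 (in GLSL terms), as
+        // the first case below implements with OpBitFieldUExtract and a
+        // multiplication by 1.0f / 255.0f.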
+ if (packed != spv::NoResult) { + switch (dest_color_format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id component_width = builder.makeUintConstant(8); + spv::Id unorm_scale = builder.makeFloatConstant(1.0f / 255.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, packed, + builder.makeUintConstant(8 * i), component_width)), + unorm_scale)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_rgb = + builder.makeFloatConstant(1.0f / 1023.0f); + spv::Id width_a = builder.makeUintConstant(2); + spv::Id unorm_scale_a = builder.makeFloatConstant(1.0f / 3.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + packed, + builder.makeUintConstant(10 * i), + i == 3 ? width_a : width_rgb)), + i == 3 ? unorm_scale_a : unorm_scale_rgb)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + // Color. + spv::Id width_rgb = builder.makeUintConstant(10); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp.push_back(SpirvShaderTranslator::Float7e3To32( + builder, packed, 10 * i, false, ext_inst_glsl_std_450)); + } + // Alpha. + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + packed, builder.makeUintConstant(30), + builder.makeUintConstant(2))), + builder.makeFloatConstant(1.0f / 3.0f))); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + // All 16 bits per component formats are represented as integers + // in ownership transfer for safe handling of NaN encodings and + // -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no + // multisampled sampled images, no 16-bit UNORM, no cross-packing + // 32bpp aliasing on a portability subset device or a 64bpp format + // where that wouldn't help anyway). + spv::Id component_offset_width = builder.makeUintConstant(16); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.push_back(builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, packed, + i ? 
component_offset_width : builder.makeUintConstant(0), + component_offset_width)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + // Float32 is transferred as uint32 to preserve NaN encodings. + // However, multisampled sampled images are optional in Vulkan, + // and image views of different formats can't be created + // separately for sampled image and color attachment usages, so no + // multisampled integer sampled image support implies no + // multisampled integer framebuffer attachment support in Xenia. + spv::Id float32 = packed; + if (!dest_color_is_uint) { + float32 = + builder.createUnaryOp(spv::OpBitcast, type_float, float32); + } + builder.createStore(float32, output_fragment_data); + } break; + default: + // A 64bpp format (handled separately) or an invalid one. + assert_unhandled_case(dest_color_format); + } + } + } break; + case TransferOutput::kDepth: { + if (packed) { + spv::Id guest_depth24 = packed; + if (!packed_only_depth) { + // Extract the depth bits. + guest_depth24 = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + guest_depth24, builder.makeUintConstant(8)); + } + // Load the host float32 depth, check if, when converted to the guest + // format, it's the same as the guest source, thus up to date, and if + // it is, write host float32 depth, otherwise do the guest -> host + // conversion. + spv::Id host_depth32 = spv::NoResult; + if (host_depth_source_texture != spv::NoResult) { + // Convert position and sample index from within the destination + // tile to within the host depth source tile, like for the guest + // render target, but for 32bpp -> 32bpp only. + spv::Id host_depth_source_sample_id = dest_sample_id; + spv::Id host_depth_source_tile_pixel_x = dest_tile_pixel_x; + spv::Id host_depth_source_tile_pixel_y = dest_tile_pixel_y; + if (key.host_depth_source_msaa_samples != key.dest_msaa_samples) { + if (key.host_depth_source_msaa_samples >= + xenos::MsaaSamples::k4X) { + // 4x -> 1x/2x. + if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // 4x -> 2x. + // Horizontal pixels to samples. Vertical sample (1/0 in the + // first bit for native 2x or 0/1 in the second bit for 2x as + // 4x) to second sample bit. + if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + host_depth_source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + } else { + // 4x -> 1x. + // Pixels to samples. 
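+                  // Mapping sketch: each 2x2 pixel quad of the 1x destination
+                  // covers the four samples of one 4x source pixel:
+                  //   sample   = (dest_x & 1) | ((dest_y & 1) << 1);
+                  //   source_x = dest_x >> 1;
+                  //   source_y = dest_y >> 1;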
+                  id_vector_temp.clear();
+                  id_vector_temp.reserve(4);
+                  id_vector_temp.push_back(builder.createBinOp(
+                      spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x,
+                      builder.makeUintConstant(1)));
+                  id_vector_temp.push_back(dest_tile_pixel_y);
+                  id_vector_temp.push_back(builder.makeUintConstant(1));
+                  id_vector_temp.push_back(builder.makeUintConstant(1));
+                  host_depth_source_sample_id = builder.createOp(
+                      spv::OpBitFieldInsert, type_uint, id_vector_temp);
+                  host_depth_source_tile_pixel_x = builder.createBinOp(
+                      spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x,
+                      builder.makeUintConstant(1));
+                  host_depth_source_tile_pixel_y = builder.createBinOp(
+                      spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y,
+                      builder.makeUintConstant(1));
+                }
+              } else {
+                // 1x/2x -> 1x/2x/4x (as long as they're different).
+                // Only the X part - Y is handled by common code.
+                if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) {
+                  // Horizontal samples to pixels.
+                  id_vector_temp.clear();
+                  id_vector_temp.reserve(4);
+                  id_vector_temp.push_back(dest_sample_id);
+                  id_vector_temp.push_back(dest_tile_pixel_x);
+                  id_vector_temp.push_back(builder.makeUintConstant(1));
+                  id_vector_temp.push_back(builder.makeUintConstant(31));
+                  host_depth_source_tile_pixel_x = builder.createOp(
+                      spv::OpBitFieldInsert, type_uint, id_vector_temp);
+                }
+              }
+              // Host depth source Y and sample index for 1x/2x AA sources.
+              if (key.host_depth_source_msaa_samples <
+                  xenos::MsaaSamples::k4X) {
+                if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) {
+                  // 1x/2x -> 4x.
+                  if (key.host_depth_source_msaa_samples ==
+                      xenos::MsaaSamples::k2X) {
+                    // 2x -> 4x.
+                    // Vertical samples (second bit) of 4x destination to
+                    // vertical sample (1, 0 for native 2x, or 0, 3 for 2x as
+                    // 4x) of 2x source.
+                    host_depth_source_sample_id = builder.createBinOp(
+                        spv::OpShiftRightLogical, type_uint, dest_sample_id,
+                        builder.makeUintConstant(1));
+                    if (msaa_2x_attachments_supported_) {
+                      host_depth_source_sample_id =
+                          builder.createBinOp(spv::OpBitwiseXor, type_uint,
+                                              host_depth_source_sample_id,
+                                              builder.makeUintConstant(1));
+                    } else {
+                      id_vector_temp.clear();
+                      id_vector_temp.reserve(4);
+                      id_vector_temp.push_back(host_depth_source_sample_id);
+                      id_vector_temp.push_back(host_depth_source_sample_id);
+                      id_vector_temp.push_back(builder.makeUintConstant(1));
+                      id_vector_temp.push_back(builder.makeUintConstant(1));
+                      host_depth_source_sample_id = builder.createOp(
+                          spv::OpBitFieldInsert, type_uint, id_vector_temp);
+                    }
+                  } else {
+                    // 1x -> 4x.
+                    // Vertical samples (second bit) to Y pixels.
+                    id_vector_temp.clear();
+                    id_vector_temp.reserve(4);
+                    id_vector_temp.push_back(builder.createBinOp(
+                        spv::OpShiftRightLogical, type_uint, dest_sample_id,
+                        builder.makeUintConstant(1)));
+                    id_vector_temp.push_back(dest_tile_pixel_y);
+                    id_vector_temp.push_back(builder.makeUintConstant(1));
+                    id_vector_temp.push_back(builder.makeUintConstant(31));
+                    host_depth_source_tile_pixel_y = builder.createOp(
+                        spv::OpBitFieldInsert, type_uint, id_vector_temp);
+                  }
+                } else {
+                  // 1x/2x -> different 1x/2x.
+                  if (key.host_depth_source_msaa_samples ==
+                      xenos::MsaaSamples::k2X) {
+                    // 2x -> 1x.
+                    // Vertical pixels of the 1x destination to vertical
+                    // samples (1, 0 for native 2x, or 0, 3 for 2x as 4x) of
+                    // the 2x source.
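+                    // Sample selection sketch:
+                    //   s = dest_y & 1;
+                    //   s = native_2x ? s ^ 1          // sample 1 on top
+                    //                 : s | (s << 1);  // 2x as 4x: 0 or 3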
+ host_depth_source_sample_id = builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + host_depth_source_sample_id = + builder.createBinOp(spv::OpBitwiseXor, type_uint, + host_depth_source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + host_depth_source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } else { + // 1x -> 2x. + // Vertical samples (1/0 in the first bit for native 2x or + // 0/1 in the second bit for 2x as 4x) of 2x destination to + // vertical pixels of 1x source. + if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_y = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_y = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } + } + } + } + assert_true(push_constants_member_host_depth_address != UINT32_MAX); + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant( + int32_t(push_constants_member_host_depth_address))); + spv::Id host_depth_address_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, + push_constants, id_vector_temp), + spv::NoPrecision); + // Transform the destination tile index into the host depth source. + spv::Id host_depth_source_tile_index = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + dest_tile_index), + builder.createTriOp( + spv::OpBitFieldSExtract, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + host_depth_address_constant), + builder.makeUintConstant(xenos::kEdramPitchTilesBits * + 2), + builder.makeUintConstant(xenos::kEdramBaseTilesBits)))); + // Split the host depth source tile index into X and Y tile index + // within the source image. 
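+            // Address math sketch (EDRAM tiles are 80x16 samples, row-major,
+            // with the pitch in tiles taken from the push constant):
+            //   tile_y  = tile_index / pitch_tiles;
+            //   tile_x  = tile_index % pitch_tiles;
+            //   pixel_x = tile_x * (80 >> (msaa >= 4x ? 1 : 0)) + in_tile_x;
+            //   pixel_y = tile_y * (16 >> (msaa >= 2x ? 1 : 0)) + in_tile_y;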
+ spv::Id host_depth_source_pitch_tiles = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, host_depth_address_constant, + builder.makeUintConstant(xenos::kEdramPitchTilesBits), + builder.makeUintConstant(xenos::kEdramPitchTilesBits)); + spv::Id host_depth_source_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, host_depth_source_tile_index, + host_depth_source_pitch_tiles); + spv::Id host_depth_source_tile_index_x = builder.createBinOp( + spv::OpUMod, type_uint, host_depth_source_tile_index, + host_depth_source_pitch_tiles); + // Finally calculate the host depth source texture coordinates. + spv::Id host_depth_source_pixel_x_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant( + tile_width_samples >> + uint32_t(key.source_msaa_samples >= + xenos::MsaaSamples::k4X)), + host_depth_source_tile_index_x), + host_depth_source_tile_pixel_x)); + spv::Id host_depth_source_pixel_y_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant( + tile_height_samples >> + uint32_t(key.source_msaa_samples >= + xenos::MsaaSamples::k2X)), + host_depth_source_tile_index_y), + host_depth_source_tile_pixel_y)); + // Load the host depth source. + spv::Builder::TextureParameters + host_depth_source_texture_parameters = {}; + host_depth_source_texture_parameters.sampler = + builder.createLoad(host_depth_source_texture, spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(host_depth_source_pixel_x_int); + id_vector_temp.push_back(host_depth_source_pixel_y_int); + host_depth_source_texture_parameters.coords = + builder.createCompositeConstruct(type_int2, id_vector_temp); + if (key.host_depth_source_msaa_samples != xenos::MsaaSamples::k1X) { + host_depth_source_texture_parameters.sample = + builder.createUnaryOp(spv::OpBitcast, type_int, + host_depth_source_sample_id); + } else { + host_depth_source_texture_parameters.lod = + builder.makeIntConstant(0); + } + host_depth32 = builder.createCompositeExtract( + builder.createTextureCall(spv::NoPrecision, type_float4, false, + true, false, false, false, + host_depth_source_texture_parameters, + spv::ImageOperandsMaskNone), + type_float, 0); + } else if (host_depth_source_buffer != spv::NoResult) { + // Get the address in the EDRAM scratch buffer and load from there. + // The beginning of the buffer is (0, 0) of the destination. + // 40-sample columns are not swapped for addressing simplicity + // (because this is used for depth -> depth transfers, where + // swapping isn't needed). + // Convert samples to pixels. + assert_true(key.host_depth_source_msaa_samples == + xenos::MsaaSamples::k1X); + spv::Id dest_tile_sample_x = dest_tile_pixel_x; + spv::Id dest_tile_sample_y = dest_tile_pixel_y; + if (key.dest_msaa_samples >= xenos::MsaaSamples::k2X) { + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // Horizontal sample index in bit 0. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + dest_tile_sample_x = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + // Vertical sample index as 1 or 0 in bit 0 for true 2x or as 0 + // or 1 in bit 1 for 4x or for 2x emulated as 4x. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + (key.dest_msaa_samples == xenos::MsaaSamples::k2X && + msaa_2x_attachments_supported_) + ? spv::OpBitwiseXor + : spv::OpShiftRightLogical, + type_uint, dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + dest_tile_sample_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + // Combine the tile sample index and the tile index. + spv::Id host_depth_offset = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width_samples * + tile_height_samples), + dest_tile_index), + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width_samples), + dest_tile_sample_y), + dest_tile_sample_x)); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + // The only SSBO structure member. + id_vector_temp.push_back(builder.makeIntConstant(0)); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpBitcast, type_int, host_depth_offset)); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is + // generated, it's Uniform. + host_depth32 = builder.createUnaryOp( + spv::OpBitcast, type_float, + builder.createLoad( + builder.createAccessChain(spv::StorageClassUniform, + host_depth_source_buffer, + id_vector_temp), + spv::NoPrecision)); + } + spv::Block* depth24_to_depth32_header = builder.getBuildPoint(); + spv::Id depth24_to_depth32_convert_id = spv::NoResult; + spv::Block* depth24_to_depth32_merge = nullptr; + spv::Id host_depth24 = spv::NoResult; + if (host_depth32 != spv::NoResult) { + // Convert the host depth value to the guest format and check if it + // matches the value in the currently owning guest render target. + switch (dest_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the + // correct conversion, adding +0.5 and rounding towards zero + // results in red instead of black in the 4D5307E6 clear shader. + id_vector_temp.clear(); + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, host_depth32, + builder.makeFloatConstant(float(0xFFFFFF)))); + host_depth24 = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, + id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, host_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); + } break; + } + assert_true(host_depth24 != spv::NoResult); + // Update the header block pointer after the conversion (to avoid + // assuming that the conversion doesn't branch). 
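+            // Shape of the code being emitted, as pseudocode (a sketch - the
+            // conversions are inlined SPIR-V, not real helper calls):
+            //   if (ToGuestFormat(host_depth32) != guest_depth24) {
+            //     depth = GuestDepth24To32(guest_depth24);  // outdated host
+            //   } else {
+            //     depth = host_depth32;  // up to date, keep full precision
+            //   }
+            // where GuestDepth24To32(d) for D24S8 is
+            //   float(d + (d >> 23)) * (1.0f / 16777216.0f),
+            // mapping 0xFFFFFF exactly to 1.0f. The incoming values are
+            // merged with an OpPhi keyed on the block IDs captured before and
+            // after the branch, hence the build point bookkeeping here.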
+            depth24_to_depth32_header = builder.getBuildPoint();
+            spv::Id host_depth_outdated = builder.createBinOp(
+                spv::OpINotEqual, type_bool, guest_depth24, host_depth24);
+            spv::Block& depth24_to_depth32_convert_entry =
+                builder.makeNewBlock();
+            {
+              spv::Block& depth24_to_depth32_merge_block =
+                  builder.makeNewBlock();
+              depth24_to_depth32_merge = &depth24_to_depth32_merge_block;
+            }
+            {
+              std::unique_ptr<spv::Instruction> depth24_to_depth32_merge_op =
+                  std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
+              depth24_to_depth32_merge_op->addIdOperand(
+                  depth24_to_depth32_merge->getId());
+              depth24_to_depth32_merge_op->addImmediateOperand(
+                  spv::SelectionControlMaskNone);
+              builder.getBuildPoint()->addInstruction(
+                  std::move(depth24_to_depth32_merge_op));
+            }
+            builder.createConditionalBranch(host_depth_outdated,
+                                            &depth24_to_depth32_convert_entry,
+                                            depth24_to_depth32_merge);
+            builder.setBuildPoint(&depth24_to_depth32_convert_entry);
+          }
+          // Convert the guest 24-bit depth to float32 (in an open conditional
+          // if the host depth is also loaded).
+          spv::Id guest_depth32 = spv::NoResult;
+          switch (dest_depth_format) {
+            case xenos::DepthRenderTargetFormat::kD24S8: {
+              // Multiplying by 1.0 / 0xFFFFFF produces an incorrect result
+              // (for 0xC00000, for instance - which is 2_10_10_10 clear to
+              // 0001) - rescale from 0...0xFFFFFF to 0...0x1000000 doing what
+              // true float division followed by multiplication does (on
+              // x86-64 MSVC with default SSE rounding) - values starting from
+              // 0x800000 become bigger by 1; then accurately bias the
+              // result's exponent.
+              guest_depth32 = builder.createBinOp(
+                  spv::OpFMul, type_float,
+                  builder.createUnaryOp(
+                      spv::OpConvertUToF, type_float,
+                      builder.createBinOp(
+                          spv::OpIAdd, type_uint, guest_depth24,
+                          builder.createBinOp(spv::OpShiftRightLogical,
+                                              type_uint, guest_depth24,
+                                              builder.makeUintConstant(23)))),
+                  builder.makeFloatConstant(1.0f / float(1 << 24)));
+            } break;
+            case xenos::DepthRenderTargetFormat::kD24FS8: {
+              guest_depth32 = SpirvShaderTranslator::Depth20e4To32(
+                  builder, guest_depth24, 0, true, false,
+                  ext_inst_glsl_std_450);
+            } break;
+          }
+          assert_true(guest_depth32 != spv::NoResult);
+          spv::Id fragment_depth32 = guest_depth32;
+          if (host_depth32 != spv::NoResult) {
+            assert_not_null(depth24_to_depth32_merge);
+            spv::Id depth24_to_depth32_result_block_id =
+                builder.getBuildPoint()->getId();
+            builder.createBranch(depth24_to_depth32_merge);
+            builder.setBuildPoint(depth24_to_depth32_merge);
+            id_vector_temp.clear();
+            id_vector_temp.reserve(4);
+            id_vector_temp.push_back(guest_depth32);
+            id_vector_temp.push_back(depth24_to_depth32_result_block_id);
+            id_vector_temp.push_back(host_depth32);
+            id_vector_temp.push_back(depth24_to_depth32_header->getId());
+            fragment_depth32 =
+                builder.createOp(spv::OpPhi, type_float, id_vector_temp);
+          }
+          builder.createStore(fragment_depth32, output_fragment_depth);
+        }
+      } break;
+      case TransferOutput::kStencilBit: {
+        if (packed) {
+          // Kill the sample if the needed stencil bit is not set.
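+          // Equivalent fragment logic (sketch):
+          //   if ((packed & stencil_mask_push_constant) == 0u) {
+          //     discard;  // OpKill below
+          //   }
+          // Presumably drawn once per stencil bit, with the dynamic stencil
+          // write mask limited to that bit, so surviving fragments set it via
+          // the ALWAYS-passing REPLACE stencil op.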
+          assert_true(push_constants_member_stencil_mask != UINT32_MAX);
+          id_vector_temp.clear();
+          id_vector_temp.push_back(builder.makeIntConstant(
+              int32_t(push_constants_member_stencil_mask)));
+          spv::Id stencil_mask_constant = builder.createLoad(
+              builder.createAccessChain(spv::StorageClassPushConstant,
+                                        push_constants, id_vector_temp),
+              spv::NoPrecision);
+          spv::Id stencil_sample_passed = builder.createBinOp(
+              spv::OpINotEqual, type_bool,
+              builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed,
+                                  stencil_mask_constant),
+              builder.makeUintConstant(0));
+          spv::Block& stencil_bit_kill_block = builder.makeNewBlock();
+          spv::Block& stencil_bit_merge_block = builder.makeNewBlock();
+          {
+            std::unique_ptr<spv::Instruction> stencil_bit_merge_op =
+                std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
+            stencil_bit_merge_op->addIdOperand(stencil_bit_merge_block.getId());
+            stencil_bit_merge_op->addImmediateOperand(
+                spv::SelectionControlMaskNone);
+            builder.getBuildPoint()->addInstruction(
+                std::move(stencil_bit_merge_op));
+          }
+          builder.createConditionalBranch(stencil_sample_passed,
+                                          &stencil_bit_merge_block,
+                                          &stencil_bit_kill_block);
+          builder.setBuildPoint(&stencil_bit_kill_block);
+          builder.createNoResultOp(spv::OpKill);
+          builder.setBuildPoint(&stencil_bit_merge_block);
+        }
+      } break;
+    }
+  }
+
+  // End the main function and make it the entry point.
+  builder.leaveFunction();
+  builder.addExecutionMode(main_function, spv::ExecutionModeOriginUpperLeft);
+  if (output_fragment_depth != spv::NoResult) {
+    builder.addExecutionMode(main_function, spv::ExecutionModeDepthReplacing);
+  }
+  if (output_fragment_stencil_ref != spv::NoResult) {
+    builder.addExecutionMode(main_function,
+                             spv::ExecutionModeStencilRefReplacingEXT);
+  }
+  spv::Instruction* entry_point =
+      builder.addEntryPoint(spv::ExecutionModelFragment, main_function, "main");
+  for (spv::Id interface_id : main_interface) {
+    entry_point->addIdOperand(interface_id);
+  }
+
+  // Serialize the shader code.
+  std::vector<unsigned int> shader_code;
+  builder.dump(shader_code);
+
+  // Create the shader module, and store the handle even if creation fails, so
+  // that creation isn't attempted again later.
+  VkShaderModule shader_module = ui::vulkan::util::CreateShaderModule(
+      provider, reinterpret_cast<const uint32_t*>(shader_code.data()),
+      sizeof(uint32_t) * shader_code.size());
+  if (shader_module == VK_NULL_HANDLE) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create the render target ownership "
+        "transfer shader 0x{:08X}",
+        key.key);
+  }
+  transfer_shaders_.emplace(key, shader_module);
+  return shader_module;
+}
+
+VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
+    TransferPipelineKey key) {
+  auto pipeline_it = transfer_pipelines_.find(key);
+  if (pipeline_it != transfer_pipelines_.end()) {
+    return pipeline_it->second[0] != VK_NULL_HANDLE ?
pipeline_it->second.data() + : nullptr; + } + + VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + VkShaderModule fragment_shader_module = GetTransferShader(key.shader_key); + if (render_pass == VK_NULL_HANDLE || + fragment_shader_module == VK_NULL_HANDLE) { + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + + const TransferModeInfo& mode = kTransferModes[size_t(key.shader_key.mode)]; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + uint32_t dest_sample_count = uint32_t(1) + << uint32_t(key.shader_key.dest_msaa_samples); + bool dest_is_masked_sample = + dest_sample_count > 1 && !device_features.sampleRateShading; + + VkPipelineShaderStageCreateInfo shader_stages[2]; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].pNext = nullptr; + shader_stages[0].flags = 0; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stages[0].module = transfer_passthrough_vertex_shader_; + shader_stages[0].pName = "main"; + shader_stages[0].pSpecializationInfo = nullptr; + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].pNext = nullptr; + shader_stages[1].flags = 0; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = fragment_shader_module; + shader_stages[1].pName = "main"; + shader_stages[1].pSpecializationInfo = nullptr; + VkSpecializationMapEntry sample_id_specialization_map_entry; + uint32_t sample_id_specialization_constant; + VkSpecializationInfo sample_id_specialization_info; + if (dest_is_masked_sample) { + sample_id_specialization_map_entry.constantID = 0; + sample_id_specialization_map_entry.offset = 0; + sample_id_specialization_map_entry.size = sizeof(uint32_t); + sample_id_specialization_constant = 0; + sample_id_specialization_info.mapEntryCount = 1; + sample_id_specialization_info.pMapEntries = + &sample_id_specialization_map_entry; + sample_id_specialization_info.dataSize = + sizeof(sample_id_specialization_constant); + sample_id_specialization_info.pData = &sample_id_specialization_constant; + shader_stages[1].pSpecializationInfo = &sample_id_specialization_info; + } + + VkVertexInputBindingDescription vertex_input_binding; + vertex_input_binding.binding = 0; + vertex_input_binding.stride = sizeof(float) * 2; + vertex_input_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + VkVertexInputAttributeDescription vertex_input_attribute; + vertex_input_attribute.location = 0; + vertex_input_attribute.binding = 0; + vertex_input_attribute.format = VK_FORMAT_R32G32_SFLOAT; + vertex_input_attribute.offset = 0; + VkPipelineVertexInputStateCreateInfo vertex_input_state; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_state.pNext = nullptr; + vertex_input_state.flags = 0; + vertex_input_state.vertexBindingDescriptionCount = 1; + vertex_input_state.pVertexBindingDescriptions = &vertex_input_binding; + vertex_input_state.vertexAttributeDescriptionCount = 1; + vertex_input_state.pVertexAttributeDescriptions = &vertex_input_attribute; + + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + 
input_assembly_state.flags = 0; + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_assembly_state.primitiveRestartEnable = VK_FALSE; + + // Dynamic, to stay within maxViewportDimensions while preferring a + // power-of-two factor for converting from pixel coordinates to NDC for exact + // precision. + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_state.cullMode = VK_CULL_MODE_NONE; + rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization_state.lineWidth = 1.0f; + + // For samples other than the first, will be changed for the pipelines for + // other samples. + VkSampleMask sample_mask = UINT32_MAX; + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_state.rasterizationSamples = + (dest_sample_count == 2 && !msaa_2x_attachments_supported_) + ? VK_SAMPLE_COUNT_4_BIT + : VkSampleCountFlagBits(dest_sample_count); + if (dest_sample_count > 1) { + if (device_features.sampleRateShading) { + multisample_state.sampleShadingEnable = VK_TRUE; + multisample_state.minSampleShading = 1.0f; + if (dest_sample_count == 2 && !msaa_2x_attachments_supported_) { + // Emulating 2x MSAA as samples 0 and 3 of 4x MSAA when 2x is not + // supported. + sample_mask = 0b1001; + } + } else { + sample_mask = 0b1; + } + if (sample_mask != UINT32_MAX) { + multisample_state.pSampleMask = &sample_mask; + } + } + + // Whether the depth / stencil state is used depends on the presence of a + // depth attachment in the render pass - but not making assumptions about + // whether the render pass contains any specific attachments, so setting up + // valid depth / stencil state unconditionally. + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; + depth_stencil_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + if (mode.output == TransferOutput::kDepth) { + depth_stencil_state.depthTestEnable = VK_TRUE; + depth_stencil_state.depthWriteEnable = VK_TRUE; + depth_stencil_state.depthCompareOp = cvars::depth_transfer_not_equal_test + ? VK_COMPARE_OP_NOT_EQUAL + : VK_COMPARE_OP_ALWAYS; + } + if ((mode.output == TransferOutput::kDepth && + provider.device_extensions().ext_shader_stencil_export) || + mode.output == TransferOutput::kStencilBit) { + depth_stencil_state.stencilTestEnable = VK_TRUE; + depth_stencil_state.front.failOp = VK_STENCIL_OP_KEEP; + depth_stencil_state.front.passOp = VK_STENCIL_OP_REPLACE; + depth_stencil_state.front.depthFailOp = VK_STENCIL_OP_REPLACE; + // Using ALWAYS, not NOT_EQUAL, so depth writing is unaffected by stencil + // being different. + depth_stencil_state.front.compareOp = VK_COMPARE_OP_ALWAYS; + // Will be dynamic for stencil bit output. 
+ depth_stencil_state.front.writeMask = UINT8_MAX; + depth_stencil_state.front.reference = UINT8_MAX; + depth_stencil_state.back = depth_stencil_state.front; + } + + // Whether the color blend state is used depends on the presence of color + // attachments in the render pass - but not making assumptions about whether + // the render pass contains any specific attachments, so setting up valid + // color blend state unconditionally. + VkPipelineColorBlendAttachmentState + color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; + VkPipelineColorBlendStateCreateInfo color_blend_state = {}; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.attachmentCount = + 32 - xe::lzcnt(key.render_pass_key.depth_and_color_used >> 1); + color_blend_state.pAttachments = color_blend_attachments; + if (mode.output == TransferOutput::kColor) { + if (device_features.independentBlend) { + // State the intention more explicitly. + color_blend_attachments[key.shader_key.dest_color_rt_index] + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } else { + // The blend state for all attachments must be identical, but other render + // targets are not written to by the shader. + for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) { + color_blend_attachments[i].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + } + } + + std::array dynamic_states; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = 0; + dynamic_state.pDynamicStates = dynamic_states.data(); + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + if (mode.output == TransferOutput::kStencilBit) { + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + } + + std::array pipelines{}; + VkGraphicsPipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + if (dest_is_masked_sample) { + pipeline_create_info.flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; + } + pipeline_create_info.stageCount = uint32_t(xe::countof(shader_stages)); + pipeline_create_info.pStages = shader_stages; + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pTessellationState = nullptr; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pColorBlendState = &color_blend_state; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.layout = + transfer_pipeline_layouts_[size_t(mode.pipeline_layout)]; + pipeline_create_info.renderPass = render_pass; + pipeline_create_info.subpass = 0; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = -1; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + 
&pipeline_create_info, nullptr, + &pipelines[0]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target ownership " + "transfer pipeline for render pass 0x{:08X}, shader 0x{:08X}", + key.render_pass_key.key, key.shader_key.key); + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + if (dest_is_masked_sample) { + assert_true(multisample_state.pSampleMask == &sample_mask); + pipeline_create_info.flags = (pipeline_create_info.flags & + ~VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) | + VK_PIPELINE_CREATE_DERIVATIVE_BIT; + pipeline_create_info.basePipelineHandle = pipelines[0]; + for (uint32_t i = 1; i < dest_sample_count; ++i) { + // Emulating 2x MSAA as samples 0 and 3 of 4x MSAA when 2x is not + // supported. + uint32_t host_sample_index = + (dest_sample_count == 2 && !msaa_2x_attachments_supported_ && i == 1) + ? 3 + : i; + sample_id_specialization_constant = host_sample_index; + sample_mask = uint32_t(1) << host_sample_index; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipelines[i]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target " + "ownership transfer pipeline for render pass 0x{:08X}, shader " + "0x{:08X}, sample {}", + key.render_pass_key.key, key.shader_key.key, i); + for (uint32_t j = 0; j < i; ++j) { + dfn.vkDestroyPipeline(device, pipelines[j], nullptr); + } + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + } + } + return transfer_pipelines_.emplace(key, pipelines).first->second.data(); +} + +void VulkanRenderTargetCache::PerformTransfersAndResolveClears( + uint32_t render_target_count, RenderTarget* const* render_targets, + const std::vector* render_target_transfers, + const uint64_t* render_target_resolve_clear_values, + const Transfer::Rectangle* resolve_clear_rectangle) { + assert_true(GetPath() == Path::kHostRenderTargets); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + bool shader_stencil_export = + provider.device_extensions().ext_shader_stencil_export; + uint64_t current_submission = command_processor_.GetCurrentSubmission(); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + bool resolve_clear_needed = + render_target_resolve_clear_values && resolve_clear_rectangle; + VkClearRect resolve_clear_rect; + if (resolve_clear_needed) { + // Assuming the rectangle is already clamped by the setup function from the + // common render target cache. + resolve_clear_rect.rect.offset.x = + int32_t(resolve_clear_rectangle->x_pixels * draw_resolution_scale_x()); + resolve_clear_rect.rect.offset.y = + int32_t(resolve_clear_rectangle->y_pixels * draw_resolution_scale_y()); + resolve_clear_rect.rect.extent.width = + resolve_clear_rectangle->width_pixels * draw_resolution_scale_x(); + resolve_clear_rect.rect.extent.height = + resolve_clear_rectangle->height_pixels * draw_resolution_scale_y(); + resolve_clear_rect.baseArrayLayer = 0; + resolve_clear_rect.layerCount = 1; + } + + // Do host depth storing for the depth destination (assuming there can be only + // one depth destination) where depth destination == host depth source. 
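+  // Sequence sketch: when the destination is also the host depth source, its
+  // float32 depth would be overwritten by the transfers, so it is snapshotted
+  // first:
+  //   1. A compute pass copies the destination's depth samples into the EDRAM
+  //      scratch buffer.
+  //   2. The transfer draws then read the "copied host depth" from that
+  //      buffer (the *HostDepthCopyToDepth modes) instead of from a texture.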
+ bool host_depth_store_set_up = false; + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + auto& dest_vulkan_rt = *static_cast(dest_rt); + RenderTargetKey dest_rt_key = dest_vulkan_rt.key(); + if (!dest_rt_key.is_depth) { + continue; + } + const std::vector& depth_transfers = render_target_transfers[i]; + for (const Transfer& transfer : depth_transfers) { + if (transfer.host_depth_source != dest_rt) { + continue; + } + if (!host_depth_store_set_up) { + // Pipeline. + command_processor_.BindExternalComputePipeline( + host_depth_store_pipelines_[size_t(dest_rt_key.msaa_samples)]); + // Descriptor set bindings. + VkDescriptorSet host_depth_store_descriptor_sets[] = { + edram_storage_buffer_descriptor_set_, + dest_vulkan_rt.GetDescriptorSetTransferSource(), + }; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, host_depth_store_pipeline_layout_, + 0, uint32_t(xe::countof(host_depth_store_descriptor_sets)), + host_depth_store_descriptor_sets, 0, nullptr); + // Render target constant. + HostDepthStoreRenderTargetConstant + host_depth_store_render_target_constant = + GetHostDepthStoreRenderTargetConstant( + dest_rt_key.pitch_tiles_at_32bpp, + msaa_2x_attachments_supported_); + command_buffer.CmdVkPushConstants( + host_depth_store_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, + uint32_t(offsetof(HostDepthStoreConstants, render_target)), + sizeof(host_depth_store_render_target_constant), + &host_depth_store_render_target_constant); + // Barriers - don't need to try to combine them with the rest of + // render target transfer barriers now - if this happens, after host + // depth storing, SHADER_READ -> DEPTH_STENCIL_ATTACHMENT_WRITE will be + // done anyway even in the best case, so it's not possible to have all + // the barriers in one place here. + UseEdramBuffer(EdramBufferUsage::kComputeWrite); + // Always transitioning both depth and stencil, not storing separate + // usage flags for depth and stencil. 
+ command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + dest_vulkan_rt.current_stage_mask(), + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + dest_vulkan_rt.current_access_mask(), VK_ACCESS_SHADER_READ_BIT, + dest_vulkan_rt.current_layout(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + dest_vulkan_rt.SetUsage(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + host_depth_store_set_up = true; + } + Transfer::Rectangle + transfer_rectangles[Transfer::kMaxRectanglesWithCutout]; + uint32_t transfer_rectangle_count = transfer.GetRectangles( + dest_rt_key.base_tiles, dest_rt_key.pitch_tiles_at_32bpp, + dest_rt_key.msaa_samples, false, transfer_rectangles, + resolve_clear_rectangle); + assert_not_zero(transfer_rectangle_count); + HostDepthStoreRectangleConstant host_depth_store_rectangle_constant; + for (uint32_t j = 0; j < transfer_rectangle_count; ++j) { + uint32_t group_count_x, group_count_y; + GetHostDepthStoreRectangleInfo( + transfer_rectangles[j], dest_rt_key.msaa_samples, + host_depth_store_rectangle_constant, group_count_x, group_count_y); + command_buffer.CmdVkPushConstants( + host_depth_store_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, + uint32_t(offsetof(HostDepthStoreConstants, rectangle)), + sizeof(host_depth_store_rectangle_constant), + &host_depth_store_rectangle_constant); + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch(group_count_x, group_count_y, 1); + MarkEdramBufferModified(); + } + } + break; + } + + constexpr VkPipelineStageFlags kSourceStageMask = + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + constexpr VkAccessFlags kSourceAccessMask = VK_ACCESS_SHADER_READ_BIT; + constexpr VkImageLayout kSourceLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + // Try to insert as many barriers as possible in one place, hoping that in the + // best case (no cross-copying between current render targets), barriers will + // need to be only inserted here, not between transfers. In case of + // cross-copying, if the destination use is going to happen before the source + // use, choose the destination state, otherwise the source state - to match + // the order in which transfers will actually happen (otherwise there will be + // just a useless switch back and forth). + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + const std::vector& dest_transfers = render_target_transfers[i]; + if (!resolve_clear_needed && dest_transfers.empty()) { + continue; + } + // Transition the destination, only if not going to be used as a source + // earlier. + bool dest_used_previously_as_source = false; + for (uint32_t j = 0; j < i; ++j) { + for (const Transfer& previous_transfer : render_target_transfers[j]) { + if (previous_transfer.source == dest_rt || + previous_transfer.host_depth_source == dest_rt) { + dest_used_previously_as_source = true; + break; + } + } + } + if (!dest_used_previously_as_source) { + auto& dest_vulkan_rt = *static_cast(dest_rt); + VkPipelineStageFlags dest_dst_stage_mask; + VkAccessFlags dest_dst_access_mask; + VkImageLayout dest_new_layout; + dest_vulkan_rt.GetDrawUsage(&dest_dst_stage_mask, &dest_dst_access_mask, + &dest_new_layout); + command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + dest_vulkan_rt.key().is_depth + ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + dest_vulkan_rt.current_stage_mask(), dest_dst_stage_mask, + dest_vulkan_rt.current_access_mask(), dest_dst_access_mask, + dest_vulkan_rt.current_layout(), dest_new_layout); + dest_vulkan_rt.SetUsage(dest_dst_stage_mask, dest_dst_access_mask, + dest_new_layout); + } + // Transition the sources, only if not going to be used as destinations + // earlier. + for (const Transfer& transfer : dest_transfers) { + bool source_previously_used_as_dest = false; + bool host_depth_source_previously_used_as_dest = false; + for (uint32_t j = 0; j < i; ++j) { + if (render_target_transfers[j].empty()) { + continue; + } + const RenderTarget* previous_rt = render_targets[j]; + if (transfer.source == previous_rt) { + source_previously_used_as_dest = true; + } + if (transfer.host_depth_source == previous_rt) { + host_depth_source_previously_used_as_dest = true; + } + } + if (!source_previously_used_as_dest) { + auto& source_vulkan_rt = + *static_cast(transfer.source); + command_processor_.PushImageMemoryBarrier( + source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + source_vulkan_rt.key().is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + source_vulkan_rt.current_stage_mask(), kSourceStageMask, + source_vulkan_rt.current_access_mask(), kSourceAccessMask, + source_vulkan_rt.current_layout(), kSourceLayout); + source_vulkan_rt.SetUsage(kSourceStageMask, kSourceAccessMask, + kSourceLayout); + } + // transfer.host_depth_source == dest_rt means the EDRAM buffer will be + // used instead, no need to transition. + if (transfer.host_depth_source && transfer.host_depth_source != dest_rt && + !host_depth_source_previously_used_as_dest) { + auto& host_depth_source_vulkan_rt = + *static_cast(transfer.host_depth_source); + command_processor_.PushImageMemoryBarrier( + host_depth_source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + host_depth_source_vulkan_rt.current_stage_mask(), kSourceStageMask, + host_depth_source_vulkan_rt.current_access_mask(), + kSourceAccessMask, host_depth_source_vulkan_rt.current_layout(), + kSourceLayout); + host_depth_source_vulkan_rt.SetUsage(kSourceStageMask, + kSourceAccessMask, kSourceLayout); + } + } + } + if (host_depth_store_set_up) { + // Will be reading copied host depth from the EDRAM buffer. + UseEdramBuffer(EdramBufferUsage::kFragmentRead); + } + + // Perform the transfers and clears. 
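+  // Redundant-binding elimination sketch for the loop below - bindings and
+  // push constants are re-set only when their values actually change, and a
+  // pipeline layout switch invalidates everything:
+  //   if (layout != last_layout) { sets_bound = 0; constants_set = 0; }
+  //   if (!(sets_bound & set_bit)) { /* vkCmdBindDescriptorSets */ }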
+ + TransferPipelineLayoutIndex last_transfer_pipeline_layout_index = + TransferPipelineLayoutIndex::kCount; + uint32_t transfer_descriptor_sets_bound = 0; + uint32_t transfer_push_constants_set = 0; + VkDescriptorSet last_descriptor_set_host_depth_stencil_textures = + VK_NULL_HANDLE; + VkDescriptorSet last_descriptor_set_depth_stencil_textures = VK_NULL_HANDLE; + VkDescriptorSet last_descriptor_set_color_texture = VK_NULL_HANDLE; + TransferAddressConstant last_host_depth_address_constant; + TransferAddressConstant last_address_constant; + + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + + const std::vector& current_transfers = render_target_transfers[i]; + if (current_transfers.empty() && !resolve_clear_needed) { + continue; + } + + auto& dest_vulkan_rt = *static_cast(dest_rt); + RenderTargetKey dest_rt_key = dest_vulkan_rt.key(); + + // Late barriers in case there was cross-copying that prevented merging of + // barriers. + { + VkPipelineStageFlags dest_dst_stage_mask; + VkAccessFlags dest_dst_access_mask; + VkImageLayout dest_new_layout; + dest_vulkan_rt.GetDrawUsage(&dest_dst_stage_mask, &dest_dst_access_mask, + &dest_new_layout); + command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + dest_rt_key.is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + dest_vulkan_rt.current_stage_mask(), dest_dst_stage_mask, + dest_vulkan_rt.current_access_mask(), dest_dst_access_mask, + dest_vulkan_rt.current_layout(), dest_new_layout); + dest_vulkan_rt.SetUsage(dest_dst_stage_mask, dest_dst_access_mask, + dest_new_layout); + } + + // Get the objects needed for transfers to the destination. + // TODO(Triang3l): Reuse the guest render pass for transfers where possible + // (if the Vulkan format used for drawing is also usable for transfers - for + // instance, R8G8B8A8_UNORM can be used for both, so the guest pass can be + // reused, but R16G16B16A16_SFLOAT render targets use R16G16B16A16_UINT for + // transfers, so the transfer pass has to be separate) to avoid stores and + // loads on tile-based devices to make this actually applicable. Also + // overall perform all non-cross-copying transfers for the current + // framebuffer configuration in a single pass, to load / store only once. + RenderPassKey transfer_render_pass_key; + transfer_render_pass_key.msaa_samples = dest_rt_key.msaa_samples; + if (dest_rt_key.is_depth) { + transfer_render_pass_key.depth_and_color_used = 0b1; + transfer_render_pass_key.depth_format = dest_rt_key.GetDepthFormat(); + } else { + transfer_render_pass_key.depth_and_color_used = 0b1 << 1; + transfer_render_pass_key.color_0_view_format = + dest_rt_key.GetColorFormat(); + transfer_render_pass_key.color_rts_use_transfer_formats = 1; + } + VkRenderPass transfer_render_pass = GetRenderPass(transfer_render_pass_key); + if (transfer_render_pass == VK_NULL_HANDLE) { + continue; + } + const RenderTarget* + transfer_framebuffer_render_targets[1 + xenos::kMaxColorRenderTargets] = + {}; + transfer_framebuffer_render_targets[dest_rt_key.is_depth ? 0 : 1] = dest_rt; + const Framebuffer* transfer_framebuffer = GetFramebuffer( + transfer_render_pass_key, dest_rt_key.pitch_tiles_at_32bpp, + transfer_framebuffer_render_targets); + if (!transfer_framebuffer) { + continue; + } + // Don't enter the render pass immediately - may still insert source + // barriers later. 
+ + if (!current_transfers.empty()) { + uint32_t dest_pitch_tiles = dest_rt_key.GetPitchTiles(); + bool dest_is_64bpp = dest_rt_key.Is64bpp(); + + // Gather shader keys and sort to reduce pipeline state and binding + // switches. Also gather stencil rectangles to clear if needed. + bool need_stencil_bit_draws = + dest_rt_key.is_depth && !shader_stencil_export; + current_transfer_invocations_.clear(); + current_transfer_invocations_.reserve( + current_transfers.size() << uint32_t(need_stencil_bit_draws)); + uint32_t rt_sort_index = 0; + TransferShaderKey new_transfer_shader_key; + new_transfer_shader_key.dest_msaa_samples = dest_rt_key.msaa_samples; + new_transfer_shader_key.dest_resource_format = + dest_rt_key.resource_format; + uint32_t stencil_clear_rectangle_count = 0; + for (uint32_t j = 0; j <= uint32_t(need_stencil_bit_draws); ++j) { + // j == 0 - color or depth. + // j == 1 - stencil bits. + // Stencil bit writing always requires a different root signature, + // handle these separately. Stencil never has a host depth source. + // Clear previously set sort indices. + for (const Transfer& transfer : current_transfers) { + auto host_depth_source_vulkan_rt = + static_cast(transfer.host_depth_source); + if (host_depth_source_vulkan_rt) { + host_depth_source_vulkan_rt->SetTemporarySortIndex(UINT32_MAX); + } + assert_not_null(transfer.source); + auto& source_vulkan_rt = + *static_cast(transfer.source); + source_vulkan_rt.SetTemporarySortIndex(UINT32_MAX); + } + for (const Transfer& transfer : current_transfers) { + assert_not_null(transfer.source); + auto& source_vulkan_rt = + *static_cast(transfer.source); + VulkanRenderTarget* host_depth_source_vulkan_rt = + j ? nullptr + : static_cast(transfer.host_depth_source); + if (host_depth_source_vulkan_rt && + host_depth_source_vulkan_rt->temporary_sort_index() == + UINT32_MAX) { + host_depth_source_vulkan_rt->SetTemporarySortIndex(rt_sort_index++); + } + if (source_vulkan_rt.temporary_sort_index() == UINT32_MAX) { + source_vulkan_rt.SetTemporarySortIndex(rt_sort_index++); + } + RenderTargetKey source_rt_key = source_vulkan_rt.key(); + new_transfer_shader_key.source_msaa_samples = + source_rt_key.msaa_samples; + new_transfer_shader_key.source_resource_format = + source_rt_key.resource_format; + bool host_depth_source_is_copy = + host_depth_source_vulkan_rt == &dest_vulkan_rt; + // The host depth copy buffer has only raw samples. + new_transfer_shader_key.host_depth_source_msaa_samples = + (host_depth_source_vulkan_rt && !host_depth_source_is_copy) + ? host_depth_source_vulkan_rt->key().msaa_samples + : xenos::MsaaSamples::k1X; + if (j) { + new_transfer_shader_key.mode = + source_rt_key.is_depth ? TransferMode::kDepthToStencilBit + : TransferMode::kColorToStencilBit; + stencil_clear_rectangle_count += + transfer.GetRectangles(dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, + nullptr, resolve_clear_rectangle); + } else { + if (dest_rt_key.is_depth) { + if (host_depth_source_vulkan_rt) { + if (host_depth_source_is_copy) { + new_transfer_shader_key.mode = + source_rt_key.is_depth + ? TransferMode::kDepthAndHostDepthCopyToDepth + : TransferMode::kColorAndHostDepthCopyToDepth; + } else { + new_transfer_shader_key.mode = + source_rt_key.is_depth + ? TransferMode::kDepthAndHostDepthToDepth + : TransferMode::kColorAndHostDepthToDepth; + } + } else { + new_transfer_shader_key.mode = + source_rt_key.is_depth ? 
TransferMode::kDepthToDepth + : TransferMode::kColorToDepth; + } + } else { + new_transfer_shader_key.mode = source_rt_key.is_depth + ? TransferMode::kDepthToColor + : TransferMode::kColorToColor; + } + } + current_transfer_invocations_.emplace_back(transfer, + new_transfer_shader_key); + if (j) { + current_transfer_invocations_.back().transfer.host_depth_source = + nullptr; + } + } + } + std::sort(current_transfer_invocations_.begin(), + current_transfer_invocations_.end()); + + for (auto it = current_transfer_invocations_.cbegin(); + it != current_transfer_invocations_.cend(); ++it) { + assert_not_null(it->transfer.source); + auto& source_vulkan_rt = + *static_cast(it->transfer.source); + command_processor_.PushImageMemoryBarrier( + source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + source_vulkan_rt.key().is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + source_vulkan_rt.current_stage_mask(), kSourceStageMask, + source_vulkan_rt.current_access_mask(), kSourceAccessMask, + source_vulkan_rt.current_layout(), kSourceLayout); + source_vulkan_rt.SetUsage(kSourceStageMask, kSourceAccessMask, + kSourceLayout); + auto host_depth_source_vulkan_rt = + static_cast(it->transfer.host_depth_source); + if (host_depth_source_vulkan_rt) { + TransferShaderKey transfer_shader_key = it->shader_key; + if (transfer_shader_key.mode == + TransferMode::kDepthAndHostDepthCopyToDepth || + transfer_shader_key.mode == + TransferMode::kColorAndHostDepthCopyToDepth) { + // Reading copied host depth from the EDRAM buffer. + UseEdramBuffer(EdramBufferUsage::kFragmentRead); + } else { + // Reading host depth from the texture. + command_processor_.PushImageMemoryBarrier( + host_depth_source_vulkan_rt->image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + host_depth_source_vulkan_rt->current_stage_mask(), + kSourceStageMask, + host_depth_source_vulkan_rt->current_access_mask(), + kSourceAccessMask, + host_depth_source_vulkan_rt->current_layout(), kSourceLayout); + host_depth_source_vulkan_rt->SetUsage( + kSourceStageMask, kSourceAccessMask, kSourceLayout); + } + } + } + + // Perform the transfers for the render target. 
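+    // Stencil reconstruction sketch (when VK_EXT_shader_stencil_export is
+    // unavailable): the transfer rectangles are first cleared to stencil 0
+    // with vkCmdClearAttachments, then presumably one draw per stencil bit
+    // writes that bit (via the dynamic write mask) for fragments whose source
+    // stencil has it set, discarding the rest.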
+ + command_processor_.SubmitBarriersAndEnterRenderTargetCacheRenderPass( + transfer_render_pass, transfer_framebuffer); + + if (stencil_clear_rectangle_count) { + VkClearAttachment* stencil_clear_attachment; + VkClearRect* stencil_clear_rect_write_ptr; + command_buffer.CmdClearAttachmentsEmplace(1, stencil_clear_attachment, + stencil_clear_rectangle_count, + stencil_clear_rect_write_ptr); + stencil_clear_attachment->aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + stencil_clear_attachment->colorAttachment = 0; + stencil_clear_attachment->clearValue.depthStencil.depth = 0.0f; + stencil_clear_attachment->clearValue.depthStencil.stencil = 0; + for (const Transfer& transfer : current_transfers) { + Transfer::Rectangle transfer_stencil_clear_rectangles + [Transfer::kMaxRectanglesWithCutout]; + uint32_t transfer_stencil_clear_rectangle_count = + transfer.GetRectangles(dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, + transfer_stencil_clear_rectangles, + resolve_clear_rectangle); + for (uint32_t j = 0; j < transfer_stencil_clear_rectangle_count; + ++j) { + const Transfer::Rectangle& stencil_clear_rectangle = + transfer_stencil_clear_rectangles[j]; + stencil_clear_rect_write_ptr->rect.offset.x = int32_t( + stencil_clear_rectangle.x_pixels * draw_resolution_scale_x()); + stencil_clear_rect_write_ptr->rect.offset.y = int32_t( + stencil_clear_rectangle.y_pixels * draw_resolution_scale_y()); + stencil_clear_rect_write_ptr->rect.extent.width = + stencil_clear_rectangle.width_pixels * + draw_resolution_scale_x(); + stencil_clear_rect_write_ptr->rect.extent.height = + stencil_clear_rectangle.height_pixels * + draw_resolution_scale_y(); + stencil_clear_rect_write_ptr->baseArrayLayer = 0; + stencil_clear_rect_write_ptr->layerCount = 1; + ++stencil_clear_rect_write_ptr; + } + } + } + + // Prefer power of two viewports for exact division by simply biasing the + // exponent. + VkViewport transfer_viewport; + transfer_viewport.x = 0.0f; + transfer_viewport.y = 0.0f; + transfer_viewport.width = + float(std::min(xe::next_pow2(transfer_framebuffer->host_extent.width), + device_limits.maxViewportDimensions[0])); + transfer_viewport.height = float( + std::min(xe::next_pow2(transfer_framebuffer->host_extent.height), + device_limits.maxViewportDimensions[1])); + transfer_viewport.minDepth = 0.0f; + transfer_viewport.maxDepth = 1.0f; + command_processor_.SetViewport(transfer_viewport); + float pixels_to_ndc_x = 2.0f / transfer_viewport.width; + float pixels_to_ndc_y = 2.0f / transfer_viewport.height; + VkRect2D transfer_scissor; + transfer_scissor.offset.x = 0; + transfer_scissor.offset.y = 0; + transfer_scissor.extent = transfer_framebuffer->host_extent; + command_processor_.SetScissor(transfer_scissor); + + for (auto it = current_transfer_invocations_.cbegin(); + it != current_transfer_invocations_.cend(); ++it) { + const TransferInvocation& transfer_invocation_first = *it; + // Will be merging transfers from the same source into one mesh. 
+      auto it_merged_first = it, it_merged_last = it;
+      uint32_t transfer_rectangle_count =
+          transfer_invocation_first.transfer.GetRectangles(
+              dest_rt_key.base_tiles, dest_pitch_tiles,
+              dest_rt_key.msaa_samples, dest_is_64bpp, nullptr,
+              resolve_clear_rectangle);
+      for (auto it_merge = std::next(it_merged_first);
+           it_merge != current_transfer_invocations_.cend(); ++it_merge) {
+        if (!transfer_invocation_first.CanBeMergedIntoOneDraw(*it_merge)) {
+          break;
+        }
+        transfer_rectangle_count += it_merge->transfer.GetRectangles(
+            dest_rt_key.base_tiles, dest_pitch_tiles,
+            dest_rt_key.msaa_samples, dest_is_64bpp, nullptr,
+            resolve_clear_rectangle);
+        it_merged_last = it_merge;
+      }
+      assert_not_zero(transfer_rectangle_count);
+      // Skip the merged transfers in the subsequent iterations.
+      it = it_merged_last;
+
+      assert_not_null(it->transfer.source);
+      auto& source_vulkan_rt =
+          *static_cast<VulkanRenderTarget*>(it->transfer.source);
+      auto host_depth_source_vulkan_rt =
+          static_cast<VulkanRenderTarget*>(it->transfer.host_depth_source);
+      TransferShaderKey transfer_shader_key = it->shader_key;
+      const TransferModeInfo& transfer_mode_info =
+          kTransferModes[size_t(transfer_shader_key.mode)];
+      TransferPipelineLayoutIndex transfer_pipeline_layout_index =
+          transfer_mode_info.pipeline_layout;
+      const TransferPipelineLayoutInfo& transfer_pipeline_layout_info =
+          kTransferPipelineLayoutInfos[size_t(
+              transfer_pipeline_layout_index)];
+      uint32_t transfer_sample_pipeline_count =
+          device_features.sampleRateShading
+              ? 1
+              : uint32_t(1) << uint32_t(dest_rt_key.msaa_samples);
+      bool transfer_is_stencil_bit =
+          (transfer_pipeline_layout_info.used_push_constant_dwords &
+           kTransferUsedPushConstantDwordStencilMaskBit) != 0;
+
+      uint32_t transfer_vertex_count = 6 * transfer_rectangle_count;
+      VkBuffer transfer_vertex_buffer;
+      VkDeviceSize transfer_vertex_buffer_offset;
+      float* transfer_rectangle_write_ptr =
+          reinterpret_cast<float*>(transfer_vertex_buffer_pool_->Request(
+              current_submission, sizeof(float) * 2 * transfer_vertex_count,
+              sizeof(float), transfer_vertex_buffer,
+              transfer_vertex_buffer_offset));
+      if (!transfer_rectangle_write_ptr) {
+        continue;
+      }
+      for (auto it_merged = it_merged_first; it_merged <= it_merged_last;
+           ++it_merged) {
+        Transfer::Rectangle transfer_invocation_rectangles
+            [Transfer::kMaxRectanglesWithCutout];
+        uint32_t transfer_invocation_rectangle_count =
+            it_merged->transfer.GetRectangles(
+                dest_rt_key.base_tiles, dest_pitch_tiles,
+                dest_rt_key.msaa_samples, dest_is_64bpp,
+                transfer_invocation_rectangles, resolve_clear_rectangle);
+        assert_not_zero(transfer_invocation_rectangle_count);
+        for (uint32_t j = 0; j < transfer_invocation_rectangle_count; ++j) {
+          const Transfer::Rectangle& transfer_rectangle =
+              transfer_invocation_rectangles[j];
+          float transfer_rectangle_x0 =
+              -1.0f + transfer_rectangle.x_pixels * pixels_to_ndc_x;
+          float transfer_rectangle_y0 =
+              -1.0f + transfer_rectangle.y_pixels * pixels_to_ndc_y;
+          float transfer_rectangle_x1 =
+              transfer_rectangle_x0 +
+              transfer_rectangle.width_pixels * pixels_to_ndc_x;
+          float transfer_rectangle_y1 =
+              transfer_rectangle_y0 +
+              transfer_rectangle.height_pixels * pixels_to_ndc_y;
+          // O-*
+          // |/
+          // *
+          *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0;
+          *(transfer_rectangle_write_ptr++) = transfer_rectangle_y0;
+          // *-*
+          // |/
+          // O
+          *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0;
+          *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1;
+          // *-O
+          // |/
+          // *
+          *(transfer_rectangle_write_ptr++) = transfer_rectangle_x1;
*(transfer_rectangle_write_ptr++) = transfer_rectangle_y0; + // O + // /| + // *-* + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x1; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y0; + // * + // /| + // O-* + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1; + // * + // /| + // *-O + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x1; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1; + } + } + command_buffer.CmdVkBindVertexBuffers(0, 1, &transfer_vertex_buffer, + &transfer_vertex_buffer_offset); + + const VkPipeline* transfer_pipelines = GetTransferPipelines( + TransferPipelineKey(transfer_render_pass_key, transfer_shader_key)); + if (!transfer_pipelines) { + continue; + } + command_processor_.BindExternalGraphicsPipeline(transfer_pipelines[0]); + if (last_transfer_pipeline_layout_index != + transfer_pipeline_layout_index) { + last_transfer_pipeline_layout_index = transfer_pipeline_layout_index; + transfer_descriptor_sets_bound = 0; + transfer_push_constants_set = 0; + } + + // Invalidate outdated bindings. + if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + assert_not_null(host_depth_source_vulkan_rt); + VkDescriptorSet descriptor_set_host_depth_stencil_textures = + host_depth_source_vulkan_rt->GetDescriptorSetTransferSource(); + if (last_descriptor_set_host_depth_stencil_textures != + descriptor_set_host_depth_stencil_textures) { + last_descriptor_set_host_depth_stencil_textures = + descriptor_set_host_depth_stencil_textures; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetHostDepthStencilTexturesBit; + } + } + if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + VkDescriptorSet descriptor_set_depth_stencil_textures = + source_vulkan_rt.GetDescriptorSetTransferSource(); + if (last_descriptor_set_depth_stencil_textures != + descriptor_set_depth_stencil_textures) { + last_descriptor_set_depth_stencil_textures = + descriptor_set_depth_stencil_textures; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetDepthStencilTexturesBit; + } + } + if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) { + VkDescriptorSet descriptor_set_color_texture = + source_vulkan_rt.GetDescriptorSetTransferSource(); + if (last_descriptor_set_color_texture != + descriptor_set_color_texture) { + last_descriptor_set_color_texture = descriptor_set_color_texture; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetColorTextureBit; + } + } + if (transfer_pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + assert_not_null(host_depth_source_vulkan_rt); + RenderTargetKey host_depth_source_rt_key = + host_depth_source_vulkan_rt->key(); + TransferAddressConstant host_depth_address_constant; + host_depth_address_constant.dest_pitch = dest_pitch_tiles; + host_depth_address_constant.source_pitch = + host_depth_source_rt_key.GetPitchTiles(); + host_depth_address_constant.source_to_dest = + int32_t(dest_rt_key.base_tiles) - + int32_t(host_depth_source_rt_key.base_tiles); + if (last_host_depth_address_constant != host_depth_address_constant) { + last_host_depth_address_constant = host_depth_address_constant; + transfer_push_constants_set &= + ~kTransferUsedPushConstantDwordHostDepthAddressBit; + } + } + if 
(transfer_pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + RenderTargetKey source_rt_key = source_vulkan_rt.key(); + TransferAddressConstant address_constant; + address_constant.dest_pitch = dest_pitch_tiles; + address_constant.source_pitch = source_rt_key.GetPitchTiles(); + address_constant.source_to_dest = int32_t(dest_rt_key.base_tiles) - + int32_t(source_rt_key.base_tiles); + if (last_address_constant != address_constant) { + last_address_constant = address_constant; + transfer_push_constants_set &= + ~kTransferUsedPushConstantDwordAddressBit; + } + } + + // Apply the new bindings. + // TODO(Triang3l): Merge binding updates into spans. + VkPipelineLayout transfer_pipeline_layout = + transfer_pipeline_layouts_[size_t(transfer_pipeline_layout_index)]; + uint32_t transfer_descriptor_sets_unbound = + transfer_pipeline_layout_info.used_descriptor_sets & + ~transfer_descriptor_sets_bound; + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetHostDepthBufferBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count(transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthBufferBit - 1)), + 1, &edram_storage_buffer_descriptor_set_, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetHostDepthBufferBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count( + transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthStencilTexturesBit - 1)), + 1, &last_descriptor_set_host_depth_stencil_textures, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetHostDepthStencilTexturesBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count( + transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetDepthStencilTexturesBit - 1)), + 1, &last_descriptor_set_depth_stencil_textures, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetDepthStencilTexturesBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetColorTextureBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count(transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetColorTextureBit - 1)), + 1, &last_descriptor_set_color_texture, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetColorTextureBit; + } + uint32_t transfer_push_constants_unset = + transfer_pipeline_layout_info.used_push_constant_dwords & + ~transfer_push_constants_set; + if (transfer_push_constants_unset & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordHostDepthAddressBit - 1)), + sizeof(uint32_t), &last_host_depth_address_constant); + transfer_push_constants_set |= + kTransferUsedPushConstantDwordHostDepthAddressBit; + } + if (transfer_push_constants_unset & + 
kTransferUsedPushConstantDwordAddressBit) { + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordAddressBit - 1)), + sizeof(uint32_t), &last_address_constant); + transfer_push_constants_set |= + kTransferUsedPushConstantDwordAddressBit; + } + + for (uint32_t j = 0; j < transfer_sample_pipeline_count; ++j) { + if (j) { + command_processor_.BindExternalGraphicsPipeline( + transfer_pipelines[j]); + } + for (uint32_t k = 0; k < uint32_t(transfer_is_stencil_bit ? 8 : 1); + ++k) { + if (transfer_is_stencil_bit) { + uint32_t transfer_stencil_bit = uint32_t(1) << k; + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info + .used_push_constant_dwords & + (kTransferUsedPushConstantDwordStencilMaskBit - 1)), + sizeof(uint32_t), &transfer_stencil_bit); + command_buffer.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_AND_BACK, transfer_stencil_bit); + } + command_buffer.CmdVkDraw(transfer_vertex_count, 1, 0, 0); + } + } + } + } + + // Perform the clear. + if (resolve_clear_needed) { + command_processor_.SubmitBarriersAndEnterRenderTargetCacheRenderPass( + transfer_render_pass, transfer_framebuffer); + VkClearAttachment resolve_clear_attachment; + resolve_clear_attachment.colorAttachment = 0; + std::memset(&resolve_clear_attachment.clearValue, 0, + sizeof(resolve_clear_attachment.clearValue)); + uint64_t clear_value = render_target_resolve_clear_values[i]; + if (dest_rt_key.is_depth) { + resolve_clear_attachment.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + uint32_t depth_guest_clear_value = + (uint32_t(clear_value) >> 8) & 0xFFFFFF; + switch (dest_rt_key.GetDepthFormat()) { + case xenos::DepthRenderTargetFormat::kD24S8: + resolve_clear_attachment.clearValue.depthStencil.depth = + xenos::UNorm24To32(depth_guest_clear_value); + break; + case xenos::DepthRenderTargetFormat::kD24FS8: + // Taking [0, 2) -> [0, 1) remapping into account. 
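+            // (Worked example: the guest 20e4 floating-point depth encodes
+            // [0, 2) - the all-ones 0xFFFFFF pattern decodes to just under
+            // 2.0 - while the host depth range is [0, 1], so the decoded
+            // value is halved: a guest 1.0 becomes a host 0.5.)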
+ resolve_clear_attachment.clearValue.depthStencil.depth = + xenos::Float20e4To32(depth_guest_clear_value) * 0.5f; + break; + } + resolve_clear_attachment.clearValue.depthStencil.stencil = + uint32_t(clear_value) & 0xFF; + } else { + resolve_clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + switch (dest_rt_key.GetColorFormat()) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + for (uint32_t j = 0; j < 4; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + ((clear_value >> (j * 8)) & 0xFF) * (1.0f / 0xFF); + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + for (uint32_t j = 0; j < 3; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + ((clear_value >> (j * 10)) & 0x3FF) * (1.0f / 0x3FF); + } + resolve_clear_attachment.clearValue.color.float32[3] = + ((clear_value >> 30) & 0x3) * (1.0f / 0x3); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + for (uint32_t j = 0; j < 3; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + xenos::Float7e3To32((clear_value >> (j * 10)) & 0x3FF); + } + resolve_clear_attachment.clearValue.color.float32[3] = + ((clear_value >> 30) & 0x3) * (1.0f / 0x3); + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + // Using uint for transfers and clears of both. Disregarding the + // current -32...32 vs. -1...1 settings for consistency with color + // clear via depth aliasing. + // TODO(Triang3l): Handle cases of unsupported multisampled 16_UINT + // and completely unsupported 16_UNORM. + for (uint32_t j = 0; j < 2; ++j) { + resolve_clear_attachment.clearValue.color.uint32[j] = + uint32_t(clear_value >> (j * 16)) & 0xFFFF; + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + // Using uint for transfers and clears of both. Disregarding the + // current -32...32 vs. -1...1 settings for consistency with color + // clear via depth aliasing. + // TODO(Triang3l): Handle cases of unsupported multisampled 16_UINT + // and completely unsupported 16_UNORM. + for (uint32_t j = 0; j < 4; ++j) { + resolve_clear_attachment.clearValue.color.uint32[j] = + uint32_t(clear_value >> (j * 16)) & 0xFFFF; + } + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + // Using uint for proper denormal and NaN handling. + resolve_clear_attachment.clearValue.color.uint32[0] = + uint32_t(clear_value); + } break; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + // Using uint for proper denormal and NaN handling. 
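+          // (E.g. a NaN pattern such as 0x7FC00000, or a denormal such as
+          // 0x00000001, written by the guest is preserved bit-exactly when
+          // stored through uint32; a float-typed clear value could
+          // canonicalize the NaN or flush the denormal on some hosts.)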
+          resolve_clear_attachment.clearValue.color.uint32[0] =
+              uint32_t(clear_value);
+          resolve_clear_attachment.clearValue.color.uint32[1] =
+              uint32_t(clear_value >> 32);
+        } break;
+      }
+    }
+    command_buffer.CmdVkClearAttachments(1, &resolve_clear_attachment, 1,
+                                         &resolve_clear_rect);
+  }
+  }
+}
+
+VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) {
+  auto pipeline_it = dump_pipelines_.find(key);
+  if (pipeline_it != dump_pipelines_.end()) {
+    return pipeline_it->second;
+  }
+
+  std::vector<spv::Id> id_vector_temp;
+
+  spv::Builder builder(spv::Spv_1_0,
+                       (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1,
+                       nullptr);
+  spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450");
+  builder.addCapability(spv::CapabilityShader);
+  builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450);
+  builder.setSource(spv::SourceLanguageUnknown, 0);
+
+  spv::Id type_void = builder.makeVoidType();
+  spv::Id type_int = builder.makeIntType(32);
+  spv::Id type_int2 = builder.makeVectorType(type_int, 2);
+  spv::Id type_uint = builder.makeUintType(32);
+  spv::Id type_uint2 = builder.makeVectorType(type_uint, 2);
+  spv::Id type_uint3 = builder.makeVectorType(type_uint, 3);
+  spv::Id type_float = builder.makeFloatType(32);
+
+  // Bindings.
+  // EDRAM buffer.
+  bool format_is_64bpp = !key.is_depth &&
+                         xenos::IsColorRenderTargetFormat64bpp(
+                             key.GetColorFormat());
+  id_vector_temp.clear();
+  id_vector_temp.push_back(
+      builder.makeRuntimeArray(format_is_64bpp ? type_uint2 : type_uint));
+  // Storage buffers have std430 packing, no padding to 4-component vectors.
+  builder.addDecoration(id_vector_temp.back(), spv::DecorationArrayStride,
+                        sizeof(uint32_t) << uint32_t(format_is_64bpp));
+  spv::Id type_edram = builder.makeStructType(id_vector_temp, "XeEdram");
+  builder.addMemberName(type_edram, 0, "edram");
+  builder.addMemberDecoration(type_edram, 0, spv::DecorationNonReadable);
+  builder.addMemberDecoration(type_edram, 0, spv::DecorationOffset, 0);
+  // Block since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's
+  // BufferBlock.
+  builder.addDecoration(type_edram, spv::DecorationBufferBlock);
+  // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's
+  // Uniform.
+  spv::Id edram_buffer = builder.createVariable(
+      spv::NoPrecision, spv::StorageClassUniform, type_edram, "xe_edram");
+  builder.addDecoration(edram_buffer, spv::DecorationDescriptorSet,
+                        kDumpDescriptorSetEdram);
+  builder.addDecoration(edram_buffer, spv::DecorationBinding, 0);
+  // Color or depth source.
+  bool source_is_multisampled = key.msaa_samples != xenos::MsaaSamples::k1X;
+  bool source_is_uint;
+  if (key.is_depth) {
+    source_is_uint = false;
+  } else {
+    GetColorOwnershipTransferVulkanFormat(key.GetColorFormat(),
+                                          &source_is_uint);
+  }
+  spv::Id source_component_type = source_is_uint ? type_uint : type_float;
+  spv::Id source_texture = builder.createVariable(
+      spv::NoPrecision, spv::StorageClassUniformConstant,
+      builder.makeImageType(source_component_type, spv::Dim2D, false, false,
+                            source_is_multisampled, 1,
+                            spv::ImageFormatUnknown),
+      "xe_edram_dump_source");
+  builder.addDecoration(source_texture, spv::DecorationDescriptorSet,
+                        kDumpDescriptorSetSource);
+  builder.addDecoration(source_texture, spv::DecorationBinding, 0);
+  // Stencil source.
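+  // (Illustrative: Vulkan exposes the stencil aspect of a combined
+  // depth / stencil image through a separate VK_IMAGE_ASPECT_STENCIL_BIT
+  // image view, so stencil data is fetched via a second sampled image
+  // binding below rather than through the depth texture itself.)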
+  spv::Id source_stencil_texture = spv::NoResult;
+  if (key.is_depth) {
+    source_stencil_texture = builder.createVariable(
+        spv::NoPrecision, spv::StorageClassUniformConstant,
+        builder.makeImageType(type_uint, spv::Dim2D, false, false,
+                              source_is_multisampled, 1,
+                              spv::ImageFormatUnknown),
+        "xe_edram_dump_stencil");
+    builder.addDecoration(source_stencil_texture, spv::DecorationDescriptorSet,
+                          kDumpDescriptorSetSource);
+    builder.addDecoration(source_stencil_texture, spv::DecorationBinding, 1);
+  }
+  // Push constants.
+  id_vector_temp.clear();
+  id_vector_temp.reserve(kDumpPushConstantCount);
+  for (uint32_t i = 0; i < kDumpPushConstantCount; ++i) {
+    id_vector_temp.push_back(type_uint);
+  }
+  spv::Id type_push_constants =
+      builder.makeStructType(id_vector_temp, "XeEdramDumpPushConstants");
+  builder.addMemberName(type_push_constants, kDumpPushConstantPitches,
+                        "pitches");
+  builder.addMemberDecoration(type_push_constants, kDumpPushConstantPitches,
+                              spv::DecorationOffset,
+                              int(sizeof(uint32_t) * kDumpPushConstantPitches));
+  builder.addMemberName(type_push_constants, kDumpPushConstantOffsets,
+                        "offsets");
+  builder.addMemberDecoration(type_push_constants, kDumpPushConstantOffsets,
+                              spv::DecorationOffset,
+                              int(sizeof(uint32_t) * kDumpPushConstantOffsets));
+  builder.addDecoration(type_push_constants, spv::DecorationBlock);
+  spv::Id push_constants = builder.createVariable(
+      spv::NoPrecision, spv::StorageClassPushConstant, type_push_constants,
+      "xe_edram_dump_push_constants");
+
+  // gl_GlobalInvocationID input.
+  spv::Id input_global_invocation_id =
+      builder.createVariable(spv::NoPrecision, spv::StorageClassInput,
+                             type_uint3, "gl_GlobalInvocationID");
+  builder.addDecoration(input_global_invocation_id, spv::DecorationBuiltIn,
+                        spv::BuiltInGlobalInvocationId);
+
+  // Begin the main function.
+  std::vector<spv::Id> main_param_types;
+  std::vector<std::vector<spv::Decoration>> main_precisions;
+  spv::Block* main_entry;
+  spv::Function* main_function =
+      builder.makeFunctionEntry(spv::NoPrecision, type_void, "main",
+                                main_param_types, main_precisions, &main_entry);
+
+  // For now, as the exact addressing in 64bpp render targets relative to
+  // 32bpp is unknown, treating 64bpp tiles as storing 40x16 samples rather
+  // than 80x16 for simplicity of addressing into the texture.
+
+  // Split the destination sample index into the 32bpp tile and the
+  // 32bpp-tile-relative sample index.
+  // Note that division by non-power-of-two constants will include a 4-cycle
+  // 32*32 multiplication on AMD, even though so many bits are not needed for
+  // the sample position - however, if an OpUnreachable path is inserted for
+  // the case when the position has upper bits set, for some reason, the code
+  // for it is not eliminated when compiling the shader for AMD via RenderDoc
+  // on Windows, as of June 2022.
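+  // (Worked example of the decomposition below, at 1x resolution scale: a
+  // 32bpp tile is 80x16 samples, so for a rectangle-relative sample x = 173,
+  // rectangle_tile_index_x = 173 / 80 = 2 and tile_sample_x = 173 % 80 = 13;
+  // for 64bpp, tile_width is 40, matching the 40x16 treatment described
+  // above.)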
+ spv::Id global_invocation_id = + builder.createLoad(input_global_invocation_id, spv::NoPrecision); + spv::Id rectangle_sample_x = + builder.createCompositeExtract(global_invocation_id, type_uint, 0); + uint32_t tile_width = + (xenos::kEdramTileWidthSamples >> uint32_t(format_is_64bpp)) * + draw_resolution_scale_x(); + spv::Id const_tile_width = builder.makeUintConstant(tile_width); + spv::Id rectangle_tile_index_x = builder.createBinOp( + spv::OpUDiv, type_uint, rectangle_sample_x, const_tile_width); + spv::Id tile_sample_x = builder.createBinOp( + spv::OpUMod, type_uint, rectangle_sample_x, const_tile_width); + spv::Id rectangle_sample_y = + builder.createCompositeExtract(global_invocation_id, type_uint, 1); + uint32_t tile_height = + xenos::kEdramTileHeightSamples * draw_resolution_scale_y(); + spv::Id const_tile_height = builder.makeUintConstant(tile_height); + spv::Id rectangle_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, rectangle_sample_y, const_tile_height); + spv::Id tile_sample_y = builder.createBinOp( + spv::OpUMod, type_uint, rectangle_sample_y, const_tile_height); + + // Get the tile index in the EDRAM relative to the dump rectangle base tile. + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant(kDumpPushConstantPitches)); + spv::Id pitches_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, push_constants, + id_vector_temp), + spv::NoPrecision); + spv::Id const_uint_0 = builder.makeUintConstant(0); + spv::Id const_edram_pitch_tiles_bits = + builder.makeUintConstant(xenos::kEdramPitchTilesBits); + spv::Id rectangle_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + pitches_constant, const_uint_0, + const_edram_pitch_tiles_bits), + rectangle_tile_index_y), + rectangle_tile_index_x); + // Add the base tile in the dispatch to the dispatch-local tile index. + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant(kDumpPushConstantOffsets)); + spv::Id offsets_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, push_constants, + id_vector_temp), + spv::NoPrecision); + spv::Id const_edram_base_tiles_bits = + builder.makeUintConstant(xenos::kEdramBaseTilesBits); + spv::Id edram_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, offsets_constant, + const_uint_0, const_edram_base_tiles_bits), + rectangle_tile_index); + + // Combine the tile sample index and the tile index into the EDRAM sample + // index. + spv::Id edram_sample_address = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width * tile_height), + edram_tile_index), + builder.createBinOp(spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + const_tile_width, tile_sample_y), + tile_sample_x)); + if (key.is_depth) { + // Swap 40-sample columns in the depth buffer in the destination address to + // get the final address of the sample in the EDRAM. 
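+  // (Illustrative, at 1x scale, where a 32bpp depth tile is 80 samples wide
+  // and tile_width_half is 40: columns [0, 40) and [40, 80) trade places, so
+  // the sample at tile-relative x = 3 is addressed at x = 43, and the one at
+  // x = 43 is addressed at x = 3.)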
+ uint32_t tile_width_half = tile_width >> 1; + edram_sample_address = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + edram_sample_address), + builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), + tile_sample_x, + builder.makeUintConstant(tile_width_half)), + builder.makeIntConstant(int32_t(tile_width_half)), + builder.makeIntConstant(-int32_t(tile_width_half))))); + } + + // Get the linear tile index within the source texture. + spv::Id source_tile_index = builder.createBinOp( + spv::OpISub, type_uint, edram_tile_index, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, offsets_constant, + const_edram_base_tiles_bits, + const_edram_base_tiles_bits)); + // Split the linear tile index in the source texture into X and Y in tiles. + spv::Id source_pitch_tiles = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, pitches_constant, + const_edram_pitch_tiles_bits, const_edram_pitch_tiles_bits); + spv::Id source_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, source_tile_index, source_pitch_tiles); + spv::Id source_tile_index_x = builder.createBinOp( + spv::OpUMod, type_uint, source_tile_index, source_pitch_tiles); + // Combine the source tile offset and the sample index within the tile. + spv::Id source_sample_x = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, const_tile_width, + source_tile_index_x), + tile_sample_x); + spv::Id source_sample_y = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, const_tile_height, + source_tile_index_y), + tile_sample_y); + // Get the source pixel coordinate and the sample index within the pixel. + spv::Id source_pixel_x = source_sample_x, source_pixel_y = source_sample_y; + spv::Id source_sample_id = spv::NoResult; + if (source_is_multisampled) { + spv::Id const_uint_1 = builder.makeUintConstant(1); + source_pixel_y = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + source_sample_y, const_uint_1); + if (key.msaa_samples >= xenos::MsaaSamples::k4X) { + source_pixel_x = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + source_sample_x, const_uint_1); + // 4x MSAA source texture sample index - bit 0 for horizontal, bit 1 for + // vertical. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseAnd, type_uint, source_sample_x, const_uint_1)); + id_vector_temp.push_back(source_sample_y); + id_vector_temp.push_back(const_uint_1); + id_vector_temp.push_back(const_uint_1); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + // 2x MSAA source texture sample index - convert from the guest to + // the Vulkan standard sample locations. + source_sample_id = builder.createTriOp( + spv::OpSelect, type_uint, + builder.createBinOp( + spv::OpINotEqual, builder.makeBoolType(), + builder.createBinOp(spv::OpBitwiseAnd, type_uint, source_sample_y, + const_uint_1), + const_uint_0), + builder.makeUintConstant(draw_util::GetD3D10SampleIndexForGuest2xMSAA( + 1, msaa_2x_attachments_supported_)), + builder.makeUintConstant(draw_util::GetD3D10SampleIndexForGuest2xMSAA( + 0, msaa_2x_attachments_supported_))); + } + } + + // Load the source, and pack the value into one or two 32-bit integers. 
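+  // (Illustrative: both paths below are plain texel fetches without a
+  // sampler - for multisampled sources, an OpImageFetch with the Sample image
+  // operand set to the sample index computed above; for single-sampled
+  // sources, one with explicit LOD 0.)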
+ spv::Id packed[2] = {}; + spv::Builder::TextureParameters source_texture_parameters = {}; + source_texture_parameters.sampler = + builder.createLoad(source_texture, spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back( + builder.createUnaryOp(spv::OpBitcast, type_int, source_pixel_x)); + id_vector_temp.push_back( + builder.createUnaryOp(spv::OpBitcast, type_int, source_pixel_y)); + source_texture_parameters.coords = + builder.createCompositeConstruct(type_int2, id_vector_temp); + if (source_is_multisampled) { + source_texture_parameters.sample = + builder.createUnaryOp(spv::OpBitcast, type_int, source_sample_id); + } else { + source_texture_parameters.lod = builder.makeIntConstant(0); + } + spv::Id source_vec4 = builder.createTextureCall( + spv::NoPrecision, builder.makeVectorType(source_component_type, 4), false, + true, false, false, false, source_texture_parameters, + spv::ImageOperandsMaskNone); + if (key.is_depth) { + source_texture_parameters.sampler = + builder.createLoad(source_stencil_texture, spv::NoPrecision); + spv::Id source_stencil = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, builder.makeVectorType(type_uint, 4), false, true, + false, false, false, source_texture_parameters, + spv::ImageOperandsMaskNone), + type_uint, 0); + spv::Id source_depth32 = + builder.createCompositeExtract(source_vec4, type_float, 0); + switch (key.GetDepthFormat()) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the correct + // conversion, adding +0.5 and rounding towards zero results in red + // instead of black in the 4D5307E6 clear shader. + id_vector_temp.clear(); + id_vector_temp.push_back( + builder.createBinOp(spv::OpFMul, type_float, source_depth32, + builder.makeFloatConstant(float(0xFFFFFF)))); + packed[0] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, source_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); + } break; + } + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_stencil); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.makeUintConstant(8)); + id_vector_temp.push_back(builder.makeUintConstant(24)); + packed[0] = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + switch (key.GetColorFormat()) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale = builder.makeFloatConstant(255.0f); + packed[0] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, + builder.createCompositeExtract(source_vec4, type_float, 0), + unorm_scale), + unorm_round_offset)); + spv::Id component_width = builder.makeUintConstant(8); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + builder.createCompositeExtract( + 
source_vec4, type_float, i),
+                                    unorm_scale),
+                unorm_round_offset)));
+        id_vector_temp.push_back(builder.makeUintConstant(8 * i));
+        id_vector_temp.push_back(component_width);
+        packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                     id_vector_temp);
+      }
+    } break;
+    case xenos::ColorRenderTargetFormat::k_2_10_10_10:
+    case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
+      spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f);
+      spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f);
+      packed[0] = builder.createUnaryOp(
+          spv::OpConvertFToU, type_uint,
+          builder.createBinOp(
+              spv::OpFAdd, type_float,
+              builder.createBinOp(
+                  spv::OpFMul, type_float,
+                  builder.createCompositeExtract(source_vec4, type_float, 0),
+                  unorm_scale_rgb),
+              unorm_round_offset));
+      spv::Id width_rgb = builder.makeUintConstant(10);
+      spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f);
+      spv::Id width_a = builder.makeUintConstant(2);
+      for (uint32_t i = 1; i < 4; ++i) {
+        id_vector_temp.clear();
+        id_vector_temp.reserve(4);
+        id_vector_temp.push_back(packed[0]);
+        id_vector_temp.push_back(builder.createUnaryOp(
+            spv::OpConvertFToU, type_uint,
+            builder.createBinOp(
+                spv::OpFAdd, type_float,
+                builder.createBinOp(spv::OpFMul, type_float,
+                                    builder.createCompositeExtract(
+                                        source_vec4, type_float, i),
+                                    i == 3 ? unorm_scale_a : unorm_scale_rgb),
+                unorm_round_offset)));
+        id_vector_temp.push_back(builder.makeUintConstant(10 * i));
+        id_vector_temp.push_back(i == 3 ? width_a : width_rgb);
+        packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                     id_vector_temp);
+      }
+    } break;
+    case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
+    case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: {
+      // Float16 has a wider range for both color and alpha, also NaNs - clamp
+      // and convert.
+      packed[0] = SpirvShaderTranslator::UnclampedFloat32To7e3(
+          builder, builder.createCompositeExtract(source_vec4, type_float, 0),
+          ext_inst_glsl_std_450);
+      spv::Id width_rgb = builder.makeUintConstant(10);
+      for (uint32_t i = 1; i < 3; ++i) {
+        id_vector_temp.clear();
+        id_vector_temp.reserve(4);
+        id_vector_temp.push_back(packed[0]);
+        id_vector_temp.push_back(SpirvShaderTranslator::UnclampedFloat32To7e3(
+            builder,
+            builder.createCompositeExtract(source_vec4, type_float, i),
+            ext_inst_glsl_std_450));
+        id_vector_temp.push_back(builder.makeUintConstant(10 * i));
+        id_vector_temp.push_back(width_rgb);
+        packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                     id_vector_temp);
+      }
+      // Saturate and convert the alpha.
+      id_vector_temp.clear();
+      id_vector_temp.reserve(3);
+      id_vector_temp.push_back(
+          builder.createCompositeExtract(source_vec4, type_float, 3));
+      id_vector_temp.push_back(builder.makeFloatConstant(0.0f));
+      id_vector_temp.push_back(builder.makeFloatConstant(1.0f));
+      spv::Id alpha_saturated =
+          builder.createBuiltinCall(type_float, ext_inst_glsl_std_450,
+                                    GLSLstd450NClamp, id_vector_temp);
+      id_vector_temp.clear();
+      id_vector_temp.reserve(4);
+      id_vector_temp.push_back(packed[0]);
+      id_vector_temp.push_back(builder.createUnaryOp(
+          spv::OpConvertFToU, type_uint,
+          builder.createBinOp(
+              spv::OpFAdd, type_float,
+              builder.createBinOp(spv::OpFMul, type_float, alpha_saturated,
+                                  builder.makeFloatConstant(3.0f)),
+              builder.makeFloatConstant(0.5f))));
+      id_vector_temp.push_back(builder.makeUintConstant(30));
+      id_vector_temp.push_back(builder.makeUintConstant(2));
+      packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                   id_vector_temp);
+    } break;
+    case xenos::ColorRenderTargetFormat::k_16_16:
+    case xenos::ColorRenderTargetFormat::k_16_16_16_16:
+    case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
+    case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: {
+      // All 64bpp formats, and all 16 bits per component formats, are
+      // represented as integers in ownership transfer for safe handling of
+      // NaN encodings and -32768 / -32767.
+      // TODO(Triang3l): Handle the case when that's not true (no multisampled
+      // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a
+      // portability subset device or a 64bpp format where that wouldn't help
+      // anyway).
+      spv::Id component_offset_width = builder.makeUintConstant(16);
+      for (uint32_t i = 0; i <= uint32_t(format_is_64bpp); ++i) {
+        id_vector_temp.clear();
+        id_vector_temp.reserve(4);
+        id_vector_temp.push_back(
+            builder.createCompositeExtract(source_vec4, type_uint, 2 * i));
+        id_vector_temp.push_back(builder.createCompositeExtract(
+            source_vec4, type_uint, 2 * i + 1));
+        id_vector_temp.push_back(component_offset_width);
+        id_vector_temp.push_back(component_offset_width);
+        packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint,
+                                     id_vector_temp);
+      }
+    } break;
+    // Float32 is transferred as uint32 to preserve NaN encodings. However,
+    // multisampled sampled image support is optional in Vulkan.
+    case xenos::ColorRenderTargetFormat::k_32_FLOAT:
+    case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
+      for (uint32_t i = 0; i <= uint32_t(format_is_64bpp); ++i) {
+        spv::Id& packed_ref = packed[i];
+        packed_ref = builder.createCompositeExtract(source_vec4,
+                                                    source_component_type, i);
+        if (!source_is_uint) {
+          packed_ref =
+              builder.createUnaryOp(spv::OpBitcast, type_uint, packed_ref);
+        }
+      }
+    } break;
+  }
+  }
+
+  // Write the packed value to the EDRAM buffer.
+  spv::Id store_value = packed[0];
+  if (format_is_64bpp) {
+    id_vector_temp.clear();
+    id_vector_temp.reserve(2);
+    id_vector_temp.push_back(packed[0]);
+    id_vector_temp.push_back(packed[1]);
+    store_value = builder.createCompositeConstruct(type_uint2, id_vector_temp);
+  }
+  id_vector_temp.clear();
+  id_vector_temp.reserve(2);
+  // The only SSBO structure member.
+  id_vector_temp.push_back(builder.makeIntConstant(0));
+  id_vector_temp.push_back(
+      builder.createUnaryOp(spv::OpBitcast, type_int, edram_sample_address));
+  // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's
+  // Uniform.
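+  // (Illustrative: with SPIR-V 1.0, an SSBO is declared as a Uniform-storage-
+  // class variable whose struct type carries the BufferBlock decoration; from
+  // SPIR-V 1.3 onward the same binding would use the StorageBuffer storage
+  // class with Block. Both map to a VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
+  // descriptor.)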
+  builder.createStore(store_value,
+                      builder.createAccessChain(spv::StorageClassUniform,
+                                                edram_buffer, id_vector_temp));
+
+  // End the main function and make it the entry point.
+  builder.leaveFunction();
+  builder.addExecutionMode(main_function, spv::ExecutionModeLocalSize,
+                           kDumpSamplesPerGroupX, kDumpSamplesPerGroupY, 1);
+  spv::Instruction* entry_point = builder.addEntryPoint(
+      spv::ExecutionModelGLCompute, main_function, "main");
+  // Bindings only need to be added to the entry point's interface starting
+  // with SPIR-V 1.4 - emitting 1.0 here, so only inputs / outputs.
+  entry_point->addIdOperand(input_global_invocation_id);
+
+  // Serialize the shader code.
+  std::vector<unsigned int> shader_code;
+  builder.dump(shader_code);
+
+  // Create the pipeline, and store the handle even if creation fails, so that
+  // creation isn't attempted again later.
+  VkPipeline pipeline = ui::vulkan::util::CreateComputePipeline(
+      command_processor_.GetVulkanProvider(),
+      key.is_depth ? dump_pipeline_layout_depth_ : dump_pipeline_layout_color_,
+      reinterpret_cast<const uint32_t*>(shader_code.data()),
+      sizeof(uint32_t) * shader_code.size());
+  if (pipeline == VK_NULL_HANDLE) {
+    XELOGE(
+        "VulkanRenderTargetCache: Failed to create a render target dumping "
+        "pipeline for {}-sample render targets with format {}",
+        UINT32_C(1) << uint32_t(key.msaa_samples),
+        key.is_depth
+            ? xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat())
+            : xenos::GetColorRenderTargetFormatName(key.GetColorFormat()));
+  }
+  dump_pipelines_.emplace(key, pipeline);
+  return pipeline;
+}
+
+void VulkanRenderTargetCache::DumpRenderTargets(uint32_t dump_base,
+                                                uint32_t dump_row_length_used,
+                                                uint32_t dump_rows,
+                                                uint32_t dump_pitch) {
+  assert_true(GetPath() == Path::kHostRenderTargets);
+
+  GetResolveCopyRectanglesToDump(dump_base, dump_row_length_used, dump_rows,
+                                 dump_pitch, dump_rectangles_);
+  if (dump_rectangles_.empty()) {
+    return;
+  }
+
+  // Clear previously set temporary indices.
+  for (const ResolveCopyDumpRectangle& rectangle : dump_rectangles_) {
+    static_cast<VulkanRenderTarget*>(rectangle.render_target)
+        ->SetTemporarySortIndex(UINT32_MAX);
+  }
+  // Gather all needed barriers and the info needed to sort the invocations.
+  UseEdramBuffer(EdramBufferUsage::kComputeWrite);
+  dump_invocations_.clear();
+  dump_invocations_.reserve(dump_rectangles_.size());
+  constexpr VkPipelineStageFlags kRenderTargetDstStageMask =
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+  constexpr VkAccessFlags kRenderTargetDstAccessMask =
+      VK_ACCESS_SHADER_READ_BIT;
+  constexpr VkImageLayout kRenderTargetNewLayout =
+      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+  uint32_t rt_sort_index = 0;
+  for (const ResolveCopyDumpRectangle& rectangle : dump_rectangles_) {
+    auto& vulkan_rt =
+        *static_cast<VulkanRenderTarget*>(rectangle.render_target);
+    RenderTargetKey rt_key = vulkan_rt.key();
+    command_processor_.PushImageMemoryBarrier(
+        vulkan_rt.image(),
+        ui::vulkan::util::InitializeSubresourceRange(
+            rt_key.is_depth
+                ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)
+                : VK_IMAGE_ASPECT_COLOR_BIT),
+        vulkan_rt.current_stage_mask(), kRenderTargetDstStageMask,
+        vulkan_rt.current_access_mask(), kRenderTargetDstAccessMask,
+        vulkan_rt.current_layout(), kRenderTargetNewLayout);
+    vulkan_rt.SetUsage(kRenderTargetDstStageMask, kRenderTargetDstAccessMask,
+                       kRenderTargetNewLayout);
+    if (vulkan_rt.temporary_sort_index() == UINT32_MAX) {
+      vulkan_rt.SetTemporarySortIndex(rt_sort_index++);
+    }
+    DumpPipelineKey pipeline_key;
+    pipeline_key.msaa_samples = rt_key.msaa_samples;
+    pipeline_key.resource_format = rt_key.resource_format;
+    pipeline_key.is_depth = rt_key.is_depth;
+    dump_invocations_.emplace_back(rectangle, pipeline_key);
+  }
+
+  // Sort the invocations to reduce context and binding switches.
+  std::sort(dump_invocations_.begin(), dump_invocations_.end());
+
+  // Dump the render targets.
+  DeferredCommandBuffer& command_buffer =
+      command_processor_.deferred_command_buffer();
+  bool edram_buffer_bound = false;
+  VkDescriptorSet last_source_descriptor_set = VK_NULL_HANDLE;
+  DumpPitches last_pitches;
+  DumpOffsets last_offsets;
+  bool pitches_bound = false, offsets_bound = false;
+  for (const DumpInvocation& invocation : dump_invocations_) {
+    const ResolveCopyDumpRectangle& rectangle = invocation.rectangle;
+    auto& vulkan_rt =
+        *static_cast<VulkanRenderTarget*>(rectangle.render_target);
+    RenderTargetKey rt_key = vulkan_rt.key();
+    DumpPipelineKey pipeline_key = invocation.pipeline_key;
+    VkPipeline pipeline = GetDumpPipeline(pipeline_key);
+    if (!pipeline) {
+      continue;
+    }
+    command_processor_.BindExternalComputePipeline(pipeline);
+
+    VkPipelineLayout pipeline_layout = rt_key.is_depth
+                                           ? dump_pipeline_layout_depth_
+                                           : dump_pipeline_layout_color_;
+
+    // Only need to bind the EDRAM buffer once (relying on pipeline layout
+    // compatibility).
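+    // (Illustrative: under the Vulkan pipeline layout compatibility rules, a
+    // descriptor set bound at set number N stays valid across a pipeline
+    // layout switch as long as sets 0...N are identically defined in both
+    // layouts and the push constant ranges match - assuming
+    // kDumpDescriptorSetEdram is the same low set number in both the depth
+    // and the color dump pipeline layouts, binding it once is enough.)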
+ if (!edram_buffer_bound) { + edram_buffer_bound = true; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + kDumpDescriptorSetEdram, 1, &edram_storage_buffer_descriptor_set_, 0, + nullptr); + } + + VkDescriptorSet source_descriptor_set = + vulkan_rt.GetDescriptorSetTransferSource(); + if (last_source_descriptor_set != source_descriptor_set) { + last_source_descriptor_set = source_descriptor_set; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + kDumpDescriptorSetSource, 1, &source_descriptor_set, 0, nullptr); + } + + DumpPitches pitches; + pitches.dest_pitch = dump_pitch; + pitches.source_pitch = rt_key.GetPitchTiles(); + if (last_pitches != pitches) { + last_pitches = pitches; + pitches_bound = false; + } + if (!pitches_bound) { + pitches_bound = true; + command_buffer.CmdVkPushConstants( + pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(uint32_t) * kDumpPushConstantPitches, sizeof(last_pitches), + &last_pitches); + } + + DumpOffsets offsets; + offsets.source_base_tiles = rt_key.base_tiles; + ResolveCopyDumpRectangle::Dispatch + dispatches[ResolveCopyDumpRectangle::kMaxDispatches]; + uint32_t dispatch_count = + rectangle.GetDispatches(dump_pitch, dump_row_length_used, dispatches); + for (uint32_t i = 0; i < dispatch_count; ++i) { + const ResolveCopyDumpRectangle::Dispatch& dispatch = dispatches[i]; + offsets.dispatch_first_tile = dump_base + dispatch.offset; + if (last_offsets != offsets) { + last_offsets = offsets; + offsets_bound = false; + } + if (!offsets_bound) { + offsets_bound = true; + command_buffer.CmdVkPushConstants( + pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(uint32_t) * kDumpPushConstantOffsets, sizeof(last_offsets), + &last_offsets); + } + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch( + (draw_resolution_scale_x() * + (xenos::kEdramTileWidthSamples >> uint32_t(rt_key.Is64bpp())) * + dispatch.width_tiles + + (kDumpSamplesPerGroupX - 1)) / + kDumpSamplesPerGroupX, + (draw_resolution_scale_y() * xenos::kEdramTileHeightSamples * + dispatch.height_tiles + + (kDumpSamplesPerGroupY - 1)) / + kDumpSamplesPerGroupY, + 1); + } + MarkEdramBufferModified(); + } +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h new file mode 100644 index 000000000..d15ba2abc --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -0,0 +1,905 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
*
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
+#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+
+#include "xenia/base/hash.h"
+#include "xenia/base/xxhash.h"
+#include "xenia/gpu/render_target_cache.h"
+#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
+#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+class VulkanCommandProcessor;
+
+class VulkanRenderTargetCache final : public RenderTargetCache {
+ public:
+  union RenderPassKey {
+    struct {
+      // If emulating 2x as 4x, this is still 2x for simplicity of using this
+      // field to make guest-related decisions. Render pass objects are not
+      // very expensive, and their dependencies can't be shared between
+      // 2x-as-4x and true 4x MSAA passes (framebuffers because render target
+      // cache render targets are different for 2x and 4x guest MSAA, pipelines
+      // because the sample mask will have 2 samples excluded for 2x-as-4x).
+      xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits;  // 2
+      // << 0 is depth, << 1...4 is color.
+      uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets;  // 7
+      // 0 for unused attachments.
+      // If VK_FORMAT_D24_UNORM_S8_UINT is not supported, this must be kD24FS8
+      // even for kD24S8.
+      xenos::DepthRenderTargetFormat depth_format
+          : xenos::kDepthRenderTargetFormatBits;  // 8
+      // Linear or sRGB included if host sRGB is used.
+      xenos::ColorRenderTargetFormat color_0_view_format
+          : xenos::kColorRenderTargetFormatBits;  // 12
+      xenos::ColorRenderTargetFormat color_1_view_format
+          : xenos::kColorRenderTargetFormatBits;  // 16
+      xenos::ColorRenderTargetFormat color_2_view_format
+          : xenos::kColorRenderTargetFormatBits;  // 20
+      xenos::ColorRenderTargetFormat color_3_view_format
+          : xenos::kColorRenderTargetFormatBits;  // 24
+      uint32_t color_rts_use_transfer_formats : 1;  // 25
+    };
+    uint32_t key = 0;
+    struct Hasher {
+      size_t operator()(const RenderPassKey& key) const {
+        return std::hash<uint32_t>{}(key.key);
+      }
+    };
+    bool operator==(const RenderPassKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const RenderPassKey& other_key) const {
+      return !(*this == other_key);
+    }
+    bool operator<(const RenderPassKey& other_key) const {
+      return key < other_key.key;
+    }
+  };
+  static_assert_size(RenderPassKey, sizeof(uint32_t));
+
+  struct Framebuffer {
+    VkFramebuffer framebuffer;
+    VkExtent2D host_extent;
+    Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent)
+        : framebuffer(framebuffer), host_extent(host_extent) {}
+  };
+
+  VulkanRenderTargetCache(const RegisterFile& register_file,
+                          const Memory& memory, TraceWriter& trace_writer,
+                          uint32_t draw_resolution_scale_x,
+                          uint32_t draw_resolution_scale_y,
+                          VulkanCommandProcessor& command_processor);
+  ~VulkanRenderTargetCache();
+
+  // Transient descriptor set layouts must be initialized in the command
+  // processor.
+  bool Initialize();
+  void Shutdown(bool from_destructor = false);
+  void ClearCache() override;
+
+  void CompletedSubmissionUpdated();
+  void EndSubmission();
+
+  // TODO(Triang3l): Fragment shader interlock.
+  Path GetPath() const override { return Path::kHostRenderTargets; }
+
+  // Performs the resolve to a shared memory area according to the current
+  // register values, and also clears the render targets if needed. Must be
+  // called within a frame.
+  bool Resolve(const Memory& memory, VulkanSharedMemory& shared_memory,
+               VulkanTextureCache& texture_cache,
+               uint32_t& written_address_out, uint32_t& written_length_out);
+
+  bool Update(bool is_rasterization_done,
+              reg::RB_DEPTHCONTROL normalized_depth_control,
+              uint32_t normalized_color_mask,
+              const Shader& vertex_shader) override;
+  // Binding information for the last successful update.
+  RenderPassKey last_update_render_pass_key() const {
+    return last_update_render_pass_key_;
+  }
+  VkRenderPass last_update_render_pass() const {
+    return last_update_render_pass_;
+  }
+  const Framebuffer* last_update_framebuffer() const {
+    return last_update_framebuffer_;
+  }
+
+  // Using R16G16[B16A16]_SNORM, which are -1...1, not the needed -32...32.
+  // Persistent data doesn't depend on this, so it can be overridden by
+  // per-game configuration.
+  bool IsFixedRG16TruncatedToMinus1To1() const {
+    // TODO(Triang3l): Not float16 condition.
+    return GetPath() == Path::kHostRenderTargets &&
+           !cvars::snorm16_render_target_full_range;
+  }
+  bool IsFixedRGBA16TruncatedToMinus1To1() const {
+    // TODO(Triang3l): Not float16 condition.
+    return GetPath() == Path::kHostRenderTargets &&
+           !cvars::snorm16_render_target_full_range;
+  }
+
+  bool depth_unorm24_vulkan_format_supported() const {
+    return depth_unorm24_vulkan_format_supported_;
+  }
+  bool depth_float24_round() const { return depth_float24_round_; }
+
+  bool msaa_2x_attachments_supported() const {
+    return msaa_2x_attachments_supported_;
+  }
+  bool msaa_2x_no_attachments_supported() const {
+    return msaa_2x_no_attachments_supported_;
+  }
+  bool IsMsaa2xSupported(bool subpass_has_attachments) const {
+    return subpass_has_attachments ? msaa_2x_attachments_supported_
+                                   : msaa_2x_no_attachments_supported_;
+  }
+
+  // Returns the render pass object, or VK_NULL_HANDLE if failed to create.
+  // A render pass managed by the render target cache may be ended and resumed
+  // at any time (to allow for things like copying and texture loading).
+  VkRenderPass GetRenderPass(RenderPassKey key);
+
+  VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const;
+  VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const;
+  VkFormat GetColorOwnershipTransferVulkanFormat(
+      xenos::ColorRenderTargetFormat format,
+      bool* is_integer_out = nullptr) const;
+
+ protected:
+  uint32_t GetMaxRenderTargetWidth() const override;
+  uint32_t GetMaxRenderTargetHeight() const override;
+
+  RenderTarget* CreateRenderTarget(RenderTargetKey key) override;
+
+  bool IsHostDepthEncodingDifferent(
+      xenos::DepthRenderTargetFormat format) const override;
+
+ private:
+  enum class EdramBufferUsage {
+    // There's no need for combined fragment and compute usages.
+    // With host render targets, the usual usage sequence is as follows:
+    // - Optionally compute writes - host depth copy storing for EDRAM range
+    //   ownership transfers.
+    // - Optionally fragment reads - host depth copy loading during EDRAM
+    //   range ownership transfers.
+    // - Compute writes - copying from host render targets during resolving.
+    // - Compute reads - writing to the shared memory during resolving.
+    // With the render backend implementation based on fragment shader
+    // interlocks, it's:
+    // - Fragment reads and writes - depth / stencil and color operations.
+    // - Compute reads - writing to the shared memory during resolving.
+    // So, fragment reads and compute reads normally don't follow each other,
+    // and there's no need to amortize the cost of a read > read barrier in an
+    // exceptional situation by using a wider barrier in the normal scenario.
+
+    // Host depth copy loading for EDRAM range ownership transfers.
+    kFragmentRead,
+    // Fragment shader interlock depth / stencil and color operations.
+    kFragmentReadWrite,
+    // Resolve - copying to the shared memory.
+    kComputeRead,
+    // Resolve - copying from host render targets.
+    kComputeWrite,
+    // Trace recording.
+    kTransferRead,
+    // Trace playback.
+    kTransferWrite,
+  };
+
+  enum class EdramBufferModificationStatus {
+    // The values are ordered by how strong the barrier conditions are.
+    // No uncommitted shader writes.
+    kUnmodified,
+    // Need to commit before the next fragment shader interlock usage with
+    // overlap.
+    kViaFragmentShaderInterlock,
+    // Need to commit before any next fragment shader interlock usage.
+    kViaUnordered,
+  };
+
+  enum ResolveCopyDescriptorSet : uint32_t {
+    // Never changes.
+    kResolveCopyDescriptorSetEdram,
+    // Shared memory or a region in it.
+    kResolveCopyDescriptorSetDest,
+
+    kResolveCopyDescriptorSetCount,
+  };
+
+  struct ResolveCopyShaderCode {
+    const uint32_t* unscaled;
+    size_t unscaled_size_bytes;
+    const uint32_t* scaled;
+    size_t scaled_size_bytes;
+  };
+
+  static void GetEdramBufferUsageMasks(EdramBufferUsage usage,
+                                       VkPipelineStageFlags& stage_mask_out,
+                                       VkAccessFlags& access_mask_out);
+  void UseEdramBuffer(EdramBufferUsage new_usage);
+  void MarkEdramBufferModified(
+      EdramBufferModificationStatus modification_status =
+          EdramBufferModificationStatus::kViaUnordered);
+  void CommitEdramBufferShaderWrites(
+      EdramBufferModificationStatus commit_status =
+          EdramBufferModificationStatus::kViaFragmentShaderInterlock);
+
+  VulkanCommandProcessor& command_processor_;
+  TraceWriter& trace_writer_;
+
+  // Accessible in fragment and compute shaders.
+  VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_sampled_image_x2_ =
+      VK_NULL_HANDLE;
+
+  std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
+      descriptor_set_pool_sampled_image_;
+  std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
+      descriptor_set_pool_sampled_image_x2_;
+
+  VkDeviceMemory edram_buffer_memory_ = VK_NULL_HANDLE;
+  VkBuffer edram_buffer_ = VK_NULL_HANDLE;
+  EdramBufferUsage edram_buffer_usage_;
+  EdramBufferModificationStatus edram_buffer_modification_status_ =
+      EdramBufferModificationStatus::kUnmodified;
+  VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE;
+  VkDescriptorSet edram_storage_buffer_descriptor_set_;
+
+  VkPipelineLayout resolve_copy_pipeline_layout_ = VK_NULL_HANDLE;
+  static const ResolveCopyShaderCode
+      kResolveCopyShaders[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
+  std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
+      resolve_copy_pipelines_{};
+
+  // RenderPassKey::key -> VkRenderPass.
+  // VK_NULL_HANDLE if failed to create.
+  std::unordered_map<uint32_t, VkRenderPass> render_passes_;
+
+  // For host render targets.
+
+  // Can only be destroyed when framebuffers referencing it are destroyed!
+ class VulkanRenderTarget final : public RenderTarget { + public: + static constexpr VkPipelineStageFlags kColorDrawStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + static constexpr VkAccessFlags kColorDrawAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + static constexpr VkImageLayout kColorDrawLayout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + static constexpr VkPipelineStageFlags kDepthDrawStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + static constexpr VkAccessFlags kDepthDrawAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + static constexpr VkImageLayout kDepthDrawLayout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + // Takes ownership of the Vulkan objects passed to the constructor. + VulkanRenderTarget(RenderTargetKey key, + VulkanRenderTargetCache& render_target_cache, + VkImage image, VkDeviceMemory memory, + VkImageView view_depth_color, + VkImageView view_depth_stencil, VkImageView view_stencil, + VkImageView view_srgb, + VkImageView view_color_transfer_separate, + size_t descriptor_set_index_transfer_source) + : RenderTarget(key), + render_target_cache_(render_target_cache), + image_(image), + memory_(memory), + view_depth_color_(view_depth_color), + view_depth_stencil_(view_depth_stencil), + view_stencil_(view_stencil), + view_srgb_(view_srgb), + view_color_transfer_separate_(view_color_transfer_separate), + descriptor_set_index_transfer_source_( + descriptor_set_index_transfer_source) {} + ~VulkanRenderTarget(); + + VkImage image() const { return image_; } + + VkImageView view_depth_color() const { return view_depth_color_; } + VkImageView view_depth_stencil() const { return view_depth_stencil_; } + VkImageView view_color_transfer_separate() const { + return view_color_transfer_separate_; + } + VkImageView view_color_transfer() const { + return view_color_transfer_separate_ != VK_NULL_HANDLE + ? view_color_transfer_separate_ + : view_depth_color_; + } + VkDescriptorSet GetDescriptorSetTransferSource() const { + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key().is_depth + ? *render_target_cache_.descriptor_set_pool_sampled_image_x2_ + : *render_target_cache_.descriptor_set_pool_sampled_image_; + return descriptor_set_pool.Get(descriptor_set_index_transfer_source_); + } + + static void GetDrawUsage(bool is_depth, + VkPipelineStageFlags* stage_mask_out, + VkAccessFlags* access_mask_out, + VkImageLayout* layout_out) { + if (stage_mask_out) { + *stage_mask_out = is_depth ? kDepthDrawStageMask : kColorDrawStageMask; + } + if (access_mask_out) { + *access_mask_out = + is_depth ? kDepthDrawAccessMask : kColorDrawAccessMask; + } + if (layout_out) { + *layout_out = is_depth ? 
kDepthDrawLayout : kColorDrawLayout; + } + } + void GetDrawUsage(VkPipelineStageFlags* stage_mask_out, + VkAccessFlags* access_mask_out, + VkImageLayout* layout_out) const { + GetDrawUsage(key().is_depth, stage_mask_out, access_mask_out, layout_out); + } + VkPipelineStageFlags current_stage_mask() const { + return current_stage_mask_; + } + VkAccessFlags current_access_mask() const { return current_access_mask_; } + VkImageLayout current_layout() const { return current_layout_; } + void SetUsage(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask, + VkImageLayout layout) { + current_stage_mask_ = stage_mask; + current_access_mask_ = access_mask; + current_layout_ = layout; + } + + uint32_t temporary_sort_index() const { return temporary_sort_index_; } + void SetTemporarySortIndex(uint32_t index) { + temporary_sort_index_ = index; + } + + private: + VulkanRenderTargetCache& render_target_cache_; + + VkImage image_; + VkDeviceMemory memory_; + + // TODO(Triang3l): Per-format drawing views for mutable formats with EDRAM + // aliasing without transfers. + VkImageView view_depth_color_; + // Optional views. + VkImageView view_depth_stencil_; + VkImageView view_stencil_; + VkImageView view_srgb_; + VkImageView view_color_transfer_separate_; + + // 2 sampled images for depth / stencil, 1 sampled image for color. + size_t descriptor_set_index_transfer_source_; + + VkPipelineStageFlags current_stage_mask_ = 0; + VkAccessFlags current_access_mask_ = 0; + VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + + // Temporary storage for indices in operations like transfers and dumps. + uint32_t temporary_sort_index_ = 0; + }; + + struct FramebufferKey { + RenderPassKey render_pass_key; + + // Same as RenderTargetKey::pitch_tiles_at_32bpp. + uint32_t pitch_tiles_at_32bpp : 8; // 8 + // [0, 2047]. + uint32_t depth_base_tiles : xenos::kEdramBaseTilesBits - 1; // 19 + uint32_t color_0_base_tiles : xenos::kEdramBaseTilesBits - 1; // 30 + + uint32_t color_1_base_tiles : xenos::kEdramBaseTilesBits - 1; // 43 + uint32_t color_2_base_tiles : xenos::kEdramBaseTilesBits - 1; // 54 + + uint32_t color_3_base_tiles : xenos::kEdramBaseTilesBits - 1; // 75 + + // Including all the padding, for a stable hash. + FramebufferKey() { Reset(); } + FramebufferKey(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + } + FramebufferKey& operator=(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + return *this; + } + bool operator==(const FramebufferKey& key) const { + return std::memcmp(this, &key, sizeof(*this)) == 0; + } + using Hasher = xe::hash::XXHasher; + void Reset() { std::memset(this, 0, sizeof(*this)); } + }; + + enum TransferUsedDescriptorSet : uint32_t { + // Ordered from the least to the most frequently changed. + kTransferUsedDescriptorSetHostDepthBuffer, + kTransferUsedDescriptorSetHostDepthStencilTextures, + kTransferUsedDescriptorSetDepthStencilTextures, + // Mutually exclusive with kTransferUsedDescriptorSetDepthStencilTextures. 
+ kTransferUsedDescriptorSetColorTexture, + + kTransferUsedDescriptorSetCount, + + kTransferUsedDescriptorSetHostDepthBufferBit = + uint32_t(1) << kTransferUsedDescriptorSetHostDepthBuffer, + kTransferUsedDescriptorSetHostDepthStencilTexturesBit = + uint32_t(1) << kTransferUsedDescriptorSetHostDepthStencilTextures, + kTransferUsedDescriptorSetDepthStencilTexturesBit = + uint32_t(1) << kTransferUsedDescriptorSetDepthStencilTextures, + kTransferUsedDescriptorSetColorTextureBit = + uint32_t(1) << kTransferUsedDescriptorSetColorTexture, + }; + + // 32-bit push constants (for simplicity of size calculation and to avoid + // std140 packing issues). + enum TransferUsedPushConstantDword : uint32_t { + kTransferUsedPushConstantDwordHostDepthAddress, + kTransferUsedPushConstantDwordAddress, + // Changed 8 times per transfer. + kTransferUsedPushConstantDwordStencilMask, + + kTransferUsedPushConstantDwordCount, + + kTransferUsedPushConstantDwordHostDepthAddressBit = + uint32_t(1) << kTransferUsedPushConstantDwordHostDepthAddress, + kTransferUsedPushConstantDwordAddressBit = + uint32_t(1) << kTransferUsedPushConstantDwordAddress, + kTransferUsedPushConstantDwordStencilMaskBit = + uint32_t(1) << kTransferUsedPushConstantDwordStencilMask, + }; + + enum class TransferPipelineLayoutIndex { + kColor, + kDepth, + kColorToStencilBit, + kDepthToStencilBit, + kColorAndHostDepthTexture, + kColorAndHostDepthBuffer, + kDepthAndHostDepthTexture, + kDepthAndHostDepthBuffer, + + kCount, + }; + + struct TransferPipelineLayoutInfo { + uint32_t used_descriptor_sets; + uint32_t used_push_constant_dwords; + }; + + static const TransferPipelineLayoutInfo + kTransferPipelineLayoutInfos[size_t(TransferPipelineLayoutIndex::kCount)]; + + enum class TransferMode : uint32_t { + kColorToDepth, + kColorToColor, + + kDepthToDepth, + kDepthToColor, + + kColorToStencilBit, + kDepthToStencilBit, + + // Two-source modes, using the host depth if it, when converted to the guest + // format, matches what's in the owner source (not modified, keep host + // precision), or the guest data otherwise (significantly modified, possibly + // cleared). Stencil for FragStencilRef is always taken from the guest + // source. + + kColorAndHostDepthToDepth, + // When using different source and destination depth formats. + kDepthAndHostDepthToDepth, + + // If host depth is fetched, but it's the same image as the destination, + // it's copied to the EDRAM buffer (but since it's just a scratch buffer, + // with tiles laid out linearly with the same pitch as in the original + // render target; also no swapping of 40-sample columns as opposed to the + // host render target - this is done only for the color source) and fetched + // from there instead of the host depth texture. 
+    kColorAndHostDepthCopyToDepth,
+    kDepthAndHostDepthCopyToDepth,
+
+    kCount,
+  };
+
+  enum class TransferOutput {
+    kColor,
+    kDepth,
+    kStencilBit,
+  };
+
+  struct TransferModeInfo {
+    TransferOutput output;
+    TransferPipelineLayoutIndex pipeline_layout;
+  };
+
+  static const TransferModeInfo kTransferModes[size_t(TransferMode::kCount)];
+
+  union TransferShaderKey {
+    uint32_t key;
+    struct {
+      xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits;
+      uint32_t dest_color_rt_index : xenos::kColorRenderTargetIndexBits;
+      uint32_t dest_resource_format : xenos::kRenderTargetFormatBits;
+      xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits;
+      // Always 1x when the host depth is a copy from a buffer rather than an
+      // image, so the same pipeline isn't created for different MSAA sample
+      // counts as it doesn't matter in this case.
+      xenos::MsaaSamples host_depth_source_msaa_samples
+          : xenos::kMsaaSamplesBits;
+      uint32_t source_resource_format : xenos::kRenderTargetFormatBits;
+
+      // Last bits because this affects the pipeline layout - after sorting,
+      // change it as few times as possible. Depth buffers have an
+      // additional stencil texture.
+      static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 4));
+      TransferMode mode : 4;
+    };
+
+    TransferShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }
+
+    struct Hasher {
+      size_t operator()(const TransferShaderKey& key) const {
+        return std::hash<uint32_t>{}(key.key);
+      }
+    };
+    bool operator==(const TransferShaderKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const TransferShaderKey& other_key) const {
+      return !(*this == other_key);
+    }
+    bool operator<(const TransferShaderKey& other_key) const {
+      return key < other_key.key;
+    }
+  };
+
+  struct TransferPipelineKey {
+    RenderPassKey render_pass_key;
+    TransferShaderKey shader_key;
+
+    TransferPipelineKey(RenderPassKey render_pass_key,
+                        TransferShaderKey shader_key)
+        : render_pass_key(render_pass_key), shader_key(shader_key) {}
+
+    struct Hasher {
+      size_t operator()(const TransferPipelineKey& key) const {
+        XXH3_state_t hash_state;
+        XXH3_64bits_reset(&hash_state);
+        XXH3_64bits_update(&hash_state, &key.render_pass_key,
+                           sizeof(key.render_pass_key));
+        XXH3_64bits_update(&hash_state, &key.shader_key,
+                           sizeof(key.shader_key));
+        return static_cast<size_t>(XXH3_64bits_digest(&hash_state));
+      }
+    };
+    bool operator==(const TransferPipelineKey& other_key) const {
+      return render_pass_key == other_key.render_pass_key &&
+             shader_key == other_key.shader_key;
+    }
+    bool operator!=(const TransferPipelineKey& other_key) const {
+      return !(*this == other_key);
+    }
+    bool operator<(const TransferPipelineKey& other_key) const {
+      if (render_pass_key != other_key.render_pass_key) {
+        return render_pass_key < other_key.render_pass_key;
+      }
+      return shader_key < other_key.shader_key;
+    }
+  };
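For context, these keys index the transfer pipeline container declared near the end of this class; a lookup would plausibly read as follows (the std::array value type is an assumption consistent with the one-pipeline-per-sample comment on that member):

  std::unordered_map<TransferPipelineKey, std::array<VkPipeline, 4>,
                     TransferPipelineKey::Hasher>
      pipelines;
  TransferPipelineKey key(render_pass_key, shader_key);
  // Hashes both packed 32-bit keys with XXH3, compares with operator==.
  auto it = pipelines.find(key);
  if (it == pipelines.end()) {
    // Create the pipelines for this render pass / shader combination and
    // insert them into the map.
  }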
+
+  union TransferAddressConstant {
+    uint32_t constant;
+    struct {
+      // All in tiles.
+      uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
+      uint32_t source_pitch : xenos::kEdramPitchTilesBits;
+      // Safe to use 12 bits for signed difference - no ownership transfer can
+      // ever occur between render targets with EDRAM base >= 2048 as this
+      // would result in 0-length spans. 10 + 10 + 12 is exactly 32, any more
+      // bits, and more root 32-bit constants will be used.
+      // Destination base in tiles minus source base in tiles (not vice versa
+      // because this is a transform of the coordinate system, not addresses
+      // themselves).
+      // 0 for host_depth_source_is_copy (ignored in this case as
+      // destination == source).
+      int32_t source_to_dest : xenos::kEdramBaseTilesBits;
+    };
+    TransferAddressConstant() : constant(0) {
+      static_assert_size(*this, sizeof(constant));
+    }
+    bool operator==(const TransferAddressConstant& other_constant) const {
+      return constant == other_constant.constant;
+    }
+    bool operator!=(const TransferAddressConstant& other_constant) const {
+      return !(*this == other_constant);
+    }
+  };
+
+  struct TransferInvocation {
+    Transfer transfer;
+    TransferShaderKey shader_key;
+    TransferInvocation(const Transfer& transfer,
+                       const TransferShaderKey& shader_key)
+        : transfer(transfer), shader_key(shader_key) {}
+    bool operator<(const TransferInvocation& other_invocation) {
+      // TODO(Triang3l): See if it may be better to sort by the source in the
+      // first place, especially when reading the same data multiple times
+      // (like to write the stencil bits after depth) for better read
+      // locality.
+      // Sort by the shader key primarily to reduce pipeline state (context)
+      // switches.
+      if (shader_key != other_invocation.shader_key) {
+        return shader_key < other_invocation.shader_key;
+      }
+      // Host depth render targets are changed rarely if they exist, won't
+      // save many binding changes, ignore them for simplicity (their
+      // existence is caught by the shader key change).
+      assert_not_null(transfer.source);
+      assert_not_null(other_invocation.transfer.source);
+      uint32_t source_index =
+          static_cast<const VulkanRenderTarget*>(transfer.source)
+              ->temporary_sort_index();
+      uint32_t other_source_index =
+          static_cast<const VulkanRenderTarget*>(
+              other_invocation.transfer.source)
+              ->temporary_sort_index();
+      if (source_index != other_source_index) {
+        return source_index < other_source_index;
+      }
+      return transfer.start_tiles < other_invocation.transfer.start_tiles;
+    }
+    bool CanBeMergedIntoOneDraw(
+        const TransferInvocation& other_invocation) const {
+      return shader_key == other_invocation.shader_key &&
+             transfer.AreSourcesSame(other_invocation.transfer);
+    }
+  };
+
+  union DumpPipelineKey {
+    uint32_t key;
+    struct {
+      xenos::MsaaSamples msaa_samples : 2;
+      uint32_t resource_format : 4;
+      // Last bit because this affects the pipeline - after sorting, only
+      // change it at most once. Depth buffers have an additional stencil SRV.
+      uint32_t is_depth : 1;
+    };
+
+    DumpPipelineKey() : key(0) { static_assert_size(*this, sizeof(key)); }
+
+    struct Hasher {
+      size_t operator()(const DumpPipelineKey& key) const {
+        return std::hash<uint32_t>{}(key.key);
+      }
+    };
+    bool operator==(const DumpPipelineKey& other_key) const {
+      return key == other_key.key;
+    }
+    bool operator!=(const DumpPipelineKey& other_key) const {
+      return !(*this == other_key);
+    }
+    bool operator<(const DumpPipelineKey& other_key) const {
+      return key < other_key.key;
+    }
+
+    xenos::ColorRenderTargetFormat GetColorFormat() const {
+      assert_false(is_depth);
+      return xenos::ColorRenderTargetFormat(resource_format);
+    }
+    xenos::DepthRenderTargetFormat GetDepthFormat() const {
+      assert_true(is_depth);
+      return xenos::DepthRenderTargetFormat(resource_format);
+    }
+  };
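The ordering defined by these operator< overloads is what makes batching work; a sketch of the assumed consuming loop (variable names are illustrative, not from the patch):

  std::sort(invocations.begin(), invocations.end());
  for (auto it = invocations.begin(); it != invocations.end();) {
    auto merge_end = std::next(it);
    // Coalesce adjacent invocations with the same shader and sources.
    while (merge_end != invocations.end() &&
           it->CanBeMergedIntoOneDraw(*merge_end)) {
      ++merge_end;
    }
    // Bind the pipeline for it->shader_key once, then draw all rectangles in
    // [it, merge_end) from a single vertex buffer range.
    it = merge_end;
  }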
+
+  // There's no strict dependency on the group size in dumping, for simplicity
+  // of calculations especially with resolution scaling, dividing manually (as
+  // the group size is not unlimited). The only restriction is that an integer
+  // multiple of it must be 80x16 samples (and no larger than that) for 32bpp,
+  // or 40x16 samples for 64bpp (because only a half of the pair of tiles may
+  // need to be dumped). Using 8x16 since that's 128 - the minimum required
+  // group size on Vulkan, and the maximum number of lanes in a subgroup on
+  // Vulkan.
+  static constexpr uint32_t kDumpSamplesPerGroupX = 8;
+  static constexpr uint32_t kDumpSamplesPerGroupY = 16;
+
+  union DumpPitches {
+    uint32_t pitches;
+    struct {
+      // Both in tiles.
+      uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
+      uint32_t source_pitch : xenos::kEdramPitchTilesBits;
+    };
+    DumpPitches() : pitches(0) { static_assert_size(*this, sizeof(pitches)); }
+    bool operator==(const DumpPitches& other_pitches) const {
+      return pitches == other_pitches.pitches;
+    }
+    bool operator!=(const DumpPitches& other_pitches) const {
+      return !(*this == other_pitches);
+    }
+  };
+
+  union DumpOffsets {
+    uint32_t offsets;
+    struct {
+      uint32_t dispatch_first_tile : xenos::kEdramBaseTilesBits;
+      uint32_t source_base_tiles : xenos::kEdramBaseTilesBits;
+    };
+    DumpOffsets() : offsets(0) { static_assert_size(*this, sizeof(offsets)); }
+    bool operator==(const DumpOffsets& other_offsets) const {
+      return offsets == other_offsets.offsets;
+    }
+    bool operator!=(const DumpOffsets& other_offsets) const {
+      return !(*this == other_offsets);
+    }
+  };
+
+  enum DumpDescriptorSet : uint32_t {
+    // Never changes. Same in both color and depth pipeline layouts, keep the
+    // first for pipeline layout compatibility, to only have to set it once.
+    kDumpDescriptorSetEdram,
+    // One resolve may need multiple sources. Different descriptor set layouts
+    // for color and depth.
+    kDumpDescriptorSetSource,
+
+    kDumpDescriptorSetCount,
+  };
+
+  enum DumpPushConstant : uint32_t {
+    // May be different for different sources.
+    kDumpPushConstantPitches,
+    // May be changed multiple times for the same source.
+    kDumpPushConstantOffsets,
+
+    kDumpPushConstantCount,
+  };
+
+  struct DumpInvocation {
+    ResolveCopyDumpRectangle rectangle;
+    DumpPipelineKey pipeline_key;
+    DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
+                   const DumpPipelineKey& pipeline_key)
+        : rectangle(rectangle), pipeline_key(pipeline_key) {}
+    bool operator<(const DumpInvocation& other_invocation) {
+      // Sort by the pipeline key primarily to reduce pipeline state (context)
+      // switches.
+      if (pipeline_key != other_invocation.pipeline_key) {
+        return pipeline_key < other_invocation.pipeline_key;
+      }
+      assert_not_null(rectangle.render_target);
+      uint32_t render_target_index =
+          static_cast<const VulkanRenderTarget*>(rectangle.render_target)
+              ->temporary_sort_index();
+      const ResolveCopyDumpRectangle& other_rectangle =
+          other_invocation.rectangle;
+      uint32_t other_render_target_index =
+          static_cast<const VulkanRenderTarget*>(other_rectangle.render_target)
+              ->temporary_sort_index();
+      if (render_target_index != other_render_target_index) {
+        return render_target_index < other_render_target_index;
+      }
+      if (rectangle.row_first != other_rectangle.row_first) {
+        return rectangle.row_first < other_rectangle.row_first;
+      }
+      return rectangle.row_first_start < other_rectangle.row_first_start;
+    }
+  };
+
+  // Returns the framebuffer object, or nullptr if failed to create.
+  const Framebuffer* GetFramebuffer(
+      RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
+      const RenderTarget* const* depth_and_color_render_targets);
+
+  VkShaderModule GetTransferShader(TransferShaderKey key);
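A transfer draw would then fetch its pipelines through the declaration just below; a sketch of the assumed caller logic (sample_rate_shading and dest_msaa_samples are illustrative locals):

  VkPipeline const* pipelines =
      GetTransferPipelines(TransferPipelineKey(render_pass_key, shader_key));
  if (!pipelines) {
    return;  // Pipeline creation failed earlier - skip this transfer.
  }
  // Without sample-rate shading, one pipeline per sample (see the comment on
  // GetTransferPipelines); with it, a single pipeline.
  uint32_t pipeline_count =
      sample_rate_shading ? 1 : uint32_t(1) << uint32_t(dest_msaa_samples);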
+  // With sample-rate shading, returns a pointer to one pipeline. Without
+  // sample-rate shading, returns a pointer to as many pipelines as there are
+  // samples. If there was a failure to create a pipeline, returns nullptr.
+  VkPipeline const* GetTransferPipelines(TransferPipelineKey key);
+
+  // Do ownership transfers for render targets - each render target / vector
+  // may be null / empty in case there's nothing to do for them.
+  // resolve_clear_rectangle is expected to be provided by
+  // PrepareHostRenderTargetsResolveClear which should do all the needed size
+  // bound checks.
+  void PerformTransfersAndResolveClears(
+      uint32_t render_target_count, RenderTarget* const* render_targets,
+      const std::vector<Transfer>* render_target_transfers,
+      const uint64_t* render_target_resolve_clear_values = nullptr,
+      const Transfer::Rectangle* resolve_clear_rectangle = nullptr);
+
+  VkPipeline GetDumpPipeline(DumpPipelineKey key);
+
+  // Writes contents of host render targets within rectangles from
+  // ResolveInfo::GetCopyEdramTileSpan to edram_buffer_.
+  void DumpRenderTargets(uint32_t dump_base, uint32_t dump_row_length_used,
+                         uint32_t dump_rows, uint32_t dump_pitch);
+
+  bool gamma_render_target_as_srgb_ = false;
+
+  bool depth_unorm24_vulkan_format_supported_ = false;
+  bool depth_float24_round_ = false;
+
+  bool msaa_2x_attachments_supported_ = false;
+  bool msaa_2x_no_attachments_supported_ = false;
+
+  std::unordered_map<FramebufferKey, Framebuffer, FramebufferKey::Hasher>
+      framebuffers_;
+
+  RenderPassKey last_update_render_pass_key_;
+  VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE;
+  uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
+  const RenderTarget* const*
+      last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] =
+          {};
+  const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE;
+
+  // Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and
+  // unused stencil from the transfer descriptor set), HostDepthStoreConstants
+  // passed via push constants.
+  VkPipelineLayout host_depth_store_pipeline_layout_ = VK_NULL_HANDLE;
+  VkPipeline host_depth_store_pipelines_[size_t(xenos::MsaaSamples::k4X) + 1] =
+      {};
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool>
+      transfer_vertex_buffer_pool_;
+  VkShaderModule transfer_passthrough_vertex_shader_ = VK_NULL_HANDLE;
+  VkPipelineLayout transfer_pipeline_layouts_[size_t(
+      TransferPipelineLayoutIndex::kCount)] = {};
+  // VK_NULL_HANDLE if failed to create.
+  std::unordered_map<TransferShaderKey, VkShaderModule,
+                     TransferShaderKey::Hasher>
+      transfer_shaders_;
+  // With sample-rate shading, one pipeline per entry. Without sample-rate
+  // shading, one pipeline per sample per entry. VK_NULL_HANDLE if failed to
+  // create.
+  std::unordered_map<TransferPipelineKey, std::array<VkPipeline, 4>,
+                     TransferPipelineKey::Hasher>
+      transfer_pipelines_;
+
+  VkPipelineLayout dump_pipeline_layout_color_ = VK_NULL_HANDLE;
+  VkPipelineLayout dump_pipeline_layout_depth_ = VK_NULL_HANDLE;
+  // Compute pipelines for copying host render target contents to the EDRAM
+  // buffer. VK_NULL_HANDLE if failed to create.
+  std::unordered_map<DumpPipelineKey, VkPipeline, DumpPipelineKey::Hasher>
+      dump_pipelines_;
+
+  // Temporary storage for Resolve.
+  std::vector<Transfer> clear_transfers_[2];
+
+  // Temporary storage for PerformTransfersAndResolveClears.
+  std::vector<TransferInvocation> current_transfer_invocations_;
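The dump scratch vectors declared just below are filled and drained roughly as follows; a sketch of the assumed DumpRenderTargets control flow (GetDumpPipelineKeyFor is a hypothetical helper for illustration):

  dump_rectangles_.clear();
  // Gather the rectangles of EDRAM content currently owned by host render
  // targets within the resolved tile span.
  dump_invocations_.clear();
  for (const ResolveCopyDumpRectangle& rectangle : dump_rectangles_) {
    dump_invocations_.emplace_back(rectangle, GetDumpPipelineKeyFor(rectangle));
  }
  std::sort(dump_invocations_.begin(), dump_invocations_.end());
  // One pipeline bind per run of equal keys, one dispatch per rectangle,
  // 8x16 samples per compute group.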
+
+  // Temporary storage for DumpRenderTargets.
+  std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
+  std::vector<DumpInvocation> dump_invocations_;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc
index 535389ceb..1ff7734ff 100644
--- a/src/xenia/gpu/vulkan/vulkan_shader.cc
+++ b/src/xenia/gpu/vulkan/vulkan_shader.cc
@@ -2,24 +2,59 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
- * Copyright 2020 Ben Vanik. All rights reserved. *
+ * Copyright 2022 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

 #include "xenia/gpu/vulkan/vulkan_shader.h"

-#include "third_party/fmt/include/fmt/format.h"
-#include "xenia/base/assert.h"
+#include <cstdint>
+
 #include "xenia/base/logging.h"
-#include "xenia/base/math.h"
-#include "xenia/ui/vulkan/vulkan_util.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"

 namespace xe {
 namespace gpu {
 namespace vulkan {

-using xe::ui::vulkan::util::CheckResult;
+VulkanShader::VulkanTranslation::~VulkanTranslation() {
+  if (shader_module_) {
+    const ui::vulkan::VulkanProvider& provider =
+        static_cast<const VulkanShader&>(shader()).provider_;
+    provider.dfn().vkDestroyShaderModule(provider.device(), shader_module_,
+                                         nullptr);
+  }
+}
+
+VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() {
+  if (!is_valid()) {
+    return VK_NULL_HANDLE;
+  }
+  if (shader_module_ != VK_NULL_HANDLE) {
+    return shader_module_;
+  }
+  const ui::vulkan::VulkanProvider& provider =
+      static_cast<const VulkanShader&>(shader()).provider_;
+  VkShaderModuleCreateInfo shader_module_create_info;
+  shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+  shader_module_create_info.pNext = nullptr;
+  shader_module_create_info.flags = 0;
+  shader_module_create_info.codeSize = translated_binary().size();
+  shader_module_create_info.pCode =
+      reinterpret_cast<const uint32_t*>(translated_binary().data());
+  if (provider.dfn().vkCreateShaderModule(provider.device(),
+                                          &shader_module_create_info, nullptr,
+                                          &shader_module_) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanShader::VulkanTranslation: Failed to create a Vulkan shader "
+        "module for shader {:016X} modification {:016X}",
+        shader().ucode_data_hash(), modification());
+    MakeInvalid();
+    return VK_NULL_HANDLE;
+  }
+  return shader_module_;
+}

 VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
                            xenos::ShaderType shader_type,
@@ -27,60 +62,10 @@ VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
                            const uint32_t* ucode_dwords,
                            size_t ucode_dword_count,
                            std::endian ucode_source_endian)
-    : Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
-             ucode_source_endian),
+    : SpirvShader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
+                  ucode_source_endian),
       provider_(provider) {}
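On the usage side, a consumer such as the pipeline cache would obtain the module lazily; a sketch assuming the usual Shader::GetOrCreateTranslation flow (not shown in this patch):

  auto& translation = static_cast<VulkanShader::VulkanTranslation&>(
      *vulkan_shader.GetOrCreateTranslation(modification));
  VkShaderModule shader_module = translation.GetOrCreateShaderModule();
  if (shader_module == VK_NULL_HANDLE) {
    // The translation is invalid, or vkCreateShaderModule failed (in which
    // case the translation has been marked invalid too).
    return false;
  }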
-
-VulkanShader::VulkanTranslation::~VulkanTranslation() {
-  if (shader_module_) {
-    const ui::vulkan::VulkanProvider& provider =
-        static_cast<const VulkanShader&>(shader()).provider_;
-    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
-    VkDevice device = provider.device();
-    dfn.vkDestroyShaderModule(device, shader_module_, nullptr);
-    shader_module_ = nullptr;
-  }
-}
-
-bool VulkanShader::VulkanTranslation::Prepare() {
-  assert_null(shader_module_);
-  assert_true(is_valid());
-
-  const VulkanShader& vulkan_shader =
-      static_cast<const VulkanShader&>(shader());
-  const ui::vulkan::VulkanProvider& provider = vulkan_shader.provider_;
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
-  VkDevice device = provider.device();
-
-  // Create the shader module.
-  VkShaderModuleCreateInfo shader_info;
-  shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-  shader_info.pNext = nullptr;
-  shader_info.flags = 0;
-  shader_info.codeSize = translated_binary().size();
-  shader_info.pCode =
-      reinterpret_cast<const uint32_t*>(translated_binary().data());
-  auto status =
-      dfn.vkCreateShaderModule(device, &shader_info, nullptr, &shader_module_);
-  CheckResult(status, "vkCreateShaderModule");
-
-  char type_char;
-  switch (vulkan_shader.type()) {
-    case xenos::ShaderType::kVertex:
-      type_char = 'v';
-      break;
-    case xenos::ShaderType::kPixel:
-      type_char = 'p';
-      break;
-    default:
-      type_char = 'u';
-  }
-  provider.SetDeviceObjectName(
-      VK_OBJECT_TYPE_SHADER_MODULE, uint64_t(shader_module_),
-      fmt::format("S({}): {:016X}", type_char, vulkan_shader.ucode_data_hash())
-          .c_str());
-  return status == VK_SUCCESS;
-}
-
 Shader::Translation* VulkanShader::CreateTranslationInstance(
     uint64_t modification) {
   return new VulkanTranslation(*this, modification);
diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h
index d6515df30..7e78ac3b6 100644
--- a/src/xenia/gpu/vulkan/vulkan_shader.h
+++ b/src/xenia/gpu/vulkan/vulkan_shader.h
@@ -2,7 +2,7 @@
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
- * Copyright 2016 Ben Vanik. All rights reserved. *
+ * Copyright 2022 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
@@ -10,42 +10,68 @@
 #ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_
 #define XENIA_GPU_VULKAN_VULKAN_SHADER_H_

-#include
+#include <atomic>

-#include "xenia/gpu/shader.h"
+#include "xenia/gpu/spirv_shader.h"
+#include "xenia/gpu/xenos.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"

 namespace xe {
 namespace gpu {
 namespace vulkan {

-class VulkanShader : public Shader {
+class VulkanShader : public SpirvShader {
  public:
-  class VulkanTranslation : public Translation {
+  class VulkanTranslation : public SpirvTranslation {
    public:
-    VulkanTranslation(VulkanShader& shader, uint64_t modification)
-        : Translation(shader, modification) {}
+    explicit VulkanTranslation(VulkanShader& shader, uint64_t modification)
+        : SpirvTranslation(shader, modification) {}
     ~VulkanTranslation() override;

-    bool Prepare();
-
-    // Available only if the translation is_valid and has been prepared.
+ VkShaderModule GetOrCreateShaderModule(); VkShaderModule shader_module() const { return shader_module_; } private: - VkShaderModule shader_module_ = nullptr; + VkShaderModule shader_module_ = VK_NULL_HANDLE; }; - VulkanShader(const ui::vulkan::VulkanProvider& provider, - xenos::ShaderType shader_type, uint64_t ucode_data_hash, - const uint32_t* ucode_dwords, size_t ucode_dword_count, - std::endian ucode_source_endian = std::endian::big); + explicit VulkanShader(const ui::vulkan::VulkanProvider& provider, + xenos::ShaderType shader_type, uint64_t ucode_data_hash, + const uint32_t* ucode_dwords, size_t ucode_dword_count, + std::endian ucode_source_endian = std::endian::big); + + // For owning subsystem like the pipeline cache, accessors for unique + // identifiers (used instead of hashes to make sure collisions can't happen) + // of binding layouts used by the shader, for invalidation if a shader with an + // incompatible layout has been bound. + size_t GetTextureBindingLayoutUserUID() const { + return texture_binding_layout_user_uid_; + } + size_t GetSamplerBindingLayoutUserUID() const { + return sampler_binding_layout_user_uid_; + } + // Modifications of the same shader can be translated on different threads. + // The "set" function must only be called if "enter" returned true - these are + // set up only once. + bool EnterBindingLayoutUserUIDSetup() { + return !binding_layout_user_uids_set_up_.test_and_set(); + } + void SetTextureBindingLayoutUserUID(size_t uid) { + texture_binding_layout_user_uid_ = uid; + } + void SetSamplerBindingLayoutUserUID(size_t uid) { + sampler_binding_layout_user_uid_ = uid; + } protected: Translation* CreateTranslationInstance(uint64_t modification) override; private: const ui::vulkan::VulkanProvider& provider_; + + std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT; + size_t texture_binding_layout_user_uid_ = 0; + size_t sampler_binding_layout_user_uid_ = 0; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc new file mode 100644 index 000000000..c321b9840 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -0,0 +1,499 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" + +#include +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/cvar.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +DEFINE_bool(vulkan_sparse_shared_memory, true, + "Enable sparse binding for shared memory emulation. 
Disabling it " + "increases video memory usage - a 512 MB buffer is created - but " + "allows graphics debuggers that don't support sparse binding to " + "work.", + "Vulkan"); + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanSharedMemory::VulkanSharedMemory( + VulkanCommandProcessor& command_processor, Memory& memory, + TraceWriter& trace_writer, + VkPipelineStageFlags guest_shader_pipeline_stages) + : SharedMemory(memory), + command_processor_(command_processor), + trace_writer_(trace_writer), + guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {} + +VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); } + +bool VulkanSharedMemory::Initialize() { + InitializeCommon(); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + const VkBufferCreateFlags sparse_flags = + VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + + // Try to create a sparse buffer. + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = sparse_flags; + buffer_create_info.size = kBufferSize; + buffer_create_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.queueFamilyIndexCount = 0; + buffer_create_info.pQueueFamilyIndices = nullptr; + if (cvars::vulkan_sparse_shared_memory && + provider.IsSparseBindingSupported() && + device_features.sparseResidencyBuffer) { + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) == + VK_SUCCESS) { + VkMemoryRequirements buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer_, + &buffer_memory_requirements); + if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &buffer_memory_type_)) { + uint32_t allocation_size_log2; + xe::bit_scan_forward( + std::max(uint64_t(buffer_memory_requirements.alignment), + uint64_t(1)), + &allocation_size_log2); + if (allocation_size_log2 < kBufferSizeLog2) { + // Maximum of 1024 allocations in the worst case for all of the + // buffer because of the overall 4096 allocation count limit on + // Windows drivers. + InitializeSparseHostGpuMemory( + std::max(allocation_size_log2, + std::max(kHostGpuMemoryOptimalSparseAllocationLog2, + kBufferSizeLog2 - uint32_t(10)))); + } else { + // Shouldn't happen on any real platform, but no point allocating the + // buffer sparsely. + dfn.vkDestroyBuffer(device, buffer_, nullptr); + buffer_ = VK_NULL_HANDLE; + } + } else { + XELOGE( + "Shared memory: Failed to get a device-local Vulkan memory type " + "for the sparse buffer"); + dfn.vkDestroyBuffer(device, buffer_, nullptr); + buffer_ = VK_NULL_HANDLE; + } + } else { + XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer", + kBufferSize >> 20); + } + } + + // Create a non-sparse buffer if there were issues with the sparse buffer. 
+  if (buffer_ == VK_NULL_HANDLE) {
+    XELOGGPU(
+        "Vulkan sparse binding is not used for shared memory emulation - "
+        "video memory usage may increase significantly because a full {} MB "
+        "buffer will be created",
+        kBufferSize >> 20);
+    buffer_create_info.flags &= ~sparse_flags;
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
+        VK_SUCCESS) {
+      XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
+             kBufferSize >> 20);
+      Shutdown();
+      return false;
+    }
+    VkMemoryRequirements buffer_memory_requirements;
+    dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                      &buffer_memory_requirements);
+    if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                  provider.memory_types_device_local(),
+                              &buffer_memory_type_)) {
+      XELOGE(
+          "Shared memory: Failed to get a device-local Vulkan memory type for "
+          "the buffer");
+      Shutdown();
+      return false;
+    }
+    VkMemoryAllocateInfo buffer_memory_allocate_info;
+    VkMemoryAllocateInfo* buffer_memory_allocate_info_last =
+        &buffer_memory_allocate_info;
+    buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    buffer_memory_allocate_info.pNext = nullptr;
+    buffer_memory_allocate_info.allocationSize =
+        buffer_memory_requirements.size;
+    buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
+    VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
+    if (provider.device_extensions().khr_dedicated_allocation) {
+      buffer_memory_allocate_info_last->pNext =
+          &buffer_memory_dedicated_allocate_info;
+      buffer_memory_allocate_info_last =
+          reinterpret_cast<VkMemoryAllocateInfo*>(
+              &buffer_memory_dedicated_allocate_info);
+      buffer_memory_dedicated_allocate_info.sType =
+          VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+      buffer_memory_dedicated_allocate_info.pNext = nullptr;
+      buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
+      buffer_memory_dedicated_allocate_info.buffer = buffer_;
+    }
+    VkDeviceMemory buffer_memory;
+    if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr,
+                             &buffer_memory) != VK_SUCCESS) {
+      XELOGE(
+          "Shared memory: Failed to allocate {} MB of memory for the Vulkan "
+          "buffer",
+          kBufferSize >> 20);
+      Shutdown();
+      return false;
+    }
+    buffer_memory_.push_back(buffer_memory);
+    if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) !=
+        VK_SUCCESS) {
+      XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer");
+      Shutdown();
+      return false;
+    }
+  }
+
+  // The first usage will likely be uploading.
+  last_usage_ = Usage::kTransferDestination;
+  last_written_range_ = std::make_pair(0, 0);
+
+  upload_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
+      provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+      xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize,
+                size_t(1) << page_size_log2()));
+
+  return true;
+}
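A quick check of the allocation budget referenced in Initialize() above: with the 512 MB buffer, kBufferSizeLog2 would be 29 (an inference from the logged size, not stated explicitly here), so the kBufferSizeLog2 - 10 clamp keeps each sparse allocation at 512 KB or larger:

  // Minimum allocation: 1 << (29 - 10) = 512 KB.
  // Worst case: 512 MB / 512 KB = 1024 allocations, within the 4096
  // per-process allocation limit of Windows drivers cited in the comment.
  static_assert((uint64_t(512) << 20) / (uint64_t(1) << 19) == 1024);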
+
+void VulkanSharedMemory::Shutdown(bool from_destructor) {
+  ResetTraceDownload();
+
+  upload_buffer_pool_.reset();
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
+  for (VkDeviceMemory memory : buffer_memory_) {
+    dfn.vkFreeMemory(device, memory, nullptr);
+  }
+  buffer_memory_.clear();
+
+  // If calling from the destructor, the SharedMemory destructor will call
+  // ShutdownCommon.
+  if (!from_destructor) {
+    ShutdownCommon();
+  }
+}
+
+void VulkanSharedMemory::CompletedSubmissionUpdated() {
+  upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission());
+}
+
+void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); }
+
+void VulkanSharedMemory::Use(Usage usage,
+                             std::pair<uint32_t, uint32_t> written_range) {
+  written_range.first = std::min(written_range.first, kBufferSize);
+  written_range.second =
+      std::min(written_range.second, kBufferSize - written_range.first);
+  assert_true(usage != Usage::kRead || !written_range.second);
+  if (last_usage_ != usage || last_written_range_.second) {
+    VkPipelineStageFlags src_stage_mask, dst_stage_mask;
+    VkAccessFlags src_access_mask, dst_access_mask;
+    GetUsageMasks(last_usage_, src_stage_mask, src_access_mask);
+    GetUsageMasks(usage, dst_stage_mask, dst_access_mask);
+    VkDeviceSize offset, size;
+    if (last_usage_ == usage) {
+      // Committing the previous write, while not changing the access mask
+      // (passing false as whether to skip the barrier if no masks are changed
+      // for this reason).
+      offset = VkDeviceSize(last_written_range_.first);
+      size = VkDeviceSize(last_written_range_.second);
+    } else {
+      // Changing the stage and access mask - all preceding writes must be
+      // available not only to the source stage, but to the destination as
+      // well.
+      offset = 0;
+      size = VK_WHOLE_SIZE;
+      last_usage_ = usage;
+    }
+    command_processor_.PushBufferMemoryBarrier(
+        buffer_, offset, size, src_stage_mask, dst_stage_mask, src_access_mask,
+        dst_access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
+        false);
+  }
+  last_written_range_ = written_range;
+}
+
+bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
+  ResetTraceDownload();
+  PrepareForTraceDownload();
+  uint32_t download_page_count = trace_download_page_count();
+  if (!download_page_count) {
+    return false;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, download_page_count << page_size_log2(),
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+          ui::vulkan::util::MemoryPurpose::kReadback, trace_download_buffer_,
+          trace_download_buffer_memory_)) {
+    XELOGE(
+        "Shared memory: Failed to create a {} KB GPU-written memory download "
+        "buffer for frame tracing",
+        download_page_count << page_size_log2() >> 10);
+    ResetTraceDownload();
+    return false;
+  }
+
+  Use(Usage::kRead);
+  command_processor_.SubmitBarriers(true);
+  DeferredCommandBuffer& command_buffer =
+      command_processor_.deferred_command_buffer();
+
+  size_t download_range_count = trace_download_ranges().size();
+  VkBufferCopy* download_regions = command_buffer.CmdCopyBufferEmplace(
+      buffer_, trace_download_buffer_, uint32_t(download_range_count));
+  VkDeviceSize download_buffer_offset = 0;
+  for (size_t i = 0; i < download_range_count; ++i) {
+    VkBufferCopy& download_region = download_regions[i];
+    const std::pair<uint32_t, uint32_t>& download_range =
+        trace_download_ranges()[i];
+    download_region.srcOffset = download_range.first;
+    download_region.dstOffset = download_buffer_offset;
+    download_region.size = download_range.second;
+    download_buffer_offset += download_range.second;
+  }
+
+  command_processor_.PushBufferMemoryBarrier(
+      trace_download_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
+      VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+      VK_ACCESS_HOST_READ_BIT);
+
+  return true;
+}
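These two trace functions are intended to bracket one traced submission; an assumed calling sequence on the command processor side:

  if (shared_memory.InitializeTraceSubmitDownloads()) {
    // Submit the deferred command buffer and await its completion so the
    // host-readable download buffer contents are coherent.
    shared_memory.InitializeTraceCompleteDownloads();
  }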
+
+void VulkanSharedMemory::InitializeTraceCompleteDownloads() {
+  if (!trace_download_buffer_memory_) {
+    return;
+  }
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  void* download_mapping;
+  if (dfn.vkMapMemory(device, trace_download_buffer_memory_, 0, VK_WHOLE_SIZE,
+                      0, &download_mapping) == VK_SUCCESS) {
+    uint32_t download_buffer_offset = 0;
+    for (const auto& download_range : trace_download_ranges()) {
+      trace_writer_.WriteMemoryRead(
+          download_range.first, download_range.second,
+          reinterpret_cast<const uint8_t*>(download_mapping) +
+              download_buffer_offset);
+      download_buffer_offset += download_range.second;
+    }
+    dfn.vkUnmapMemory(device, trace_download_buffer_memory_);
+  } else {
+    XELOGE(
+        "Shared memory: Failed to map the GPU-written memory download buffer "
+        "for frame tracing");
+  }
+  ResetTraceDownload();
+}
+
+bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
+    uint32_t offset_allocations, uint32_t length_allocations) {
+  if (!length_allocations) {
+    return true;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkMemoryAllocateInfo memory_allocate_info;
+  memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  memory_allocate_info.pNext = nullptr;
+  memory_allocate_info.allocationSize =
+      length_allocations << host_gpu_memory_sparse_granularity_log2();
+  memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
+  VkDeviceMemory memory;
+  if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
+      VK_SUCCESS) {
+    XELOGE("Shared memory: Failed to allocate sparse buffer memory");
+    return false;
+  }
+  buffer_memory_.push_back(memory);
+
+  VkSparseMemoryBind bind;
+  bind.resourceOffset = offset_allocations
+                        << host_gpu_memory_sparse_granularity_log2();
+  bind.size = memory_allocate_info.allocationSize;
+  bind.memory = memory;
+  bind.memoryOffset = 0;
+  bind.flags = 0;
+  VkPipelineStageFlags bind_wait_stage_mask =
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
+  if (provider.device_features().tessellationShader) {
+    bind_wait_stage_mask |=
+        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+  }
+  command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
+
+  return true;
+}
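Before the UploadRanges implementation that follows, a worked example of the ordering-barrier range it computes from the sorted page ranges. Assuming 4 KiB pages (page_size_log2() == 12; the real value comes from SharedMemory and may differ), ranges {first: 8, count: 2} and {first: 100, count: 4} yield:

  // offset = 8 << 12             = 32768
  // size   = (100 + 4 - 8) << 12 = 393216
  // One barrier covering pages [8, 104) - every page this batch touches.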
+
+bool VulkanSharedMemory::UploadRanges(
+    const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
+  if (upload_page_ranges.empty()) {
+    return true;
+  }
+  // upload_page_ranges are sorted, use them to determine the range for the
+  // ordering barrier.
+  Use(Usage::kTransferDestination,
+      std::make_pair(
+          upload_page_ranges.front().first << page_size_log2(),
+          (upload_page_ranges.back().first + upload_page_ranges.back().second -
+           upload_page_ranges.front().first)
+              << page_size_log2()));
+  command_processor_.SubmitBarriers(true);
+  DeferredCommandBuffer& command_buffer =
+      command_processor_.deferred_command_buffer();
+  uint64_t submission_current = command_processor_.GetCurrentSubmission();
+  bool successful = true;
+  upload_regions_.clear();
+  VkBuffer upload_buffer_previous = VK_NULL_HANDLE;
+  for (auto upload_range : upload_page_ranges) {
+    uint32_t upload_range_start = upload_range.first;
+    uint32_t upload_range_length = upload_range.second;
+    trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(),
+                                  upload_range_length << page_size_log2());
+    while (upload_range_length) {
+      VkBuffer upload_buffer;
+      VkDeviceSize upload_buffer_offset, upload_buffer_size;
+      uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
+          submission_current, upload_range_length << page_size_log2(),
+          size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset,
+          upload_buffer_size);
+      if (upload_buffer_mapping == nullptr) {
+        XELOGE("Shared memory: Failed to get a Vulkan upload buffer");
+        successful = false;
+        break;
+      }
+      MakeRangeValid(upload_range_start << page_size_log2(),
+                     uint32_t(upload_buffer_size), false, false);
+      std::memcpy(
+          upload_buffer_mapping,
+          memory().TranslatePhysical(upload_range_start << page_size_log2()),
+          upload_buffer_size);
+      if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) {
+        assert_true(upload_buffer_previous != VK_NULL_HANDLE);
+        command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
+                                       uint32_t(upload_regions_.size()),
+                                       upload_regions_.data());
+        upload_regions_.clear();
+      }
+      upload_buffer_previous = upload_buffer;
+      VkBufferCopy& upload_region = upload_regions_.emplace_back();
+      upload_region.srcOffset = upload_buffer_offset;
+      upload_region.dstOffset =
+          VkDeviceSize(upload_range_start << page_size_log2());
+      upload_region.size = upload_buffer_size;
+      uint32_t upload_buffer_pages =
+          uint32_t(upload_buffer_size >> page_size_log2());
+      upload_range_start += upload_buffer_pages;
+      upload_range_length -= upload_buffer_pages;
+    }
+    if (!successful) {
+      break;
+    }
+  }
+  if (!upload_regions_.empty()) {
+    assert_true(upload_buffer_previous != VK_NULL_HANDLE);
+    command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
+                                   uint32_t(upload_regions_.size()),
+                                   upload_regions_.data());
+    upload_regions_.clear();
+  }
+  return successful;
+}
+
+void VulkanSharedMemory::GetUsageMasks(Usage usage,
+                                       VkPipelineStageFlags& stage_mask,
+                                       VkAccessFlags& access_mask) const {
+  switch (usage) {
+    case Usage::kComputeWrite:
+      stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+      access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+      return;
+    case Usage::kTransferDestination:
+      stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+      access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      return;
+    default:
+      break;
+  }
+  stage_mask =
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | guest_shader_pipeline_stages_;
+  access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
+  switch (usage) {
+    case Usage::kRead:
+      stage_mask |=
+          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
+      access_mask |= VK_ACCESS_TRANSFER_READ_BIT;
+      break;
+    case Usage::kGuestDrawReadWrite:
+      access_mask |= VK_ACCESS_SHADER_WRITE_BIT;
+      break;
+    default:
+      assert_unhandled_case(usage);
+  }
+}
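Tying GetUsageMasks back to Use(), a typical sequence of shared memory states within a frame might look like this (call sites assumed for illustration):

  // Upload dirty guest pages: barrier to transfer-write first.
  shared_memory.Use(VulkanSharedMemory::Usage::kTransferDestination,
                    {upload_offset, upload_size});
  // Draw reading index / vfetch data: one transfer-write to read barrier.
  shared_memory.Use(VulkanSharedMemory::Usage::kRead);
  // A draw with memexport both reads and writes; consecutive written ranges
  // get read-write ordering barriers even without a usage change.
  shared_memory.Use(VulkanSharedMemory::Usage::kGuestDrawReadWrite,
                    {export_offset, export_size});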
+
+void VulkanSharedMemory::ResetTraceDownload() {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         trace_download_buffer_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         trace_download_buffer_memory_);
+  ReleaseTraceDownloadRanges();
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
new file mode 100644
index 000000000..14214a5d0
--- /dev/null
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
@@ -0,0 +1,97 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
+#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "xenia/gpu/shared_memory.h"
+#include "xenia/gpu/trace_writer.h"
+#include "xenia/memory.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
+
+namespace xe {
+namespace gpu {
+namespace vulkan {
+
+class VulkanCommandProcessor;
+
+class VulkanSharedMemory : public SharedMemory {
+ public:
+  VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory,
+                     TraceWriter& trace_writer,
+                     VkPipelineStageFlags guest_shader_pipeline_stages);
+  ~VulkanSharedMemory() override;
+
+  bool Initialize();
+  void Shutdown(bool from_destructor = false);
+
+  void CompletedSubmissionUpdated();
+  void EndSubmission();
+
+  enum class Usage {
+    // Index buffer, vfetch, compute read, transfer source.
+    kRead,
+    // Index buffer, vfetch, memexport.
+    kGuestDrawReadWrite,
+    kComputeWrite,
+    kTransferDestination,
+  };
+  // Inserts a pipeline barrier for the target usage, also ensuring consecutive
+  // read-write accesses are ordered with each other.
+  void Use(Usage usage, std::pair<uint32_t, uint32_t> written_range = {});
+
+  VkBuffer buffer() const { return buffer_; }
+
+  // Returns true if any downloads were submitted to the command processor.
+  bool InitializeTraceSubmitDownloads();
+  void InitializeTraceCompleteDownloads();
+
+ protected:
+  bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
+                                        uint32_t length_allocations) override;
+
+  bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
+                        upload_page_ranges) override;
+
+ private:
+  void GetUsageMasks(Usage usage, VkPipelineStageFlags& stage_mask,
+                     VkAccessFlags& access_mask) const;
+
+  VulkanCommandProcessor& command_processor_;
+  TraceWriter& trace_writer_;
+  VkPipelineStageFlags guest_shader_pipeline_stages_;
+
+  VkBuffer buffer_ = VK_NULL_HANDLE;
+  uint32_t buffer_memory_type_;
+  // Single for non-sparse, every allocation so far for sparse.
+  std::vector<VkDeviceMemory> buffer_memory_;
+
+  Usage last_usage_;
+  std::pair<uint32_t, uint32_t> last_written_range_;
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
+  std::vector<VkBufferCopy> upload_regions_;
+
+  // Created temporarily, only for downloading.
+ VkBuffer trace_download_buffer_ = VK_NULL_HANDLE; + VkDeviceMemory trace_download_buffer_memory_ = VK_NULL_HANDLE; + void ResetTraceDownload(); +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 389749f3b..580696a30 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,1671 +10,2655 @@ #include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include +#include +#include -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/literals.h" +#include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" -#include "xenia/base/memory.h" #include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/texture_conversion.h" #include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/texture_config.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" +#include "xenia/gpu/texture_util.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_mem_alloc.h" #include "xenia/ui/vulkan/vulkan_util.h" -DECLARE_bool(texture_dump); - namespace xe { namespace gpu { - -void TextureDump(const TextureInfo& src, void* buffer, size_t length); - namespace vulkan { -using xe::ui::vulkan::util::CheckResult; - -using namespace xe::literals; - -constexpr uint32_t kMaxTextureSamplers = 32; -constexpr VkDeviceSize kStagingBufferSize = 64_MiB; - -const char* get_dimension_name(xenos::DataDimension dimension) { - static const char* names[] = { - "1D", - "2D", - "3D", - "cube", - }; - auto value = static_cast(dimension); - if (value < xe::countof(names)) { - return names[value]; - } - return "unknown"; -} - -VulkanTextureCache::VulkanTextureCache(Memory* memory, - RegisterFile* register_file, - TraceWriter* trace_writer, - ui::vulkan::VulkanProvider& provider) - : memory_(memory), - register_file_(register_file), - trace_writer_(trace_writer), - provider_(provider), - staging_buffer_(provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - kStagingBufferSize), - wb_staging_buffer_(provider, VK_BUFFER_USAGE_TRANSFER_DST_BIT, - kStagingBufferSize) {} - -VulkanTextureCache::~VulkanTextureCache() { Shutdown(); } - -VkResult VulkanTextureCache::Initialize() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Descriptor pool used for all of our cached descriptors. 
- VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[0].descriptorCount = 32768; - descriptor_pool_ = std::make_unique( - provider_, 32768, - std::vector(pool_sizes, std::end(pool_sizes))); - - wb_command_pool_ = std::make_unique( - provider_, provider_.queue_family_graphics_compute()); - - // Check some device limits - // On low sampler counts: Rarely would we experience over 16 unique samplers. - // This code could be refactored to scale up/down to the # of samplers. - auto& limits = provider_.device_properties().limits; - if (limits.maxPerStageDescriptorSamplers < kMaxTextureSamplers || - limits.maxPerStageDescriptorSampledImages < kMaxTextureSamplers) { - XELOGE( - "Physical device is unable to support required number of sampled " - "images! Expect instability! (maxPerStageDescriptorSamplers={}, " - "maxPerStageDescriptorSampledImages={})", - limits.maxPerStageDescriptorSamplers, - limits.maxPerStageDescriptorSampledImages); - // assert_always(); - } - - // Create the descriptor set layout used for rendering. - // We always have the same number of samplers but only some are used. - // The shaders will alias the bindings to the 4 dimensional types. - VkDescriptorSetLayoutBinding bindings[1]; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[0].descriptorCount = kMaxTextureSamplers; - bindings[0].stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[0].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; - descriptor_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_info.pNext = nullptr; - descriptor_set_layout_info.flags = 0; - descriptor_set_layout_info.bindingCount = - static_cast(xe::countof(bindings)); - descriptor_set_layout_info.pBindings = bindings; - status = - dfn.vkCreateDescriptorSetLayout(device, &descriptor_set_layout_info, - nullptr, &texture_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - status = staging_buffer_.Initialize(); - if (status != VK_SUCCESS) { - return status; - } - - status = wb_staging_buffer_.Initialize(); - if (status != VK_SUCCESS) { - return status; - } - - // Create a memory allocator for textures. - VmaVulkanFunctions vulkan_funcs = {}; - ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs, provider_); - - VmaAllocatorCreateInfo alloc_info = {}; - alloc_info.physicalDevice = provider_.physical_device(); - alloc_info.device = device; - alloc_info.pVulkanFunctions = &vulkan_funcs; - alloc_info.instance = provider_.instance(); - status = vmaCreateAllocator(&alloc_info, &mem_allocator_); - if (status != VK_SUCCESS) { - dfn.vkDestroyDescriptorSetLayout(device, texture_descriptor_set_layout_, - nullptr); - return status; - } - - invalidated_textures_sets_[0].reserve(64); - invalidated_textures_sets_[1].reserve(64); - invalidated_textures_ = &invalidated_textures_sets_[0]; - - memory_invalidation_callback_handle_ = - memory_->RegisterPhysicalMemoryInvalidationCallback( - MemoryInvalidationCallbackThunk, this); - - return VK_SUCCESS; -} - -void VulkanTextureCache::Shutdown() { - if (memory_invalidation_callback_handle_ != nullptr) { - memory_->UnregisterPhysicalMemoryInvalidationCallback( - memory_invalidation_callback_handle_); - memory_invalidation_callback_handle_ = nullptr; - } - - // Free all textures allocated. 
- ClearCache(); - Scavenge(); - - if (mem_allocator_ != nullptr) { - vmaDestroyAllocator(mem_allocator_); - mem_allocator_ = nullptr; - } - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkDestroyDescriptorSetLayout(device, texture_descriptor_set_layout_, - nullptr); -} - -VulkanTextureCache::Texture* VulkanTextureCache::AllocateTexture( - const TextureInfo& texture_info, VkFormatFeatureFlags required_flags) { - auto format_info = texture_info.format_info(); - assert_not_null(format_info); - - auto& config = texture_configs[int(format_info->format)]; - VkFormat format = config.host_format; - if (format == VK_FORMAT_UNDEFINED) { - XELOGE( - "Texture Cache: Attempted to allocate texture format {}, which is " - "defined as VK_FORMAT_UNDEFINED!", - texture_info.format_info()->name); - return nullptr; - } - - bool is_cube = false; - // Create an image first. - VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.flags = 0; - - switch (texture_info.dimension) { - case xenos::DataDimension::k1D: - case xenos::DataDimension::k2DOrStacked: - if (!texture_info.is_stacked) { - image_info.imageType = VK_IMAGE_TYPE_2D; - } else { - image_info.imageType = VK_IMAGE_TYPE_3D; - image_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; - } - break; - case xenos::DataDimension::k3D: - image_info.imageType = VK_IMAGE_TYPE_3D; - break; - case xenos::DataDimension::kCube: - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - is_cube = true; - break; - default: - assert_unhandled_case(texture_info.dimension); - return nullptr; - } - - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider_.ifn(); - - // Check the device limits for the format before we create it. - VkFormatProperties props; - ifn.vkGetPhysicalDeviceFormatProperties(provider_.physical_device(), format, - &props); - if ((props.optimalTilingFeatures & required_flags) != required_flags) { - // Texture needs conversion on upload to a native format. - XELOGE( - "Texture Cache: Invalid usage flag specified on format {} (0x{:X})\n\t" - "(requested: 0x{:X})", - texture_info.format_info()->name, uint32_t(format), - uint32_t(required_flags & ~props.optimalTilingFeatures)); - } - - if (texture_info.dimension != xenos::DataDimension::kCube && - props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) { - // Add color attachment usage if it's supported. - image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } else if (props.optimalTilingFeatures & - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) { - // Add depth/stencil usage as well. - image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - } - - if (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT) { - image_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - } - - VkImageFormatProperties image_props; - ifn.vkGetPhysicalDeviceImageFormatProperties( - provider_.physical_device(), format, image_info.imageType, - image_info.tiling, image_info.usage, image_info.flags, &image_props); - - // TODO(DrChat): Actually check the image properties. - - image_info.format = format; - image_info.extent.width = texture_info.width + 1; - image_info.extent.height = texture_info.height + 1; - image_info.extent.depth = !is_cube ? 
1 + texture_info.depth : 1; - image_info.mipLevels = texture_info.mip_min_level + texture_info.mip_levels(); - image_info.arrayLayers = !is_cube ? 1 : 1 + texture_info.depth; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImage image; - - assert_true(image_props.maxExtent.width >= image_info.extent.width); - assert_true(image_props.maxExtent.height >= image_info.extent.height); - assert_true(image_props.maxExtent.depth >= image_info.extent.depth); - assert_true(image_props.maxMipLevels >= image_info.mipLevels); - assert_true(image_props.maxArrayLayers >= image_info.arrayLayers); - - VmaAllocation alloc; - VmaAllocationCreateInfo vma_create_info = { - 0, VMA_MEMORY_USAGE_GPU_ONLY, 0, 0, 0, nullptr, nullptr, - }; - VmaAllocationInfo vma_info = {}; - VkResult status = vmaCreateImage(mem_allocator_, &image_info, - &vma_create_info, &image, &alloc, &vma_info); - if (status != VK_SUCCESS) { - // Allocation failed. - return nullptr; - } - - auto texture = new Texture(); - texture->format = image_info.format; - texture->image = image; - texture->image_layout = image_info.initialLayout; - texture->alloc = alloc; - texture->alloc_info = vma_info; - texture->framebuffer = nullptr; - texture->usage_flags = image_info.usage; - texture->is_watched = false; - texture->texture_info = texture_info; - return texture; -} - -bool VulkanTextureCache::FreeTexture(Texture* texture) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - if (texture->in_flight_fence) { - VkResult status = dfn.vkGetFenceStatus(device, texture->in_flight_fence); - if (status != VK_SUCCESS && status != VK_ERROR_DEVICE_LOST) { - // Texture still in flight. - return false; - } - } - - if (texture->framebuffer) { - dfn.vkDestroyFramebuffer(device, texture->framebuffer, nullptr); - } - - for (auto it = texture->views.begin(); it != texture->views.end();) { - dfn.vkDestroyImageView(device, (*it)->view, nullptr); - it = texture->views.erase(it); - } - - { - global_critical_region_.Acquire(); - if (texture->is_watched) { - for (auto it = watched_textures_.begin(); - it != watched_textures_.end();) { - if (it->texture == texture) { - watched_textures_.erase(it); - break; - } - ++it; - } - texture->is_watched = false; - } - } - - vmaDestroyImage(mem_allocator_, texture->image, texture->alloc); - delete texture; - return true; -} - -void VulkanTextureCache::WatchTexture(Texture* texture) { - uint32_t address, size; - - { - global_critical_region_.Acquire(); - - assert_false(texture->is_watched); - - WatchedTexture watched_texture; - if (texture->texture_info.memory.base_address && - texture->texture_info.memory.base_size) { - watched_texture.is_mip = false; - address = texture->texture_info.memory.base_address; - size = texture->texture_info.memory.base_size; - } else if (texture->texture_info.memory.mip_address && - texture->texture_info.memory.mip_size) { - watched_texture.is_mip = true; - address = texture->texture_info.memory.mip_address; - size = texture->texture_info.memory.mip_size; - } else { - return; - } - watched_texture.texture = texture; - - // Fire any access watches that overlap this region. 
- for (auto it = watched_textures_.begin(); it != watched_textures_.end();) { - // Case 1: 2222222|222|11111111 - // Case 2: 1111111|222|22222222 - // Case 3: 1111111|222|11111111 (fragmentation) - // Case 4: 2222222|222|22222222 (complete overlap) - Texture* other_texture = it->texture; - uint32_t other_address, other_size; - if (it->is_mip) { - other_address = other_texture->texture_info.memory.mip_address; - other_size = other_texture->texture_info.memory.mip_size; - } else { - other_address = other_texture->texture_info.memory.base_address; - other_size = other_texture->texture_info.memory.base_size; - } - - bool hit = false; - if (address <= other_address && address + size > other_address) { - hit = true; - } else if (other_address <= address && - other_address + other_size > address) { - hit = true; - } else if (other_address <= address && - other_address + other_size > address + size) { - hit = true; - } else if (other_address >= address && - other_address + other_size < address + size) { - hit = true; - } - - if (hit) { - TextureTouched(other_texture); - it = watched_textures_.erase(it); - continue; - } - - ++it; - } - - watched_textures_.push_back(watched_texture); - texture->is_watched = true; - } - - memory_->EnablePhysicalMemoryAccessCallbacks(address, size, true, false); -} - -void VulkanTextureCache::TextureTouched(Texture* texture) { - if (texture->pending_invalidation) { - return; - } - { - auto global_lock = global_critical_region_.Acquire(); - assert_true(texture->is_watched); - texture->is_watched = false; - // Add to pending list so Scavenge will clean it up. - invalidated_textures_->insert(texture); - } - texture->pending_invalidation = true; -} - -std::pair VulkanTextureCache::MemoryInvalidationCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range) { - global_critical_region_.Acquire(); - if (watched_textures_.empty()) { - return std::make_pair(0, UINT32_MAX); - } - // Get the texture within the range, or otherwise get the gap between two - // adjacent textures that can be safely unwatched. - uint32_t written_range_end = physical_address_start + length; - uint32_t previous_end = 0, next_start = UINT32_MAX; - for (auto it = watched_textures_.begin(); it != watched_textures_.end();) { - Texture* texture = it->texture; - uint32_t texture_address, texture_size; - if (it->is_mip) { - texture_address = texture->texture_info.memory.mip_address; - texture_size = texture->texture_info.memory.mip_size; - } else { - texture_address = texture->texture_info.memory.base_address; - texture_size = texture->texture_info.memory.base_size; - } - if (texture_address >= written_range_end) { - // Completely after the written range. - next_start = std::min(next_start, texture_address); - } else { - uint32_t texture_end = texture_address + texture_size; - if (texture_end <= physical_address_start) { - // Completely before the written range. - previous_end = std::max(previous_end, texture_end); - } else { - // Hit. 
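An aside on the hit test above: for non-empty ranges, the four commented cases all reduce to the canonical half-open interval overlap predicate, and the third and fourth `else if` branches can never fire once the first two have been checked, since full containment in either direction already satisfies one of them. A self-contained illustration, not Xenia code:

```cpp
#include <cassert>
#include <cstdint>

// Half-open ranges [start, start + size).
bool Overlaps(uint32_t a_start, uint32_t a_size, uint32_t b_start,
              uint32_t b_size) {
  return a_start < b_start + b_size && b_start < a_start + a_size;
}

int main() {
  assert(Overlaps(0x100, 0x100, 0x180, 0x100));   // tail overlap (case 1)
  assert(Overlaps(0x180, 0x100, 0x100, 0x100));   // head overlap (case 2)
  assert(Overlaps(0x100, 0x300, 0x200, 0x100));   // b inside a (case 3)
  assert(Overlaps(0x200, 0x100, 0x100, 0x300));   // a inside b (case 4)
  assert(!Overlaps(0x100, 0x100, 0x200, 0x100));  // adjacent: no overlap
  return 0;
}
```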
- TextureTouched(texture); - it = watched_textures_.erase(it); - return std::make_pair(texture_address, texture_size); - } - } - ++it; - } - return std::make_pair(previous_end, next_start - previous_end); -} - -std::pair -VulkanTextureCache::MemoryInvalidationCallbackThunk( - void* context_ptr, uint32_t physical_address_start, uint32_t length, - bool exact_range) { - return reinterpret_cast(context_ptr) - ->MemoryInvalidationCallback(physical_address_start, length, exact_range); -} - -VulkanTextureCache::Texture* VulkanTextureCache::DemandResolveTexture( - const TextureInfo& texture_info) { - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - if (it->second->pending_invalidation) { - // This texture has been invalidated! - RemoveInvalidatedTextures(); - break; - } - - // Tell the trace writer to "cache" this memory (but not read it) - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - - return it->second; - } - } - - VkFormatFeatureFlags required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - if (texture_info.format == xenos::TextureFormat::k_24_8 || - texture_info.format == xenos::TextureFormat::k_24_8_FLOAT) { - required_flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - } else { - required_flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - } - - // No texture at this location. Make a new one. - auto texture = AllocateTexture(texture_info, required_flags); - if (!texture) { - // Failed to allocate texture (out of memory) - XELOGE("Vulkan Texture Cache: Failed to allocate texture!"); - return nullptr; - } - - // Setup a debug name for the texture. - provider_.SetDeviceObjectName( - VK_OBJECT_TYPE_IMAGE, uint64_t(texture->image), - fmt::format( - "RT: 0x{:08X} - 0x{:08X} ({}, {})", texture_info.memory.base_address, - texture_info.memory.base_address + texture_info.memory.base_size, - texture_info.format_info()->name, - get_dimension_name(texture_info.dimension)) - .c_str()); - - // Setup an access watch. If this texture is touched, it is destroyed. - WatchTexture(texture); - - textures_[texture_hash] = texture; - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - return texture; -} - -VulkanTextureCache::Texture* VulkanTextureCache::Demand( - const TextureInfo& texture_info, VkCommandBuffer command_buffer, - VkFence completion_fence) { - // Run a tight loop to scan for an exact match existing texture. - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - if (it->second->pending_invalidation) { - // This texture has been invalidated! - RemoveInvalidatedTextures(); - break; - } - - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - return it->second; - } - } - - if (!command_buffer) { - // Texture not found and no command buffer was passed, preventing us from - // uploading a new one. 
- return nullptr; - } - - // Create a new texture and cache it. - auto texture = AllocateTexture(texture_info); - if (!texture) { - // Failed to allocate texture (out of memory) - XELOGE("Vulkan Texture Cache: Failed to allocate texture!"); - return nullptr; - } - - // Though we didn't find an exact match, that doesn't mean we're out of the - // woods yet. This texture could either be a portion of another texture or - // vice versa. Copy any overlapping textures into this texture. - // TODO: Byte count -> pixel count (on x and y axes) - VkOffset2D offset; - auto collide_tex = LookupAddress( - texture_info.memory.base_address, texture_info.width + 1, - texture_info.height + 1, texture_info.format_info()->format, &offset); - if (collide_tex != nullptr) { - // assert_always(); - } - - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - - if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) { - FreeTexture(texture); - return nullptr; - } - - // Setup a debug name for the texture. - provider_.SetDeviceObjectName( - VK_OBJECT_TYPE_IMAGE, uint64_t(texture->image), - fmt::format( - "T: 0x{:08X} - 0x{:08X} ({}, {})", texture_info.memory.base_address, - texture_info.memory.base_address + texture_info.memory.base_size, - texture_info.format_info()->name, - get_dimension_name(texture_info.dimension)) - .c_str()); - - textures_[texture_hash] = texture; - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - - // Okay. Put a writewatch on it to tell us if it's been modified from the - // guest. 
- WatchTexture(texture); - - return texture; -} - -VulkanTextureCache::TextureView* VulkanTextureCache::DemandView( - Texture* texture, uint16_t swizzle) { - for (auto it = texture->views.begin(); it != texture->views.end(); ++it) { - if ((*it)->swizzle == swizzle) { - return (*it).get(); - } - } - - auto& config = texture_configs[uint32_t(texture->texture_info.format)]; - - VkImageViewCreateInfo view_info; - view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.pNext = nullptr; - view_info.flags = 0; - view_info.image = texture->image; - view_info.format = texture->format; - - bool is_cube = false; - switch (texture->texture_info.dimension) { - case xenos::DataDimension::k1D: - case xenos::DataDimension::k2DOrStacked: - if (!texture->texture_info.is_stacked) { - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - } else { - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - } - break; - case xenos::DataDimension::k3D: - view_info.viewType = VK_IMAGE_VIEW_TYPE_3D; - break; - case xenos::DataDimension::kCube: - view_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; - is_cube = true; - break; - default: - assert_always(); - } - - VkComponentSwizzle swizzle_component_map[] = { - config.component_swizzle.r, config.component_swizzle.g, - config.component_swizzle.b, config.component_swizzle.a, - VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, - VK_COMPONENT_SWIZZLE_IDENTITY, - }; - - VkComponentSwizzle components[] = { - swizzle_component_map[(swizzle >> 0) & 0x7], - swizzle_component_map[(swizzle >> 3) & 0x7], - swizzle_component_map[(swizzle >> 6) & 0x7], - swizzle_component_map[(swizzle >> 9) & 0x7], - }; - - view_info.components.r = components[config.vector_swizzle.x]; - view_info.components.g = components[config.vector_swizzle.y]; - view_info.components.b = components[config.vector_swizzle.z]; - view_info.components.a = components[config.vector_swizzle.w]; - - if (texture->format == VK_FORMAT_D16_UNORM_S8_UINT || - texture->format == VK_FORMAT_D24_UNORM_S8_UINT || - texture->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - // This applies to any depth/stencil format, but we only use D24S8 / D32FS8. - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - } else { - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } - view_info.subresourceRange.baseMipLevel = texture->texture_info.mip_min_level; - view_info.subresourceRange.levelCount = texture->texture_info.mip_levels(); - view_info.subresourceRange.baseArrayLayer = 0; - view_info.subresourceRange.layerCount = - !is_cube ? 1 : 1 + texture->texture_info.depth; - - VkImageView view; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - auto status = dfn.vkCreateImageView(device, &view_info, nullptr, &view); - CheckResult(status, "vkCreateImageView"); - if (status == VK_SUCCESS) { - auto texture_view = new TextureView(); - texture_view->texture = texture; - texture_view->view = view; - texture_view->swizzle = swizzle; - texture->views.push_back(std::unique_ptr(texture_view)); - return texture_view; - } - - return nullptr; -} - -VulkanTextureCache::Sampler* VulkanTextureCache::Demand( - const SamplerInfo& sampler_info) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto sampler_hash = sampler_info.hash(); - for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { - if (it->second->sampler_info == sampler_info) { - // Found a compatible sampler. 
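For reference, the swizzle handling in DemandView above unpacks a 12-bit fetch-constant swizzle: four 3-bit selectors, one per destination channel, where 0-3 address a source component and 4/5 force the zero/one constants (the lookup table in the code also maps 6 to the identity swizzle). A toy decoder of that packing, illustration only:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  static const char* kSelectorNames[] = {"R", "G", "B", "A",
                                         "0", "1", "identity"};
  // Example: destination RGBA reads source RRRG, packed low to high,
  // 3 bits per channel.
  uint16_t swizzle = (0 << 0) | (0 << 3) | (0 << 6) | (1 << 9);
  for (int channel = 0; channel < 4; ++channel) {
    unsigned selector = (swizzle >> (channel * 3)) & 0x7;
    std::printf("dest.%c <- %s\n", "rgba"[channel],
                selector < 7 ? kSelectorNames[selector] : "?");
  }
  return 0;
}
```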
- return it->second; - } - } - - VkResult status = VK_SUCCESS; - - // Create a new sampler and cache it. - VkSamplerCreateInfo sampler_create_info; - sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_create_info.pNext = nullptr; - sampler_create_info.flags = 0; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_create_info.maxAnisotropy = 1.0f; - - // Texture level filtering. - VkSamplerMipmapMode mip_filter; - switch (sampler_info.mip_filter) { - case xenos::TextureFilter::kBaseMap: - // TODO(DrChat): ? - mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; - break; - case xenos::TextureFilter::kPoint: - mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; - break; - case xenos::TextureFilter::kLinear: - mip_filter = VK_SAMPLER_MIPMAP_MODE_LINEAR; - break; - default: - assert_unhandled_case(sampler_info.mip_filter); - return nullptr; - } - - VkFilter min_filter; - switch (sampler_info.min_filter) { - case xenos::TextureFilter::kPoint: - min_filter = VK_FILTER_NEAREST; - break; - case xenos::TextureFilter::kLinear: - min_filter = VK_FILTER_LINEAR; - break; - default: - assert_unhandled_case(sampler_info.min_filter); - return nullptr; - } - VkFilter mag_filter; - switch (sampler_info.mag_filter) { - case xenos::TextureFilter::kPoint: - mag_filter = VK_FILTER_NEAREST; - break; - case xenos::TextureFilter::kLinear: - mag_filter = VK_FILTER_LINEAR; - break; - default: - assert_unhandled_case(mag_filter); - return nullptr; - } - - sampler_create_info.minFilter = min_filter; - sampler_create_info.magFilter = mag_filter; - sampler_create_info.mipmapMode = mip_filter; - - // FIXME: Both halfway / mirror clamp to border aren't mapped properly. - VkSamplerAddressMode address_mode_map[] = { - /* kRepeat */ VK_SAMPLER_ADDRESS_MODE_REPEAT, - /* kMirroredRepeat */ VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, - /* kClampToEdge */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - /* kMirrorClampToEdge */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - /* kClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - /* kMirrorClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - /* kClampToBorder */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - /* kMirrorClampToBorder */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - }; - sampler_create_info.addressModeU = - address_mode_map[static_cast(sampler_info.clamp_u)]; - sampler_create_info.addressModeV = - address_mode_map[static_cast(sampler_info.clamp_v)]; - sampler_create_info.addressModeW = - address_mode_map[static_cast(sampler_info.clamp_w)]; - - float aniso = 0.f; - switch (sampler_info.aniso_filter) { - case xenos::AnisoFilter::kDisabled: - aniso = 1.0f; - break; - case xenos::AnisoFilter::kMax_1_1: - aniso = 1.0f; - break; - case xenos::AnisoFilter::kMax_2_1: - aniso = 2.0f; - break; - case xenos::AnisoFilter::kMax_4_1: - aniso = 4.0f; - break; - case xenos::AnisoFilter::kMax_8_1: - aniso = 8.0f; - break; - case xenos::AnisoFilter::kMax_16_1: - aniso = 16.0f; - break; - default: - assert_unhandled_case(aniso); - return nullptr; - } - - sampler_create_info.anisotropyEnable = - sampler_info.aniso_filter != xenos::AnisoFilter::kDisabled ? 
VK_TRUE - : VK_FALSE; - sampler_create_info.maxAnisotropy = aniso; - - sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_NEVER; - sampler_create_info.mipLodBias = sampler_info.lod_bias; - sampler_create_info.minLod = float(sampler_info.mip_min_level); - sampler_create_info.maxLod = float(sampler_info.mip_max_level); - sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - sampler_create_info.unnormalizedCoordinates = VK_FALSE; - VkSampler vk_sampler; - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - status = - dfn.vkCreateSampler(device, &sampler_create_info, nullptr, &vk_sampler); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return nullptr; - } - - auto sampler = new Sampler(); - sampler->sampler = vk_sampler; - sampler->sampler_info = sampler_info; - samplers_[sampler_hash] = sampler; - - return sampler; -} - -bool TextureFormatIsSimilar(xenos::TextureFormat left, - xenos::TextureFormat right) { -#define COMPARE_FORMAT(x, y) \ - if ((left == xenos::TextureFormat::x && right == xenos::TextureFormat::y) || \ - (left == xenos::TextureFormat::y && right == xenos::TextureFormat::x)) { \ - return true; \ - } - - if (left == right) return true; - if (GetBaseFormat(left) == GetBaseFormat(right)) return true; - - return false; -#undef COMPARE_FORMAT -} - -VulkanTextureCache::Texture* VulkanTextureCache::Lookup( - const TextureInfo& texture_info) { - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - return it->second; - } - } - - // slow path - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - const auto& other_texture_info = it->second->texture_info; - -#define COMPARE_FIELD(x) \ - if (texture_info.x != other_texture_info.x) continue - COMPARE_FIELD(memory.base_address); - COMPARE_FIELD(memory.base_size); - COMPARE_FIELD(dimension); - COMPARE_FIELD(width); - COMPARE_FIELD(height); - COMPARE_FIELD(depth); - COMPARE_FIELD(endianness); - COMPARE_FIELD(is_tiled); -#undef COMPARE_FIELD - - if (!TextureFormatIsSimilar(texture_info.format, - other_texture_info.format)) { - continue; - } - - /*const auto format_info = texture_info.format_info(); - const auto other_format_info = other_texture_info.format_info(); -#define COMPARE_FIELD(x) if (format_info->x != other_format_info->x) continue - COMPARE_FIELD(type); - COMPARE_FIELD(block_width); - COMPARE_FIELD(block_height); - COMPARE_FIELD(bits_per_pixel); -#undef COMPARE_FIELD*/ - return it->second; - } - - return nullptr; -} - -VulkanTextureCache::Texture* VulkanTextureCache::LookupAddress( - uint32_t guest_address, uint32_t width, uint32_t height, - xenos::TextureFormat format, VkOffset2D* out_offset) { - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - const auto& texture_info = it->second->texture_info; - if (guest_address >= texture_info.memory.base_address && - guest_address < - texture_info.memory.base_address + texture_info.memory.base_size && - texture_info.pitch >= width && texture_info.height >= height && - out_offset) { - auto offset_bytes = guest_address - texture_info.memory.base_address; - - if (texture_info.dimension == xenos::DataDimension::k2DOrStacked) { - out_offset->x = 0; - out_offset->y = offset_bytes / texture_info.pitch; - if (offset_bytes % texture_info.pitch != 0) { - // TODO: offset_x - } - } - - return it->second; 
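One subtlety in the Lookup and LookupAddress scans above: advancing a find() iterator with ++it in an unordered container does not stay within entries that share the key; it keeps walking the rest of the container in unspecified order. If several textures per hash are the expectation, an unordered_multimap with equal_range() bounds the scan explicitly. Standalone illustration, not Xenia's actual container type:

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>

int main() {
  std::unordered_multimap<uint64_t, std::string> textures;
  textures.emplace(0xBEEF, "texture A");
  textures.emplace(0xBEEF, "texture B");  // second texture under the same key
  textures.emplace(0xF00D, "texture C");
  // Visits exactly the entries keyed 0xBEEF, and nothing else.
  auto range = textures.equal_range(0xBEEF);
  for (auto it = range.first; it != range.second; ++it) {
    std::printf("candidate: %s\n", it->second.c_str());
  }
  return 0;
}
```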
- } - - if (texture_info.memory.base_address == guest_address && - texture_info.dimension == xenos::DataDimension::k2DOrStacked && - texture_info.pitch == width && texture_info.height == height) { - if (out_offset) { - out_offset->x = 0; - out_offset->y = 0; - } - - return it->second; - } - } - - return nullptr; -} - -void VulkanTextureCache::FlushPendingCommands(VkCommandBuffer command_buffer, - VkFence completion_fence) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - auto status = dfn.vkEndCommandBuffer(command_buffer); - CheckResult(status, "vkEndCommandBuffer"); - - VkSubmitInfo submit_info; - std::memset(&submit_info, 0, sizeof(submit_info)); - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - - { - ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition( - provider_.AcquireQueue(provider_.queue_family_graphics_compute(), 0)); - auto status = dfn.vkQueueSubmit(queue_acquisition.queue, 1, &submit_info, - completion_fence); - CheckResult(status, "vkQueueSubmit"); - } - - dfn.vkWaitForFences(device, 1, &completion_fence, VK_TRUE, -1); - staging_buffer_.Scavenge(); - dfn.vkResetFences(device, 1, &completion_fence); - - // Reset the command buffer and put it back into the recording state. - dfn.vkResetCommandBuffer(command_buffer, 0); - VkCommandBufferBeginInfo begin_info; - std::memset(&begin_info, 0, sizeof(begin_info)); - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - dfn.vkBeginCommandBuffer(command_buffer, &begin_info); -} - -bool VulkanTextureCache::ConvertTexture(uint8_t* dest, - VkBufferImageCopy* copy_region, - uint32_t mip, const TextureInfo& src) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - uint32_t offset_x = 0; - uint32_t offset_y = 0; - uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true); - if (!address) { - return false; - } - - void* host_address = memory_->TranslatePhysical(address); - - auto is_cube = src.dimension == xenos::DataDimension::kCube; - auto src_extent = src.GetMipExtent(mip, true); - auto dst_extent = GetMipExtent(src, mip); - - uint32_t src_pitch = - src_extent.block_pitch_h * src.format_info()->bytes_per_block(); - uint32_t dst_pitch = - dst_extent.block_pitch_h * GetFormatInfo(src.format)->bytes_per_block(); - - auto copy_block = GetFormatCopyBlock(src.format); - - const uint8_t* src_mem = reinterpret_cast(host_address); - if (!src.is_tiled) { - for (uint32_t face = 0; face < dst_extent.depth; face++) { - src_mem += offset_y * src_pitch; - src_mem += offset_x * src.format_info()->bytes_per_block(); - for (uint32_t y = 0; y < dst_extent.block_height; y++) { - copy_block(src.endianness, dest + y * dst_pitch, - src_mem + y * src_pitch, dst_pitch); - } - src_mem += src_pitch * src_extent.block_pitch_v; - dest += dst_pitch * dst_extent.block_pitch_v; - } - } else { - // Untile image. - // We could do this in a shader to speed things up, as this is pretty slow. 
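A note on the linear (non-tiled) path above, before the untiling loop continues below: apart from the endian-swapping copy_block callback, it is a plain row-by-row repitch copy. A stripped-down equivalent with the swap replaced by memcpy, illustration only:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Copy `rows` rows from a source with src_pitch bytes per row into a
// destination with dst_pitch bytes per row.
void RepitchRows(uint8_t* dst, uint32_t dst_pitch, const uint8_t* src,
                 uint32_t src_pitch, uint32_t rows) {
  uint32_t row_bytes = std::min(src_pitch, dst_pitch);
  for (uint32_t y = 0; y < rows; ++y) {
    std::memcpy(dst + y * dst_pitch, src + y * src_pitch, row_bytes);
  }
}

int main() {
  // A 4-row image stored with 256-byte source rows, repacked tightly to 64.
  std::vector<uint8_t> src(4 * 256, 0xAB);
  std::vector<uint8_t> dst(4 * 64);
  RepitchRows(dst.data(), 64, src.data(), 256, 4);
  return dst[3 * 64] == 0xAB ? 0 : 1;
}
```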
- for (uint32_t face = 0; face < dst_extent.depth; face++) { - texture_conversion::UntileInfo untile_info; - std::memset(&untile_info, 0, sizeof(untile_info)); - untile_info.offset_x = offset_x; - untile_info.offset_y = offset_y; - untile_info.width = src_extent.block_width; - untile_info.height = src_extent.block_height; - untile_info.input_pitch = src_extent.block_pitch_h; - untile_info.output_pitch = dst_extent.block_pitch_h; - untile_info.input_format_info = src.format_info(); - untile_info.output_format_info = GetFormatInfo(src.format); - untile_info.copy_callback = [=](auto o, auto i, auto l) { - copy_block(src.endianness, o, i, l); - }; - texture_conversion::Untile(dest, src_mem, &untile_info); - src_mem += src_pitch * src_extent.block_pitch_v; - dest += dst_pitch * dst_extent.block_pitch_v; - } - } - - copy_region->bufferRowLength = dst_extent.pitch; - copy_region->bufferImageHeight = dst_extent.height; - copy_region->imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - copy_region->imageSubresource.mipLevel = mip; - copy_region->imageSubresource.baseArrayLayer = 0; - copy_region->imageSubresource.layerCount = !is_cube ? 1 : dst_extent.depth; - copy_region->imageExtent.width = std::max(1u, (src.width + 1) >> mip); - copy_region->imageExtent.height = std::max(1u, (src.height + 1) >> mip); - copy_region->imageExtent.depth = !is_cube ? dst_extent.depth : 1; - return true; -} - -bool VulkanTextureCache::UploadTexture(VkCommandBuffer command_buffer, - VkFence completion_fence, Texture* dest, - const TextureInfo& src) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - size_t unpack_length = ComputeTextureStorage(src); - - XELOGGPU( - "Uploading texture @ 0x{:08X}/0x{:08X} ({}x{}x{}, format: {}, dim: {}, " - "levels: {} ({}-{}), stacked: {}, pitch: {}, tiled: {}, packed mips: {}, " - "unpack length: 0x{:X})", - src.memory.base_address, src.memory.mip_address, src.width + 1, - src.height + 1, src.depth + 1, src.format_info()->name, - get_dimension_name(src.dimension), src.mip_levels(), src.mip_min_level, - src.mip_max_level, src.is_stacked ? "yes" : "no", src.pitch, - src.is_tiled ? "yes" : "no", src.has_packed_mips ? "yes" : "no", - unpack_length); - - XELOGGPU("Extent: {}x{}x{} {},{},{}", src.extent.pitch, src.extent.height, - src.extent.depth, src.extent.block_pitch_h, src.extent.block_height, - src.extent.block_pitch_v); - - if (!unpack_length) { - XELOGW("Failed to compute texture storage!"); - return false; - } - - if (!staging_buffer_.CanAcquire(unpack_length)) { - // Need to have unique memory for every upload for at least one frame. If we - // run out of memory, we need to flush all queued upload commands to the - // GPU. - FlushPendingCommands(command_buffer, completion_fence); - - // Uploads have been flushed. Continue. - if (!staging_buffer_.CanAcquire(unpack_length)) { - // The staging buffer isn't big enough to hold this texture. - XELOGE( - "VulkanTextureCache staging buffer is too small! (uploading 0x{:X} " - "bytes)", - unpack_length); - assert_always(); - return false; - } - } - - // Grab some temporary memory for staging. - auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); - assert_not_null(alloc); - if (!alloc) { - XELOGE("{}: Failed to acquire staging memory!", __func__); - return false; - } - - // DEBUG: Check the source address. If it's completely zero'd out, print it. 
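On the staging logic in UploadTexture above: the CanAcquire / flush / retry sequence is the usual ring-arena pattern, where flushing submits the pending copy commands and waits on their fence so previously acquired space can be reclaimed. A compact mock of that control flow, not Xenia's actual staging buffer implementation:

```cpp
#include <cstdint>
#include <cstdio>

struct StagingArena {
  uint64_t capacity = 16 * 1024 * 1024;
  uint64_t used = 0;
  bool CanAcquire(uint64_t size) const { return used + size <= capacity; }
  uint64_t Acquire(uint64_t size) {
    uint64_t offset = used;
    used += size;
    return offset;
  }
  void FlushAndReclaim() { used = 0; }  // stands in for submit + fence wait
};

int main() {
  StagingArena arena;
  arena.Acquire(12 * 1024 * 1024);  // earlier uploads this frame
  uint64_t upload_size = 8 * 1024 * 1024;
  if (!arena.CanAcquire(upload_size)) {
    arena.FlushAndReclaim();
    if (!arena.CanAcquire(upload_size)) {
      std::fprintf(stderr, "staging arena too small for upload\n");
      return 1;
    }
  }
  arena.Acquire(upload_size);
  return 0;
}
```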
- bool valid = false; - auto src_data = memory_->TranslatePhysical(src.memory.base_address); - for (uint32_t i = 0; i < src.memory.base_size; i++) { - if (src_data[i] != 0) { - valid = true; - break; - } - } - - if (!valid) { - XELOGW("Warning: Texture @ 0x{:08X} is blank!", src.memory.base_address); - } - - // Upload texture into GPU memory. - // TODO: If the GPU supports it, we can submit a compute batch to convert the - // texture and copy it to its destination. Otherwise, fallback to conversion - // on the CPU. - uint32_t copy_region_count = src.mip_levels(); - std::vector<VkBufferImageCopy> copy_regions(copy_region_count); - - // Upload all mips. - auto unpack_buffer = reinterpret_cast<uint8_t*>(alloc->host_ptr); - VkDeviceSize unpack_offset = 0; - for (uint32_t mip = src.mip_min_level, region = 0; mip <= src.mip_max_level; - mip++, region++) { - if (!ConvertTexture(&unpack_buffer[unpack_offset], &copy_regions[region], - mip, src)) { - XELOGW("Failed to convert texture mip {}!", mip); - return false; - } - copy_regions[region].bufferOffset = alloc->offset + unpack_offset; - copy_regions[region].imageOffset = {0, 0, 0}; - - /* - XELOGGPU("Mip {} {}x{}x{} @ 0x{:X}", mip, - copy_regions[region].imageExtent.width, - copy_regions[region].imageExtent.height, - copy_regions[region].imageExtent.depth, unpack_offset); - */ - - unpack_offset += ComputeMipStorage(src, mip); - } - - if (cvars::texture_dump) { - TextureDump(src, unpack_buffer, unpack_length); - } - - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - - // Transition the texture into a transfer destination layout. - VkImageMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = 0; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.oldLayout = dest->image_layout; - barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barrier.srcQueueFamilyIndex = VK_FALSE; - barrier.dstQueueFamilyIndex = VK_FALSE; - barrier.image = dest->image; - if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT || - dest->format == VK_FORMAT_D24_UNORM_S8_UINT || - dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - barrier.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } else { - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } - barrier.subresourceRange.baseMipLevel = src.mip_min_level; - barrier.subresourceRange.levelCount = src.mip_levels(); - barrier.subresourceRange.baseArrayLayer = - copy_regions[0].imageSubresource.baseArrayLayer; - barrier.subresourceRange.layerCount = - copy_regions[0].imageSubresource.layerCount; - - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &barrier); - - // Now move the converted texture into the destination. - if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT || - dest->format == VK_FORMAT_D24_UNORM_S8_UINT || - dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - // Do just a depth upload (for now). - // This assumes depth buffers don't have mips (hopefully they don't) - assert_true(src.mip_levels() == 1); - copy_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - } - - dfn.vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(), - dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - copy_region_count, copy_regions.data()); - - // Now transition the texture into a shader readonly source.
- barrier.srcAccessMask = barrier.dstAccessMask; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.oldLayout = barrier.newLayout; - barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &barrier); - - dest->image_layout = barrier.newLayout; - return true; -} - -const FormatInfo* VulkanTextureCache::GetFormatInfo( - xenos::TextureFormat format) { - switch (format) { - case xenos::TextureFormat::k_CTX1: - return FormatInfo::Get(xenos::TextureFormat::k_8_8); - case xenos::TextureFormat::k_DXT3A: - return FormatInfo::Get(xenos::TextureFormat::k_DXT2_3); - default: - return FormatInfo::Get(format); - } -} - -texture_conversion::CopyBlockCallback VulkanTextureCache::GetFormatCopyBlock( - xenos::TextureFormat format) { - switch (format) { - case xenos::TextureFormat::k_CTX1: - return texture_conversion::ConvertTexelCTX1ToR8G8; - case xenos::TextureFormat::k_DXT3A: - return texture_conversion::ConvertTexelDXT3AToDXT3; - default: - return texture_conversion::CopySwapBlock; - } -} - -TextureExtent VulkanTextureCache::GetMipExtent(const TextureInfo& src, - uint32_t mip) { - auto format_info = GetFormatInfo(src.format); - uint32_t width = src.width + 1; - uint32_t height = src.height + 1; - uint32_t depth = src.depth + 1; - TextureExtent extent; - if (mip == 0) { - extent = TextureExtent::Calculate(format_info, width, height, depth, false, - false); - } else { - uint32_t mip_width = std::max(1u, width >> mip); - uint32_t mip_height = std::max(1u, height >> mip); - extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth, - false, false); - } - return extent; -} - -uint32_t VulkanTextureCache::ComputeMipStorage(const FormatInfo* format_info, - uint32_t width, uint32_t height, - uint32_t depth, uint32_t mip) { - assert_not_null(format_info); - TextureExtent extent; - if (mip == 0) { - extent = TextureExtent::Calculate(format_info, width, height, depth, false, - false); - } else { - uint32_t mip_width = std::max(1u, width >> mip); - uint32_t mip_height = std::max(1u, height >> mip); - extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth, - false, false); - } - uint32_t bytes_per_block = format_info->bytes_per_block(); - return extent.all_blocks() * bytes_per_block; -} - -uint32_t VulkanTextureCache::ComputeMipStorage(const TextureInfo& src, - uint32_t mip) { - uint32_t size = ComputeMipStorage(GetFormatInfo(src.format), src.width + 1, - src.height + 1, src.depth + 1, mip); - // ensure 4-byte alignment - return (size + 3) & (~3u); -} - -uint32_t VulkanTextureCache::ComputeTextureStorage(const TextureInfo& src) { - auto format_info = GetFormatInfo(src.format); - uint32_t width = src.width + 1; - uint32_t height = src.height + 1; - uint32_t depth = src.depth + 1; - uint32_t length = 0; - for (uint32_t mip = src.mip_min_level; mip <= src.mip_max_level; ++mip) { - if (mip == 0 && !src.memory.base_address) { - continue; - } else if (mip > 0 && !src.memory.mip_address) { - continue; - } - length += ComputeMipStorage(format_info, width, height, depth, mip); - } - return length; -} - -void VulkanTextureCache::WritebackTexture(Texture* texture) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkResult status = VK_SUCCESS; - VkFence fence = wb_command_pool_->BeginBatch(); - auto alloc = wb_staging_buffer_.Acquire(texture->alloc_info.size, 
fence); - if (!alloc) { - wb_command_pool_->EndBatch(); - return; - } - - auto command_buffer = wb_command_pool_->AcquireEntry(); - - VkCommandBufferBeginInfo begin_info = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - nullptr, - }; - dfn.vkBeginCommandBuffer(command_buffer, &begin_info); - - // TODO: Transition the texture to a transfer source. - // TODO: copy depth/layers? - - VkBufferImageCopy region; - region.bufferOffset = alloc->offset; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset.x = 0; - region.imageOffset.y = 0; - region.imageOffset.z = 0; - region.imageExtent.width = texture->texture_info.width + 1; - region.imageExtent.height = texture->texture_info.height + 1; - region.imageExtent.depth = 1; - - dfn.vkCmdCopyImageToBuffer(command_buffer, texture->image, - VK_IMAGE_LAYOUT_GENERAL, - wb_staging_buffer_.gpu_buffer(), 1, &region); - - // TODO: Transition the texture back to a shader resource. - - dfn.vkEndCommandBuffer(command_buffer); - - // Submit the command buffer. - // Submit commands and wait. - { - ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition( - provider_.AcquireQueue(provider_.queue_family_graphics_compute(), 0)); - VkSubmitInfo submit_info = { - VK_STRUCTURE_TYPE_SUBMIT_INFO, - nullptr, - 0, - nullptr, - nullptr, - 1, - &command_buffer, - 0, - nullptr, - }; - status = dfn.vkQueueSubmit(queue_acquisition.queue, 1, &submit_info, fence); - CheckResult(status, "vkQueueSubmit"); - - if (status == VK_SUCCESS) { - status = dfn.vkQueueWaitIdle(queue_acquisition.queue); - CheckResult(status, "vkQueueWaitIdle"); - } - } - - wb_command_pool_->EndBatch(); - - if (status == VK_SUCCESS) { - auto dest = - memory_->TranslatePhysical(texture->texture_info.memory.base_address); - std::memcpy(dest, alloc->host_ptr, texture->texture_info.memory.base_size); - } - - wb_staging_buffer_.Scavenge(); -} - -void VulkanTextureCache::HashTextureBindings( - XXH3_state_t* hash_state, uint32_t& fetch_mask, - const std::vector<Shader::TextureBinding>& bindings) { - for (auto& binding : bindings) { - uint32_t fetch_bit = 1 << binding.fetch_constant; - if (fetch_mask & fetch_bit) { - // We've covered this binding. - continue; - } - fetch_mask |= fetch_bit; - - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(&regs.values[r]); - auto& fetch = group->texture_fetch; - - XXH3_64bits_update(hash_state, &fetch, sizeof(fetch)); - } -} - -VkDescriptorSet VulkanTextureCache::PrepareTextureSet( - VkCommandBuffer command_buffer, VkFence completion_fence, - const std::vector<Shader::TextureBinding>& vertex_bindings, - const std::vector<Shader::TextureBinding>& pixel_bindings) { - XXH3_state_t hash_state; - XXH3_64bits_reset(&hash_state); - - // (quickly) Generate a hash. - uint32_t fetch_mask = 0; - HashTextureBindings(&hash_state, fetch_mask, vertex_bindings); - HashTextureBindings(&hash_state, fetch_mask, pixel_bindings); - uint64_t hash = XXH3_64bits_digest(&hash_state); - for (auto it = texture_sets_.find(hash); it != texture_sets_.end(); ++it) { - // TODO(DrChat): We need to compare the bindings and ensure they're equal. - return it->second; - }
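The descriptor-set cache key above is a streaming XXH3 hash over the raw texture fetch constants referenced by both shader stages. A standalone version of the same hashing flow, assuming xxHash is available as <xxhash.h>; the FetchConstant struct is a stand-in for the 24-byte xe_gpu_texture_fetch_t:

```cpp
#include <cstdint>
#include <cstdio>
#include <xxhash.h>

struct FetchConstant {  // stand-in for the guest fetch constant layout
  uint32_t dwords[6];
};

int main() {
  FetchConstant fetches[2] = {{{1, 2, 3, 4, 5, 6}}, {{7, 8, 9, 10, 11, 12}}};
  XXH3_state_t* state = XXH3_createState();
  XXH3_64bits_reset(state);
  for (const FetchConstant& fetch : fetches) {
    // Feed each fetch constant into the single running hash state.
    XXH3_64bits_update(state, &fetch, sizeof(fetch));
  }
  uint64_t key = XXH3_64bits_digest(state);
  XXH3_freeState(state);
  std::printf("descriptor set key: %016llx\n",
              static_cast<unsigned long long>(key));
  return 0;
}
```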
- - // Clear state. - auto update_set_info = &update_set_info_; - update_set_info->has_setup_fetch_mask = 0; - update_set_info->image_write_count = 0; - - std::memset(update_set_info, 0, sizeof(update_set_info_)); - - // Process vertex and pixel shader bindings. - // This does things lazily and de-dupes fetch constants reused in both - // shaders. - bool any_failed = false; - any_failed = !SetupTextureBindings(command_buffer, completion_fence, - update_set_info, vertex_bindings) || - any_failed; - any_failed = !SetupTextureBindings(command_buffer, completion_fence, - update_set_info, pixel_bindings) || - any_failed; - if (any_failed) { - XELOGW("Failed to setup one or more texture bindings!"); - // TODO(benvanik): actually bail out here? - } - - // Open a new batch of descriptor sets (for this frame) - if (!descriptor_pool_->has_open_batch()) { - descriptor_pool_->BeginBatch(completion_fence); - } - - auto descriptor_set = - descriptor_pool_->AcquireEntry(texture_descriptor_set_layout_); - if (!descriptor_set) { - return nullptr; - } - - for (uint32_t i = 0; i < update_set_info->image_write_count; i++) { - update_set_info->image_writes[i].dstSet = descriptor_set; - } - - // Update the descriptor set. - if (update_set_info->image_write_count > 0) { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkUpdateDescriptorSets(device, update_set_info->image_write_count, - update_set_info->image_writes, 0, nullptr); - } - - texture_sets_[hash] = descriptor_set; - return descriptor_set; -} - -bool VulkanTextureCache::SetupTextureBindings( - VkCommandBuffer command_buffer, VkFence completion_fence, - UpdateSetInfo* update_set_info, - const std::vector<Shader::TextureBinding>& bindings) { - bool any_failed = false; - for (auto& binding : bindings) { - uint32_t fetch_bit = 1 << binding.fetch_constant; - if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { - // Needs setup. - any_failed = !SetupTextureBinding(command_buffer, completion_fence, - update_set_info, binding) || - any_failed; - update_set_info->has_setup_fetch_mask |= fetch_bit; - } - } - return !any_failed; -} - -bool VulkanTextureCache::SetupTextureBinding( - VkCommandBuffer command_buffer, VkFence completion_fence, - UpdateSetInfo* update_set_info, const Shader::TextureBinding& binding) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(&regs.values[r]); - auto& fetch = group->texture_fetch; - - // Disabled? - // TODO(benvanik): reset sampler. - switch (fetch.type) { - case xenos::FetchConstantType::kTexture: - break; - case xenos::FetchConstantType::kInvalidTexture: - if (cvars::gpu_allow_invalid_fetch_constants) { - break; - } - XELOGW( - "Texture fetch constant {} ({:08X} {:08X} {:08X} {:08X} {:08X} " - "{:08X}) has " - "\"invalid\" type! 
This is incorrect behavior, but you can try " - "bypassing this by launching Xenia with " - "--gpu_allow_invalid_fetch_constants=true.", - binding.fetch_constant, fetch.dword_0, fetch.dword_1, fetch.dword_2, - fetch.dword_3, fetch.dword_4, fetch.dword_5); - return false; - default: - XELOGW( - "Texture fetch constant {} ({:08X} {:08X} {:08X} {:08X} {:08X} " - "{:08X}) is " - "completely invalid!", - binding.fetch_constant, fetch.dword_0, fetch.dword_1, fetch.dword_2, - fetch.dword_3, fetch.dword_4, fetch.dword_5); - return false; - } - - TextureInfo texture_info; - if (!TextureInfo::Prepare(fetch, &texture_info)) { - XELOGE("Unable to parse texture fetcher info"); - return false; // invalid texture used - } - SamplerInfo sampler_info; - if (!SamplerInfo::Prepare(fetch, binding.fetch_instr, &sampler_info)) { - XELOGE("Unable to parse sampler info"); - return false; // invalid texture used - } - - // Search via the base format. - texture_info.format = GetBaseFormat(texture_info.format); - - auto texture = Demand(texture_info, command_buffer, completion_fence); - auto sampler = Demand(sampler_info); - if (texture == nullptr || sampler == nullptr) { - XELOGE("Texture or sampler is NULL!"); - return false; - } - - uint16_t swizzle = static_cast(fetch.swizzle); - auto view = DemandView(texture, swizzle); - - auto image_info = - &update_set_info->image_infos[update_set_info->image_write_count]; - auto image_write = - &update_set_info->image_writes[update_set_info->image_write_count]; - update_set_info->image_write_count++; - - // Sanity check, we only have 32 binding slots. - assert(binding.binding_index < 32); - - image_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write->pNext = nullptr; - // image_write->dstSet is set later... - image_write->dstBinding = 0; - image_write->dstArrayElement = uint32_t(binding.binding_index); - image_write->descriptorCount = 1; - image_write->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write->pImageInfo = image_info; - image_write->pBufferInfo = nullptr; - image_write->pTexelBufferView = nullptr; - - image_info->imageView = view->view; - image_info->imageLayout = texture->image_layout; - image_info->sampler = sampler->sampler; - texture->in_flight_fence = completion_fence; - - return true; -} - -void VulkanTextureCache::RemoveInvalidatedTextures() { - std::unordered_set& invalidated_textures = *invalidated_textures_; - - // Clean up any invalidated textures. - { - auto global_lock = global_critical_region_.Acquire(); - if (invalidated_textures_ == &invalidated_textures_sets_[0]) { - invalidated_textures_ = &invalidated_textures_sets_[1]; - } else { - invalidated_textures_ = &invalidated_textures_sets_[0]; - } - } - - // Append all invalidated textures to a deletion queue. They will be deleted - // when all command buffers using them have finished executing. - if (!invalidated_textures.empty()) { - for (auto it = invalidated_textures.begin(); - it != invalidated_textures.end(); ++it) { - pending_delete_textures_.push_back(*it); - textures_.erase((*it)->texture_info.hash()); - } - - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - COUNT_profile_set("gpu/texture_cache/pending_deletes", - pending_delete_textures_.size()); - invalidated_textures.clear(); - } -} - -void VulkanTextureCache::ClearCache() { - RemoveInvalidatedTextures(); - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - while (!FreeTexture(it->second)) { - // Texture still in use. Busy loop. 
- xe::threading::MaybeYield(); - } - } - textures_.clear(); - COUNT_profile_set("gpu/texture_cache/textures", 0); - - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - for (auto it = samplers_.begin(); it != samplers_.end(); ++it) { - dfn.vkDestroySampler(device, it->second->sampler, nullptr); - delete it->second; +// Generated with `xb buildshaders`. +namespace shaders { +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_128bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_128bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_16bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_16bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_32bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_32bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_64bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_64bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_bgrg8_rgb8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_ctx1_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_unorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_unorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxn_rg8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt1_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3a_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3aas1111_argb4_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5a_r8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_gbgr8_rgb8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_snorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_snorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_unorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_unorm_float_scaled_cs.h" +#include 
"xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r4g4b4a4_a4r4g4b4_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r4g4b4a4_a4r4g4b4_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b5a1_b5g5r5a1_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b5a1_b5g5r5a1_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g6b5_b5g6r5_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g6b5_b5g6r5_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_unorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_unorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_scaled_cs.h" +} // namespace shaders + +static_assert(VK_FORMAT_UNDEFINED == VkFormat(0), + "Assuming that skipping a VkFormat in an initializer results in " + "VK_FORMAT_UNDEFINED"); +const VulkanTextureCache::HostFormatPair + VulkanTextureCache::kBestHostFormats[64] = { + // k_1_REVERSE + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_1 + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_8 + {{kLoadShaderIndex8bpb, VK_FORMAT_R8_UNORM}, + {kLoadShaderIndex8bpb, VK_FORMAT_R8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_1_5_5_5 + // Red and blue swapped in the load shader for simplicity. + {{kLoadShaderIndexR5G5B5A1ToB5G5R5A1, VK_FORMAT_A1R5G5B5_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_5_6_5 + // Red and blue swapped in the load shader for simplicity. + {{kLoadShaderIndexR5G6B5ToB5G6R5, VK_FORMAT_R5G6B5_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_6_5_5 + // On the host, green bits in blue, blue bits in green. + {{kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, + VK_FORMAT_R5G6B5_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, + XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)}, + // k_8_8_8_8 + {{kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_UNORM}, + {kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_2_10_10_10 + // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional. 
+    {{kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32},
+     {kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_8_A
+    {{kLoadShaderIndex8bpb, VK_FORMAT_R8_UNORM},
+     {kLoadShaderIndex8bpb, VK_FORMAT_R8_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR,
+     true},
+    // k_8_B
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_8_8
+    {{kLoadShaderIndex16bpb, VK_FORMAT_R8G8_UNORM},
+     {kLoadShaderIndex16bpb, VK_FORMAT_R8G8_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
+     true},
+    // k_Cr_Y1_Cb_Y0_REP
+    // VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in
+    // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is
+    // optional.
+    {{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true},
+     {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_Y1_Cr_Y0_Cb_REP
+    // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in
+    // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is
+    // optional.
+    {{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true},
+     {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_16_16_EDRAM
+    // Not usable as a texture, also has -32...32 range.
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_8_8_8_8_A
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_4_4_4_4
+    // Components swapped in the load shader for simplicity.
+    {{kLoadShaderIndexRGBA4ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_10_11_11
+    // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16
+    // instead.
+    {{kLoadShaderIndexR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM},
+     {kLoadShaderIndexR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_11_11_10
+    // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16
+    // instead.
+    {{kLoadShaderIndexR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM},
+     {kLoadShaderIndexR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_DXT1
+    // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_DXT2_3
+    // VK_FORMAT_BC2_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_DXT4_5
+    // VK_FORMAT_BC3_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_16_16_16_16_EDRAM
+    // Not usable as a texture, also has -32...32 range.
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_24_8
+    {{kLoadShaderIndexDepthUnorm, VK_FORMAT_R32_SFLOAT},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_24_8_FLOAT
+    {{kLoadShaderIndexDepthFloat, VK_FORMAT_R32_SFLOAT},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_16
+    // VK_FORMAT_R16_UNORM and VK_FORMAT_R16_SNORM are optional.
+    {{kLoadShaderIndex16bpb, VK_FORMAT_R16_UNORM},
+     {kLoadShaderIndex16bpb, VK_FORMAT_R16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR,
+     true},
+    // k_16_16
+    // VK_FORMAT_R16G16_UNORM and VK_FORMAT_R16G16_SNORM are optional.
+    {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_UNORM},
+     {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
+     true},
+    // k_16_16_16_16
+    // VK_FORMAT_R16G16B16A16_UNORM and VK_FORMAT_R16G16B16A16_SNORM are
+    // optional.
+    {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_UNORM},
+     {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_16_EXPAND
+    {{kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT},
+     {kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR,
+     true},
+    // k_16_16_EXPAND
+    {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT},
+     {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
+     true},
+    // k_16_16_16_16_EXPAND
+    {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT},
+     {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_16_FLOAT
+    {{kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT},
+     {kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR,
+     true},
+    // k_16_16_FLOAT
+    {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT},
+     {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
+     true},
+    // k_16_16_16_16_FLOAT
+    {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT},
+     {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_32
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_32_32
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_32_32_32_32
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_32_FLOAT
+    {{kLoadShaderIndex32bpb, VK_FORMAT_R32_SFLOAT},
+     {kLoadShaderIndex32bpb, VK_FORMAT_R32_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR,
+     true},
+    // k_32_32_FLOAT
+    {{kLoadShaderIndex64bpb, VK_FORMAT_R32G32_SFLOAT},
+     {kLoadShaderIndex64bpb, VK_FORMAT_R32G32_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
+     true},
+    // k_32_32_32_32_FLOAT
+    {{kLoadShaderIndex128bpb, VK_FORMAT_R32G32B32A32_SFLOAT},
+     {kLoadShaderIndex128bpb, VK_FORMAT_R32G32B32A32_SFLOAT},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_32_AS_8
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_32_AS_8_8
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_16_MPEG
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_16_16_MPEG
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_8_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_32_AS_8_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_32_AS_8_8_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_16_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_16_MPEG_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_16_16_MPEG_INTERLACED
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_DXN
+    // VK_FORMAT_BC5_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex128bpb, VK_FORMAT_BC5_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_8_8_8_8_AS_16_16_16_16
+    {{kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_UNORM},
+     {kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_DXT1_AS_16_16_16_16
+    // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_DXT2_3_AS_16_16_16_16
+    // VK_FORMAT_BC2_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_DXT4_5_AS_16_16_16_16
+    // VK_FORMAT_BC3_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_2_10_10_10_AS_16_16_16_16
+    // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional.
+    {{kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32},
+     {kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,
+     true},
+    // k_10_11_11_AS_16_16_16_16
+    // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16
+    // instead.
+    {{kLoadShaderIndexR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM},
+     {kLoadShaderIndexR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_11_11_10_AS_16_16_16_16
+    // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16
+    // instead.
+    {{kLoadShaderIndexR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM},
+     {kLoadShaderIndexR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_32_32_32_FLOAT
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
+    // k_DXT3A
+    {{kLoadShaderIndexDXT3A, VK_FORMAT_R8_UNORM},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_DXT5A
+    // VK_FORMAT_BC4_UNORM_BLOCK is optional.
+    {{kLoadShaderIndex64bpb, VK_FORMAT_BC4_UNORM_BLOCK, true},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
+    // k_CTX1
+    {{kLoadShaderIndexCTX1, VK_FORMAT_R8G8_UNORM},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
+    // k_DXT3A_AS_1_1_1_1
+    {{kLoadShaderIndexDXT3AAs1111ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_8_8_8_8_GAMMA_EDRAM
+    // Not usable as a texture.
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+    // k_2_10_10_10_FLOAT_EDRAM
+    // Not usable as a texture.
+    {{kLoadShaderIndexUnknown},
+     {kLoadShaderIndexUnknown},
+     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
+};
+
+// Vulkan requires 2x1 (4:2:2) subsampled images to have an even width.
+// Always decompressing them to RGBA8, which is required to be
+// linear-filterable as UNORM and SNORM.
+
+const VulkanTextureCache::HostFormatPair
+    VulkanTextureCache::kHostFormatGBGRUnaligned = {
+        {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_UNORM, false, true},
+        {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM, false, true},
+        xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB,
+        true};
+
+const VulkanTextureCache::HostFormatPair
+    VulkanTextureCache::kHostFormatBGRGUnaligned = {
+        {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_UNORM, false, true},
+        {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM, false, true},
+        xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB,
+        true};
+
+VulkanTextureCache::~VulkanTextureCache() {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  for (const std::pair<const SamplerParameters, Sampler>& sampler_pair :
+       samplers_) {
+    dfn.vkDestroySampler(device, sampler_pair.second.sampler, nullptr);
   }
   samplers_.clear();
+  COUNT_profile_set("gpu/texture_cache/vulkan/samplers", 0);
+  sampler_used_last_ = nullptr;
+  sampler_used_first_ = nullptr;
+
+  if (null_image_view_3d_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyImageView(device, null_image_view_3d_, nullptr);
+  }
+  if (null_image_view_cube_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyImageView(device, null_image_view_cube_, nullptr);
+  }
+  if (null_image_view_2d_array_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyImageView(device, null_image_view_2d_array_, nullptr);
+  }
+  if (null_image_3d_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyImage(device, null_image_3d_, nullptr);
+  }
+  if (null_image_2d_array_cube_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyImage(device, null_image_2d_array_cube_, nullptr);
+  }
+  for (VkDeviceMemory null_images_memory : null_images_memory_) {
+    if (null_images_memory != VK_NULL_HANDLE) {
+      dfn.vkFreeMemory(device, null_images_memory, nullptr);
+    }
+  }
+  for (VkPipeline load_pipeline : load_pipelines_scaled_) {
+    if (load_pipeline != VK_NULL_HANDLE) {
+      dfn.vkDestroyPipeline(device, load_pipeline, nullptr);
+    }
+  }
+  for (VkPipeline load_pipeline : load_pipelines_) {
+    if (load_pipeline != VK_NULL_HANDLE) {
+      dfn.vkDestroyPipeline(device, load_pipeline, nullptr);
+    }
+  }
+  if (load_pipeline_layout_ != VK_NULL_HANDLE) {
+    dfn.vkDestroyPipelineLayout(device, load_pipeline_layout_, nullptr);
+  }
+
+  // Texture memory is allocated using the Vulkan Memory Allocator - destroy
+  // all textures before destroying VMA.
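+  // (Each VulkanTexture's destructor returns its image and allocation to this
+  // allocator via vmaDestroyImage.)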
+  DestroyAllTextures(true);
+
+  if (vma_allocator_ != VK_NULL_HANDLE) {
+    vmaDestroyAllocator(vma_allocator_);
+  }
 }
 
-void VulkanTextureCache::Scavenge() {
-  SCOPE_profile_cpu_f("gpu");
+void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) {
+  TextureCache::BeginSubmission(new_submission_index);
 
-  // Close any open descriptor pool batches
-  if (descriptor_pool_->has_open_batch()) {
-    descriptor_pool_->EndBatch();
+  if (!null_images_cleared_) {
+    VkImage null_images[] = {null_image_2d_array_cube_, null_image_3d_};
+    VkImageSubresourceRange null_image_subresource_range(
+        ui::vulkan::util::InitializeSubresourceRange());
+    for (size_t i = 0; i < xe::countof(null_images); ++i) {
+      command_processor_.PushImageMemoryBarrier(
+          null_images[i], null_image_subresource_range, 0,
+          VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT,
+          VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+          VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false);
+    }
+    command_processor_.SubmitBarriers(true);
+    DeferredCommandBuffer& command_buffer =
+        command_processor_.deferred_command_buffer();
+    // TODO(Triang3l): Find the return value for invalid texture fetch
+    // constants on the real hardware.
+    VkClearColorValue null_image_clear_color;
+    null_image_clear_color.float32[0] = 0.0f;
+    null_image_clear_color.float32[1] = 0.0f;
+    null_image_clear_color.float32[2] = 0.0f;
+    null_image_clear_color.float32[3] = 0.0f;
+    for (size_t i = 0; i < xe::countof(null_images); ++i) {
+      command_buffer.CmdVkClearColorImage(
+          null_images[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+          &null_image_clear_color, 1, &null_image_subresource_range);
+    }
+    for (size_t i = 0; i < xe::countof(null_images); ++i) {
+      command_processor_.PushImageMemoryBarrier(
+          null_images[i], null_image_subresource_range,
+          VK_PIPELINE_STAGE_TRANSFER_BIT, guest_shader_pipeline_stages_,
+          VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
+          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_QUEUE_FAMILY_IGNORED,
+          VK_QUEUE_FAMILY_IGNORED, false);
+    }
+    null_images_cleared_ = true;
+  }
+}
+
+void VulkanTextureCache::RequestTextures(uint32_t used_texture_mask) {
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
+
+  TextureCache::RequestTextures(used_texture_mask);
+
+  // Transition the textures into the needed usage.
+  VkPipelineStageFlags dst_stage_mask;
+  VkAccessFlags dst_access_mask;
+  VkImageLayout new_layout;
+  GetTextureUsageMasks(VulkanTexture::Usage::kGuestShaderSampled,
+                       dst_stage_mask, dst_access_mask, new_layout);
+  uint32_t textures_remaining = used_texture_mask;
+  uint32_t index;
+  while (xe::bit_scan_forward(textures_remaining, &index)) {
+    textures_remaining &= ~(uint32_t(1) << index);
+    const TextureBinding* binding = GetValidTextureBinding(index);
+    if (!binding) {
+      continue;
+    }
+    VulkanTexture* binding_texture =
+        static_cast<VulkanTexture*>(binding->texture);
+    if (binding_texture != nullptr) {
+      // Will be referenced by the command buffer, so mark as used.
+      binding_texture->MarkAsUsed();
+      VulkanTexture::Usage old_usage =
+          binding_texture->SetUsage(VulkanTexture::Usage::kGuestShaderSampled);
+      if (old_usage != VulkanTexture::Usage::kGuestShaderSampled) {
+        VkPipelineStageFlags src_stage_mask;
+        VkAccessFlags src_access_mask;
+        VkImageLayout old_layout;
+        GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask,
+                             old_layout);
+        command_processor_.PushImageMemoryBarrier(
+            binding_texture->image(),
+            ui::vulkan::util::InitializeSubresourceRange(), src_stage_mask,
+            dst_stage_mask, src_access_mask, dst_access_mask, old_layout,
+            new_layout);
+      }
+    }
+    VulkanTexture* binding_texture_signed =
+        static_cast<VulkanTexture*>(binding->texture_signed);
+    if (binding_texture_signed != nullptr) {
+      binding_texture_signed->MarkAsUsed();
+      VulkanTexture::Usage old_usage = binding_texture_signed->SetUsage(
+          VulkanTexture::Usage::kGuestShaderSampled);
+      if (old_usage != VulkanTexture::Usage::kGuestShaderSampled) {
+        VkPipelineStageFlags src_stage_mask;
+        VkAccessFlags src_access_mask;
+        VkImageLayout old_layout;
+        GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask,
+                             old_layout);
+        command_processor_.PushImageMemoryBarrier(
+            binding_texture_signed->image(),
+            ui::vulkan::util::InitializeSubresourceRange(), src_stage_mask,
+            dst_stage_mask, src_access_mask, dst_access_mask, old_layout,
+            new_layout);
+      }
+    }
+  }
+}
+
+VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
+    uint32_t fetch_constant_index, xenos::FetchOpDimension dimension,
+    bool is_signed) const {
+  VkImageView image_view = VK_NULL_HANDLE;
+  const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index);
+  if (binding && AreDimensionsCompatible(dimension, binding->key.dimension)) {
+    const VulkanTextureBinding& vulkan_binding =
+        vulkan_texture_bindings_[fetch_constant_index];
+    image_view = is_signed ? vulkan_binding.image_view_signed
+                           : vulkan_binding.image_view_unsigned;
+  }
+  if (image_view != VK_NULL_HANDLE) {
+    return image_view;
+  }
+  switch (dimension) {
+    case xenos::FetchOpDimension::k3DOrStacked:
+      return null_image_view_3d_;
+    case xenos::FetchOpDimension::kCube:
+      return null_image_view_cube_;
+    default:
+      return null_image_view_2d_array_;
+  }
+}
+
+VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
+    const VulkanShader::SamplerBinding& binding) const {
+  const auto& regs = register_file();
+  const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
+      XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
+
+  SamplerParameters parameters;
+
+  xenos::ClampMode fetch_clamp_x, fetch_clamp_y, fetch_clamp_z;
+  texture_util::GetClampModesForDimension(fetch, fetch_clamp_x, fetch_clamp_y,
+                                          fetch_clamp_z);
+  parameters.clamp_x = NormalizeClampMode(fetch_clamp_x);
+  parameters.clamp_y = NormalizeClampMode(fetch_clamp_y);
+  parameters.clamp_z = NormalizeClampMode(fetch_clamp_z);
+  if (xenos::ClampModeUsesBorder(parameters.clamp_x) ||
+      xenos::ClampModeUsesBorder(parameters.clamp_y) ||
+      xenos::ClampModeUsesBorder(parameters.clamp_z)) {
+    parameters.border_color = fetch.border_color;
+  } else {
+    parameters.border_color = xenos::BorderColor::k_ABGR_Black;
   }
-  // Free unused descriptor sets
-  // TODO(DrChat): These sets could persist across frames, we just need a smart
-  // way to detect if they're unused and free them.
-  texture_sets_.clear();
-  descriptor_pool_->Scavenge();
-  staging_buffer_.Scavenge();
-
-  // Kill all pending delete textures.
-  RemoveInvalidatedTextures();
-  if (!pending_delete_textures_.empty()) {
-    for (auto it = pending_delete_textures_.begin();
-         it != pending_delete_textures_.end();) {
-      if (!FreeTexture(*it)) {
-        break;
+  xenos::TextureFilter mag_filter =
+      binding.mag_filter == xenos::TextureFilter::kUseFetchConst
+          ? fetch.mag_filter
+          : binding.mag_filter;
+  parameters.mag_linear = mag_filter == xenos::TextureFilter::kLinear;
+  xenos::TextureFilter min_filter =
+      binding.min_filter == xenos::TextureFilter::kUseFetchConst
+          ? fetch.min_filter
+          : binding.min_filter;
+  parameters.min_linear = min_filter == xenos::TextureFilter::kLinear;
+  xenos::TextureFilter mip_filter =
+      binding.mip_filter == xenos::TextureFilter::kUseFetchConst
+          ? fetch.mip_filter
+          : binding.mip_filter;
+  parameters.mip_linear = mip_filter == xenos::TextureFilter::kLinear;
+  if (parameters.mag_linear || parameters.min_linear ||
+      parameters.mip_linear) {
+    // Check if the texture is actually filterable on the host.
+    bool linear_filterable = true;
+    TextureKey texture_key;
+    uint8_t texture_swizzled_signs;
+    BindingInfoFromFetchConstant(fetch, texture_key, &texture_swizzled_signs);
+    if (texture_key.is_valid) {
+      const HostFormatPair& host_format_pair = GetHostFormatPair(texture_key);
+      if ((texture_util::IsAnySignNotSigned(texture_swizzled_signs) &&
+           !host_format_pair.format_unsigned.linear_filterable) ||
+          (texture_util::IsAnySignSigned(texture_swizzled_signs) &&
+           !host_format_pair.format_signed.linear_filterable)) {
+        linear_filterable = false;
      }
+    } else {
+      linear_filterable = false;
+    }
+    if (!linear_filterable) {
+      parameters.mag_linear = 0;
+      parameters.min_linear = 0;
+      parameters.mip_linear = 0;
+    }
+  }
+  xenos::AnisoFilter aniso_filter =
+      binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst
+          ? fetch.aniso_filter
+          : binding.aniso_filter;
+  parameters.aniso_filter = std::min(aniso_filter, max_anisotropy_);
+  parameters.mip_base_map = mip_filter == xenos::TextureFilter::kBaseMap;
-      it = pending_delete_textures_.erase(it);
+  uint32_t mip_min_level;
+  texture_util::GetSubresourcesFromFetchConstant(fetch, nullptr, nullptr,
+                                                 nullptr, nullptr, nullptr,
+                                                 &mip_min_level, nullptr);
+  parameters.mip_min_level = mip_min_level;
+
+  return parameters;
+}
+
+VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters,
+                                         bool& has_overflown_out) {
+  assert_true(command_processor_.submission_open());
+  uint64_t submission_current = command_processor_.GetCurrentSubmission();
+
+  // Try to find an existing sampler.
+  auto it_existing = samplers_.find(parameters);
+  if (it_existing != samplers_.end()) {
+    std::pair<const SamplerParameters, Sampler>& sampler = *it_existing;
+    assert_true(sampler.second.last_usage_submission <= submission_current);
+    // This is called very frequently, don't relink unless needed for caching.
+    if (sampler.second.last_usage_submission < submission_current) {
+      // Move to the most recently used end of the LRU queue.
+      sampler.second.last_usage_submission = submission_current;
+      if (sampler.second.used_next) {
+        if (sampler.second.used_previous) {
+          sampler.second.used_previous->second.used_next =
+              sampler.second.used_next;
+        } else {
+          sampler_used_first_ = sampler.second.used_next;
+        }
+        sampler.second.used_next->second.used_previous =
+            sampler.second.used_previous;
+        sampler.second.used_previous = sampler_used_last_;
+        sampler.second.used_next = nullptr;
+        sampler_used_last_->second.used_next = &sampler;
+        sampler_used_last_ = &sampler;
+      }
+    }
+    has_overflown_out = false;
+    return sampler.second.sampler;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  // See if an existing sampler can be destroyed to create space for the new
+  // one.
+  if (samplers_.size() >= sampler_max_count_) {
+    assert_not_null(sampler_used_first_);
+    if (!sampler_used_first_) {
+      has_overflown_out = false;
+      return VK_NULL_HANDLE;
+    }
+    if (sampler_used_first_->second.last_usage_submission >
+        command_processor_.GetCompletedSubmission()) {
+      has_overflown_out = true;
+      return VK_NULL_HANDLE;
+    }
+    auto it_reuse = samplers_.find(sampler_used_first_->first);
+    dfn.vkDestroySampler(device, sampler_used_first_->second.sampler, nullptr);
+    if (sampler_used_first_->second.used_next) {
+      sampler_used_first_->second.used_next->second.used_previous =
+          sampler_used_first_->second.used_previous;
+    } else {
+      sampler_used_last_ = sampler_used_first_->second.used_previous;
+    }
+    sampler_used_first_ = sampler_used_first_->second.used_next;
+    assert_true(it_reuse != samplers_.end());
+    if (it_reuse != samplers_.end()) {
+      // This destroys the Sampler object.
+      samplers_.erase(it_reuse);
+      COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size());
+    } else {
+      has_overflown_out = false;
+      return VK_NULL_HANDLE;
+    }
+  }
+
+  // Create a new sampler and make it the most recently used.
+  // The values are normalized, and unsupported ones are excluded, in
+  // GetSamplerParameters.
+  VkSamplerCreateInfo sampler_create_info = {};
+  sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
+  // TODO(Triang3l): VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT if
+  // VK_EXT_non_seamless_cube_map and the nonSeamlessCubeMap feature are
+  // supported.
+  sampler_create_info.magFilter =
+      parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
+  sampler_create_info.minFilter =
+      parameters.min_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
+  sampler_create_info.mipmapMode = parameters.mip_linear
+                                       ? VK_SAMPLER_MIPMAP_MODE_LINEAR
+                                       : VK_SAMPLER_MIPMAP_MODE_NEAREST;
+  static const VkSamplerAddressMode kAddressModeMap[] = {
+      // kRepeat
+      VK_SAMPLER_ADDRESS_MODE_REPEAT,
+      // kMirroredRepeat
+      VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
+      // kClampToEdge
+      VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+      // kMirrorClampToEdge
+      VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
+      // kClampToHalfway
+      VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+      // kMirrorClampToHalfway
+      VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
+      // kClampToBorder
+      VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+      // kMirrorClampToBorder
+      VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
+  };
+  sampler_create_info.addressModeU =
+      kAddressModeMap[uint32_t(parameters.clamp_x)];
+  sampler_create_info.addressModeV =
+      kAddressModeMap[uint32_t(parameters.clamp_y)];
+  sampler_create_info.addressModeW =
+      kAddressModeMap[uint32_t(parameters.clamp_z)];
+  // LOD biasing is performed in shaders.
+  if (parameters.aniso_filter != xenos::AnisoFilter::kDisabled) {
+    sampler_create_info.anisotropyEnable = VK_TRUE;
+    sampler_create_info.maxAnisotropy =
+        float(UINT32_C(1) << (uint32_t(parameters.aniso_filter) -
+                              uint32_t(xenos::AnisoFilter::kMax_1_1)));
+  }
+  sampler_create_info.minLod = float(parameters.mip_min_level);
+  if (parameters.mip_base_map) {
+    assert_false(parameters.mip_linear);
+    sampler_create_info.maxLod = sampler_create_info.minLod + 0.25f;
+  } else {
+    sampler_create_info.maxLod = VK_LOD_CLAMP_NONE;
+  }
+  // TODO(Triang3l): Custom border colors for CrYCb / YCrCb.
+  switch (parameters.border_color) {
+    case xenos::BorderColor::k_ABGR_White:
+      sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+      break;
+    default:
+      sampler_create_info.borderColor =
+          VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+      break;
+  }
+  VkSampler vulkan_sampler;
+  if (dfn.vkCreateSampler(device, &sampler_create_info, nullptr,
+                          &vulkan_sampler) != VK_SUCCESS) {
+    XELOGE(
+        "VulkanTextureCache: Failed to create the sampler for parameters "
+        "0x{:08X}",
+        parameters.value);
+    has_overflown_out = false;
+    return VK_NULL_HANDLE;
+  }
+  std::pair<const SamplerParameters, Sampler>& new_sampler =
+      *(samplers_
+            .emplace(std::piecewise_construct,
+                     std::forward_as_tuple(parameters),
+                     std::forward_as_tuple())
+            .first);
+  COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size());
+  new_sampler.second.sampler = vulkan_sampler;
+  new_sampler.second.last_usage_submission = submission_current;
+  new_sampler.second.used_previous = sampler_used_last_;
+  new_sampler.second.used_next = nullptr;
+  if (sampler_used_last_) {
+    sampler_used_last_->second.used_next = &new_sampler;
+  } else {
+    sampler_used_first_ = &new_sampler;
+  }
+  sampler_used_last_ = &new_sampler;
+  return vulkan_sampler;
+}
+
+uint64_t VulkanTextureCache::GetSubmissionToAwaitOnSamplerOverflow(
+    uint32_t overflowed_sampler_count) const {
+  if (!overflowed_sampler_count) {
+    return 0;
+  }
+  std::pair<const SamplerParameters, Sampler>* sampler_used =
+      sampler_used_first_;
+  if (!sampler_used_first_) {
+    return 0;
+  }
+  for (uint32_t samplers_remaining = overflowed_sampler_count - 1;
+       samplers_remaining; --samplers_remaining) {
+    std::pair<const SamplerParameters, Sampler>* sampler_used_next =
+        sampler_used->second.used_next;
+    if (!sampler_used_next) {
+      break;
+    }
+    sampler_used = sampler_used_next;
+  }
+  return sampler_used->second.last_usage_submission;
+}
+
+VkImageView VulkanTextureCache::RequestSwapTexture(
+    uint32_t& width_scaled_out, uint32_t& height_scaled_out,
+    xenos::TextureFormat& format_out) {
+  const auto& regs = register_file();
+  const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
+      XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
+  TextureKey key;
+  BindingInfoFromFetchConstant(fetch, key, nullptr);
+  if (!key.is_valid || key.base_page == 0 ||
+      key.dimension != xenos::DataDimension::k2DOrStacked) {
+    return VK_NULL_HANDLE;
+  }
+  VulkanTexture* texture =
+      static_cast<VulkanTexture*>(FindOrCreateTexture(key));
+  if (!texture) {
+    return VK_NULL_HANDLE;
+  }
+  VkImageView texture_view = texture->GetView(
+      false, GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(key)),
+      false);
+  if (texture_view == VK_NULL_HANDLE) {
+    return VK_NULL_HANDLE;
+  }
+  if (!LoadTextureData(*texture)) {
+    return VK_NULL_HANDLE;
+  }
+  texture->MarkAsUsed();
+  VulkanTexture::Usage old_usage =
+      texture->SetUsage(VulkanTexture::Usage::kSwapSampled);
+  if (old_usage != VulkanTexture::Usage::kSwapSampled) {
+    VkPipelineStageFlags src_stage_mask, dst_stage_mask;
+    VkAccessFlags src_access_mask, dst_access_mask;
+    VkImageLayout old_layout, new_layout;
+    GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask,
+                         old_layout);
+    GetTextureUsageMasks(VulkanTexture::Usage::kSwapSampled, dst_stage_mask,
+                         dst_access_mask, new_layout);
+    command_processor_.PushImageMemoryBarrier(
+        texture->image(), ui::vulkan::util::InitializeSubresourceRange(),
+        src_stage_mask, dst_stage_mask, src_access_mask, dst_access_mask,
+        old_layout, new_layout);
+  }
+  // Only texture->key, not the result of BindingInfoFromFetchConstant,
+  // contains whether the texture is scaled.
+  key = texture->key();
+  width_scaled_out =
+      key.GetWidth() * (key.scaled_resolve ? draw_resolution_scale_x() : 1);
+  height_scaled_out =
+      key.GetHeight() * (key.scaled_resolve ? draw_resolution_scale_y() : 1);
+  format_out = key.format;
+  return texture_view;
+}
+
+bool VulkanTextureCache::IsSignedVersionSeparateForFormat(
+    TextureKey key) const {
+  const HostFormatPair& host_format_pair = GetHostFormatPair(key);
+  if (host_format_pair.format_unsigned.format == VK_FORMAT_UNDEFINED ||
+      host_format_pair.format_signed.format == VK_FORMAT_UNDEFINED) {
+    // Just one signedness.
+    return false;
+  }
+  return !host_format_pair.unsigned_signed_compatible;
+}
+
+uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const {
+  return GetHostFormatPair(key).swizzle;
+}
+
+uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight(
+    xenos::DataDimension dimension) const {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const VkPhysicalDeviceLimits& device_limits =
+      provider.device_properties().limits;
+  switch (dimension) {
+    case xenos::DataDimension::k1D:
+    case xenos::DataDimension::k2DOrStacked:
+      // 1D and 2D are emulated as 2D arrays.
+      return device_limits.maxImageDimension2D;
+    case xenos::DataDimension::k3D:
+      return device_limits.maxImageDimension3D;
+    case xenos::DataDimension::kCube:
+      return device_limits.maxImageDimensionCube;
+    default:
+      assert_unhandled_case(dimension);
+      return 0;
+  }
+}
+
+uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize(
+    xenos::DataDimension dimension) const {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const VkPhysicalDeviceLimits& device_limits =
+      provider.device_properties().limits;
+  switch (dimension) {
+    case xenos::DataDimension::k1D:
+    case xenos::DataDimension::k2DOrStacked:
+      // 1D and 2D are emulated as 2D arrays.
+      return device_limits.maxImageArrayLayers;
+    case xenos::DataDimension::k3D:
+      return device_limits.maxImageDimension3D;
+    case xenos::DataDimension::kCube:
+      // Not requesting the imageCubeArray feature, and the Xenos doesn't
+      // support cube map arrays.
+      return 6;
+    default:
+      assert_unhandled_case(dimension);
+      return 0;
+  }
+}
+
+std::unique_ptr<TextureCache::Texture> VulkanTextureCache::CreateTexture(
+    TextureKey key) {
+  VkFormat formats[] = {VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED};
+  const HostFormatPair& host_format = GetHostFormatPair(key);
+  if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) {
+    // Only the unsigned format may be available, if at all.
+    formats[0] = host_format.format_unsigned.format;
+  } else if (host_format.format_unsigned.format == VK_FORMAT_UNDEFINED) {
+    // Only the signed format may be available, if at all.
+    formats[0] = host_format.format_signed.format;
+  } else {
+    // Both unsigned and signed formats are available.
+    if (IsSignedVersionSeparateForFormat(key)) {
+      formats[0] = key.signed_separate ? host_format.format_signed.format
+                                       : host_format.format_unsigned.format;
+    } else {
+      // Same format for unsigned and signed, or compatible formats.
+      formats[0] = host_format.format_unsigned.format;
+      if (host_format.format_signed.format !=
+          host_format.format_unsigned.format) {
+        assert_not_zero(host_format.unsigned_signed_compatible);
+        formats[1] = host_format.format_signed.format;
+      }
+    }
+  }
+  if (formats[0] == VK_FORMAT_UNDEFINED) {
+    // TODO(Triang3l): If there's no best format, record that a format
+    // completely unsupported by the emulator is used, to report it at the end
+    // of the frame.
+    return nullptr;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  bool is_3d = key.dimension == xenos::DataDimension::k3D;
+  uint32_t depth_or_array_size = key.GetDepthOrArraySize();
+
+  VkImageCreateInfo image_create_info;
+  VkImageCreateInfo* image_create_info_last = &image_create_info;
+  image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+  image_create_info.pNext = nullptr;
+  image_create_info.flags = 0;
+  if (formats[1] != VK_FORMAT_UNDEFINED) {
+    image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+  }
+  if (key.dimension == xenos::DataDimension::kCube) {
+    image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+  }
+  image_create_info.imageType = is_3d ? VK_IMAGE_TYPE_3D : VK_IMAGE_TYPE_2D;
+  image_create_info.format = formats[0];
+  image_create_info.extent.width = key.GetWidth();
+  image_create_info.extent.height = key.GetHeight();
+  if (key.scaled_resolve) {
+    image_create_info.extent.width *= draw_resolution_scale_x();
+    image_create_info.extent.height *= draw_resolution_scale_y();
+  }
+  image_create_info.extent.depth = is_3d ? depth_or_array_size : 1;
+  image_create_info.mipLevels = key.mip_max_level + 1;
+  image_create_info.arrayLayers = is_3d ? 1 : depth_or_array_size;
+  image_create_info.samples = VK_SAMPLE_COUNT_1_BIT;
+  image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL;
+  image_create_info.usage =
+      VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+  image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  image_create_info.queueFamilyIndexCount = 0;
+  image_create_info.pQueueFamilyIndices = nullptr;
+  image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+  VkImageFormatListCreateInfoKHR image_format_list_create_info;
+  if (formats[1] != VK_FORMAT_UNDEFINED &&
+      provider.device_extensions().khr_image_format_list) {
+    image_create_info_last->pNext = &image_format_list_create_info;
+    image_create_info_last =
+        reinterpret_cast<VkImageCreateInfo*>(&image_format_list_create_info);
+    image_format_list_create_info.sType =
+        VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
+    image_format_list_create_info.pNext = nullptr;
+    image_format_list_create_info.viewFormatCount = 2;
+    image_format_list_create_info.pViewFormats = formats;
+  }
+
+  VmaAllocationCreateInfo allocation_create_info = {};
+  allocation_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+
+  VkImage image;
+  VmaAllocation allocation;
+  if (vmaCreateImage(vma_allocator_, &image_create_info,
+                     &allocation_create_info, &image, &allocation, nullptr)) {
+    return nullptr;
+  }
+
+  return std::unique_ptr<Texture>(
+      new VulkanTexture(*this, key, image, allocation));
+}
+
+bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(
+    Texture& texture, bool load_base, bool load_mips) {
+  VulkanTexture& vulkan_texture = static_cast<VulkanTexture&>(texture);
+  TextureKey texture_key = vulkan_texture.key();
+
+  // Get the pipeline.
+  const HostFormatPair& host_format_pair = GetHostFormatPair(texture_key);
+  bool host_format_is_signed;
+  if (IsSignedVersionSeparateForFormat(texture_key)) {
+    host_format_is_signed = bool(texture_key.signed_separate);
+  } else {
+    host_format_is_signed =
+        host_format_pair.format_unsigned.load_shader ==
+        kLoadShaderIndexUnknown;
+  }
+  const HostFormat& host_format = host_format_is_signed
+                                      ? host_format_pair.format_signed
+                                      : host_format_pair.format_unsigned;
+  LoadShaderIndex load_shader = host_format.load_shader;
+  if (load_shader == kLoadShaderIndexUnknown) {
+    return false;
+  }
+  VkPipeline pipeline = texture_key.scaled_resolve
+                            ? load_pipelines_scaled_[load_shader]
+                            : load_pipelines_[load_shader];
+  if (pipeline == VK_NULL_HANDLE) {
+    return false;
+  }
+  const LoadShaderInfo& load_shader_info = GetLoadShaderInfo(load_shader);
+
+  // Get the guest layout.
+  const texture_util::TextureGuestLayout& guest_layout =
+      vulkan_texture.guest_layout();
+  xenos::DataDimension dimension = texture_key.dimension;
+  bool is_3d = dimension == xenos::DataDimension::k3D;
+  uint32_t width = texture_key.GetWidth();
+  uint32_t height = texture_key.GetHeight();
+  uint32_t depth_or_array_size = texture_key.GetDepthOrArraySize();
+  uint32_t depth = is_3d ? depth_or_array_size : 1;
+  uint32_t array_size = is_3d ? 1 : depth_or_array_size;
+  xenos::TextureFormat guest_format = texture_key.format;
+  const FormatInfo* guest_format_info = FormatInfo::Get(guest_format);
+  uint32_t block_width = guest_format_info->block_width;
+  uint32_t block_height = guest_format_info->block_height;
+  uint32_t bytes_per_block = guest_format_info->bytes_per_block();
+  uint32_t level_first = load_base ? 0 : 1;
+  uint32_t level_last = load_mips ? texture_key.mip_max_level : 0;
+  assert_true(level_first <= level_last);
+  uint32_t level_packed = guest_layout.packed_level;
+  uint32_t level_stored_first = std::min(level_first, level_packed);
+  uint32_t level_stored_last = std::min(level_last, level_packed);
+  uint32_t texture_resolution_scale_x =
+      texture_key.scaled_resolve ? draw_resolution_scale_x() : 1;
+  uint32_t texture_resolution_scale_y =
+      texture_key.scaled_resolve ? draw_resolution_scale_y() : 1;
+
+  // The loop counter can mean two things depending on whether the packed mip
+  // tail is stored as mip 0, because in this case, it would be ambiguous
+  // since both the base and the mips would be on "level 0", but stored in
+  // separate places.
+  uint32_t loop_level_first, loop_level_last;
+  if (level_packed == 0) {
+    // Packed mip tail is the level 0 - may need to load mip tails for the
+    // base, the mips, or both.
+    // Loop iteration 0 - base packed mip tail.
+    // Loop iteration 1 - mips packed mip tail.
+    loop_level_first = uint32_t(level_first != 0);
+    loop_level_last = uint32_t(level_last != 0);
+  } else {
+    // Packed mip tail is not the level 0.
+    // Loop iteration is the actual level being loaded.
+    loop_level_first = level_stored_first;
+    loop_level_last = level_stored_last;
+  }
+
+  // Get the host layout and the buffer.
+  uint32_t host_block_width = host_format.block_compressed ? block_width : 1;
+  uint32_t host_block_height =
+      host_format.block_compressed ? block_height : 1;
+  uint32_t host_x_blocks_per_thread =
+      UINT32_C(1) << load_shader_info.guest_x_blocks_per_thread_log2;
+  if (!host_format.block_compressed) {
+    // Decompressing guest blocks.
+    host_x_blocks_per_thread *= block_width;
+  }
+  VkDeviceSize host_buffer_size = 0;
+  struct HostLayout {
+    VkDeviceSize offset_bytes;
+    VkDeviceSize slice_size_bytes;
+    uint32_t x_pitch_blocks;
+    uint32_t y_pitch_blocks;
+  };
+  HostLayout host_layout_base;
+  // Indexing is the same as for guest stored mips:
+  // 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if
+  // level_packed == 0.
+  HostLayout host_layout_mips[xenos::kTextureMaxMips];
+  for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last;
+       ++loop_level) {
+    bool is_base = loop_level == 0;
+    uint32_t level = (level_packed == 0) ? 0 : loop_level;
+    HostLayout& level_host_layout =
+        is_base ? host_layout_base : host_layout_mips[level];
+    level_host_layout.offset_bytes = host_buffer_size;
+    uint32_t level_guest_x_extent_texels_unscaled;
+    uint32_t level_guest_y_extent_texels_unscaled;
+    uint32_t level_guest_z_extent_texels;
+    if (level == level_packed) {
+      // Loading the packed tail for the base or the mips - load the whole
+      // tail to copy regions out of it.
+      const texture_util::TextureGuestLayout::Level& guest_layout_packed =
+          is_base ? guest_layout.base : guest_layout.mips[level];
+      level_guest_x_extent_texels_unscaled =
+          guest_layout_packed.x_extent_blocks * block_width;
+      level_guest_y_extent_texels_unscaled =
+          guest_layout_packed.y_extent_blocks * block_height;
+      level_guest_z_extent_texels = guest_layout_packed.z_extent;
+    } else {
+      level_guest_x_extent_texels_unscaled =
+          std::max(width >> level, UINT32_C(1));
+      level_guest_y_extent_texels_unscaled =
+          std::max(height >> level, UINT32_C(1));
+      level_guest_z_extent_texels = std::max(depth >> level, UINT32_C(1));
+    }
+    level_host_layout.x_pitch_blocks = xe::round_up(
+        (level_guest_x_extent_texels_unscaled * texture_resolution_scale_x +
+         (host_block_width - 1)) /
+            host_block_width,
+        host_x_blocks_per_thread);
+    level_host_layout.y_pitch_blocks =
+        (level_guest_y_extent_texels_unscaled * texture_resolution_scale_y +
+         (host_block_height - 1)) /
+        host_block_height;
+    level_host_layout.slice_size_bytes =
+        VkDeviceSize(load_shader_info.bytes_per_host_block) *
+        level_host_layout.x_pitch_blocks * level_host_layout.y_pitch_blocks *
+        level_guest_z_extent_texels;
+    host_buffer_size += level_host_layout.slice_size_bytes * array_size;
+  }
+  VulkanCommandProcessor::ScratchBufferAcquisition scratch_buffer_acquisition(
+      command_processor_.AcquireScratchGpuBuffer(
+          host_buffer_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+          VK_ACCESS_SHADER_WRITE_BIT));
+  VkBuffer scratch_buffer = scratch_buffer_acquisition.buffer();
+  if (scratch_buffer == VK_NULL_HANDLE) {
+    return false;
+  }
+
+  // Begin loading.
+  // TODO(Triang3l): Going from one descriptor to another on per-array-layer
+  // or even per-8-depth-slices level to stay within maxStorageBufferRange.
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  VulkanSharedMemory& vulkan_shared_memory =
+      static_cast<VulkanSharedMemory&>(shared_memory());
+  std::array<VkWriteDescriptorSet, 3> write_descriptor_sets;
+  uint32_t write_descriptor_set_count = 0;
+  VkDescriptorSet descriptor_set_dest =
+      command_processor_.AllocateSingleTransientDescriptor(
+          VulkanCommandProcessor::SingleTransientDescriptorLayout ::
+              kStorageBufferCompute);
+  if (!descriptor_set_dest) {
+    return false;
+  }
+  VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info;
+  {
+    write_descriptor_set_dest_buffer_info.buffer = scratch_buffer;
+    write_descriptor_set_dest_buffer_info.offset = 0;
+    write_descriptor_set_dest_buffer_info.range = host_buffer_size;
+    VkWriteDescriptorSet& write_descriptor_set_dest =
+        write_descriptor_sets[write_descriptor_set_count++];
+    write_descriptor_set_dest.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    write_descriptor_set_dest.pNext = nullptr;
+    write_descriptor_set_dest.dstSet = descriptor_set_dest;
+    write_descriptor_set_dest.dstBinding = 0;
+    write_descriptor_set_dest.dstArrayElement = 0;
+    write_descriptor_set_dest.descriptorCount = 1;
+    write_descriptor_set_dest.descriptorType =
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    write_descriptor_set_dest.pImageInfo = nullptr;
+    write_descriptor_set_dest.pBufferInfo =
+        &write_descriptor_set_dest_buffer_info;
+    write_descriptor_set_dest.pTexelBufferView = nullptr;
+  }
+  // TODO(Triang3l): Use a single 512 MB shared memory binding if possible.
+  // TODO(Triang3l): Scaled resolve buffer bindings.
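+  // Up to two source storage buffer bindings follow: one descriptor for the
+  // base level data (if it's loaded) and one for the mips, both pointing into
+  // the shared memory buffer.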
+
+  // Aligning because if the data for a vector in a storage buffer is provided
+  // partially, the value read may still be (0, 0, 0, 0), and small
+  // (especially linear) textures won't be loaded correctly.
+  uint32_t source_length_alignment = UINT32_C(1)
+                                     << load_shader_info.source_bpe_log2;
+  VkDescriptorSet descriptor_set_source_base = VK_NULL_HANDLE;
+  VkDescriptorSet descriptor_set_source_mips = VK_NULL_HANDLE;
+  VkDescriptorBufferInfo write_descriptor_set_source_base_buffer_info;
+  VkDescriptorBufferInfo write_descriptor_set_source_mips_buffer_info;
+  if (level_first == 0) {
+    descriptor_set_source_base =
+        command_processor_.AllocateSingleTransientDescriptor(
+            VulkanCommandProcessor::SingleTransientDescriptorLayout ::
+                kStorageBufferCompute);
+    if (!descriptor_set_source_base) {
+      return false;
+    }
+    write_descriptor_set_source_base_buffer_info.buffer =
+        vulkan_shared_memory.buffer();
+    write_descriptor_set_source_base_buffer_info.offset =
+        texture_key.base_page << 12;
+    write_descriptor_set_source_base_buffer_info.range =
+        xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment);
+    VkWriteDescriptorSet& write_descriptor_set_source_base =
+        write_descriptor_sets[write_descriptor_set_count++];
+    write_descriptor_set_source_base.sType =
+        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    write_descriptor_set_source_base.pNext = nullptr;
+    write_descriptor_set_source_base.dstSet = descriptor_set_source_base;
+    write_descriptor_set_source_base.dstBinding = 0;
+    write_descriptor_set_source_base.dstArrayElement = 0;
+    write_descriptor_set_source_base.descriptorCount = 1;
+    write_descriptor_set_source_base.descriptorType =
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    write_descriptor_set_source_base.pImageInfo = nullptr;
+    write_descriptor_set_source_base.pBufferInfo =
+        &write_descriptor_set_source_base_buffer_info;
+    write_descriptor_set_source_base.pTexelBufferView = nullptr;
+  }
+  if (level_last != 0) {
+    descriptor_set_source_mips =
+        command_processor_.AllocateSingleTransientDescriptor(
+            VulkanCommandProcessor::SingleTransientDescriptorLayout ::
+                kStorageBufferCompute);
+    if (!descriptor_set_source_mips) {
+      return false;
+    }
+    write_descriptor_set_source_mips_buffer_info.buffer =
+        vulkan_shared_memory.buffer();
+    write_descriptor_set_source_mips_buffer_info.offset =
+        texture_key.mip_page << 12;
+    write_descriptor_set_source_mips_buffer_info.range =
+        xe::align(vulkan_texture.GetGuestMipsSize(), source_length_alignment);
+    VkWriteDescriptorSet& write_descriptor_set_source_mips =
+        write_descriptor_sets[write_descriptor_set_count++];
+    write_descriptor_set_source_mips.sType =
+        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    write_descriptor_set_source_mips.pNext = nullptr;
+    write_descriptor_set_source_mips.dstSet = descriptor_set_source_mips;
+    write_descriptor_set_source_mips.dstBinding = 0;
+    write_descriptor_set_source_mips.dstArrayElement = 0;
+    write_descriptor_set_source_mips.descriptorCount = 1;
+    write_descriptor_set_source_mips.descriptorType =
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    write_descriptor_set_source_mips.pImageInfo = nullptr;
+    write_descriptor_set_source_mips.pBufferInfo =
+        &write_descriptor_set_source_mips_buffer_info;
+    write_descriptor_set_source_mips.pTexelBufferView = nullptr;
+  }
+  if (write_descriptor_set_count) {
+    dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
+                               write_descriptor_sets.data(), 0, nullptr);
+  }
+  vulkan_shared_memory.Use(VulkanSharedMemory::Usage::kRead);
+
+  // Submit the copy buffer population commands.
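+  // One compute dispatch is issued per stored level per array slice; each
+  // dispatch reads guest blocks from shared memory and writes them, detiled
+  // and converted, into the scratch buffer at the host layout offsets
+  // computed above.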
+
+  DeferredCommandBuffer& command_buffer =
+      command_processor_.deferred_command_buffer();
+
+  command_processor_.BindExternalComputePipeline(pipeline);
+
+  command_buffer.CmdVkBindDescriptorSets(
+      VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_,
+      kLoadDescriptorSetIndexDestination, 1, &descriptor_set_dest, 0,
+      nullptr);
+
+  VkDescriptorSet descriptor_set_source_current = VK_NULL_HANDLE;
+
+  LoadConstants load_constants;
+  load_constants.is_tiled_3d_endian_scale =
+      uint32_t(texture_key.tiled) | (uint32_t(is_3d) << 1) |
+      (uint32_t(texture_key.endianness) << 2) |
+      (texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 6);
+
+  uint32_t guest_x_blocks_per_group_log2 =
+      load_shader_info.GetGuestXBlocksPerGroupLog2();
+  for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last;
+       ++loop_level) {
+    bool is_base = loop_level == 0;
+    uint32_t level = (level_packed == 0) ? 0 : loop_level;
+
+    VkDescriptorSet descriptor_set_source =
+        is_base ? descriptor_set_source_base : descriptor_set_source_mips;
+    if (descriptor_set_source_current != descriptor_set_source) {
+      descriptor_set_source_current = descriptor_set_source;
+      command_buffer.CmdVkBindDescriptorSets(
+          VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_,
+          kLoadDescriptorSetIndexSource, 1, &descriptor_set_source, 0,
+          nullptr);
    }
-      COUNT_profile_set("gpu/texture_cache/pending_deletes",
-                        pending_delete_textures_.size());
+    // TODO(Triang3l): guest_offset relative to the storage buffer origin.
+    load_constants.guest_offset = 0;
+    if (!is_base) {
+      load_constants.guest_offset +=
+          guest_layout.mip_offsets_bytes[level] *
+          (texture_resolution_scale_x * texture_resolution_scale_y);
+    }
+    const texture_util::TextureGuestLayout::Level& level_guest_layout =
+        is_base ? guest_layout.base : guest_layout.mips[level];
+    uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;
+    if (texture_key.tiled) {
+      // Shaders expect pitch in blocks for tiled textures.
+      level_guest_pitch /= bytes_per_block;
+      assert_zero(level_guest_pitch & (xenos::kTextureTileWidthHeight - 1));
+    }
+    load_constants.guest_pitch_aligned = level_guest_pitch;
+    load_constants.guest_z_stride_block_rows_aligned =
+        level_guest_layout.z_slice_stride_block_rows;
+    assert_true(dimension != xenos::DataDimension::k3D ||
+                !(load_constants.guest_z_stride_block_rows_aligned &
+                  (xenos::kTextureTileWidthHeight - 1)));
+
+    uint32_t level_width, level_height, level_depth;
+    if (level == level_packed) {
+      // This is the packed mip tail, containing not only the specified
+      // level, but also other levels at different offsets - load the entire
+      // needed extents.
+      level_width = level_guest_layout.x_extent_blocks * block_width;
+      level_height = level_guest_layout.y_extent_blocks * block_height;
+      level_depth = level_guest_layout.z_extent;
+    } else {
+      level_width = std::max(width >> level, UINT32_C(1));
+      level_height = std::max(height >> level, UINT32_C(1));
+      level_depth = std::max(depth >> level, UINT32_C(1));
+    }
+    load_constants.size_blocks[0] = (level_width + (block_width - 1)) /
+                                    block_width * texture_resolution_scale_x;
+    load_constants.size_blocks[1] = (level_height + (block_height - 1)) /
+                                    block_height * texture_resolution_scale_y;
+    load_constants.size_blocks[2] = level_depth;
+    load_constants.height_texels = level_height;
+
+    uint32_t group_count_x =
+        (load_constants.size_blocks[0] +
+         ((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >>
+        guest_x_blocks_per_group_log2;
+    uint32_t group_count_y =
+        (load_constants.size_blocks[1] +
+         ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >>
+        kLoadGuestYBlocksPerGroupLog2;
+
+    // TODO(Triang3l): host_offset relative to the storage buffer origin.
+    const HostLayout& level_host_layout =
+        is_base ? host_layout_base : host_layout_mips[level];
+    load_constants.host_offset = uint32_t(level_host_layout.offset_bytes);
+    load_constants.host_pitch = load_shader_info.bytes_per_host_block *
+                                level_host_layout.x_pitch_blocks;
+
+    uint32_t level_array_slice_stride_bytes_scaled =
+        level_guest_layout.array_slice_stride_bytes *
+        (texture_resolution_scale_x * texture_resolution_scale_y);
+    for (uint32_t slice = 0; slice < array_size; ++slice) {
+      VkDescriptorSet descriptor_set_constants;
+      void* constants_mapping =
+          command_processor_.WriteTransientUniformBufferBinding(
+              sizeof(load_constants),
+              VulkanCommandProcessor::SingleTransientDescriptorLayout ::
+                  kUniformBufferCompute,
+              descriptor_set_constants);
+      if (!constants_mapping) {
+        return false;
+      }
+      std::memcpy(constants_mapping, &load_constants, sizeof(load_constants));
+      command_buffer.CmdVkBindDescriptorSets(
+          VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_,
+          kLoadDescriptorSetIndexConstants, 1, &descriptor_set_constants, 0,
+          nullptr);
+      command_processor_.SubmitBarriers(true);
+      command_buffer.CmdVkDispatch(group_count_x, group_count_y,
+                                   load_constants.size_blocks[2]);
+      load_constants.guest_offset += level_array_slice_stride_bytes_scaled;
+      load_constants.host_offset +=
+          uint32_t(level_host_layout.slice_size_bytes);
+    }
  }
+
+  // Submit copying from the copy buffer to the host texture.
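+  // A buffer memory barrier makes the compute shader writes visible to the
+  // transfer stage, the image is transitioned to TRANSFER_DST_OPTIMAL, and
+  // one VkBufferImageCopy region is recorded per requested mip level.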
+  command_processor_.PushBufferMemoryBarrier(
+      scratch_buffer, 0, VK_WHOLE_SIZE,
+      scratch_buffer_acquisition.SetStageMask(VK_PIPELINE_STAGE_TRANSFER_BIT),
+      VK_PIPELINE_STAGE_TRANSFER_BIT,
+      scratch_buffer_acquisition.SetAccessMask(VK_ACCESS_TRANSFER_READ_BIT),
+      VK_ACCESS_TRANSFER_READ_BIT);
+  vulkan_texture.MarkAsUsed();
+  VulkanTexture::Usage texture_old_usage =
+      vulkan_texture.SetUsage(VulkanTexture::Usage::kTransferDestination);
+  if (texture_old_usage != VulkanTexture::Usage::kTransferDestination) {
+    VkPipelineStageFlags texture_src_stage_mask, texture_dst_stage_mask;
+    VkAccessFlags texture_src_access_mask, texture_dst_access_mask;
+    VkImageLayout texture_old_layout, texture_new_layout;
+    GetTextureUsageMasks(texture_old_usage, texture_src_stage_mask,
+                         texture_src_access_mask, texture_old_layout);
+    GetTextureUsageMasks(VulkanTexture::Usage::kTransferDestination,
+                         texture_dst_stage_mask, texture_dst_access_mask,
+                         texture_new_layout);
+    command_processor_.PushImageMemoryBarrier(
+        vulkan_texture.image(),
+        ui::vulkan::util::InitializeSubresourceRange(),
+        texture_src_stage_mask, texture_dst_stage_mask,
+        texture_src_access_mask, texture_dst_access_mask, texture_old_layout,
+        texture_new_layout);
+  }
+  command_processor_.SubmitBarriers(true);
+  VkBufferImageCopy* copy_regions = command_buffer.CmdCopyBufferToImageEmplace(
+      scratch_buffer, vulkan_texture.image(),
+      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, level_last - level_first + 1);
+  for (uint32_t level = level_first; level <= level_last; ++level) {
+    VkBufferImageCopy& copy_region = copy_regions[level - level_first];
+    const HostLayout& level_host_layout =
+        level != 0 ? host_layout_mips[std::min(level, level_packed)]
+                   : host_layout_base;
+    copy_region.bufferOffset = level_host_layout.offset_bytes;
+    if (level >= level_packed) {
+      uint32_t level_offset_blocks_x, level_offset_blocks_y, level_offset_z;
+      texture_util::GetPackedMipOffset(width, height, depth, guest_format,
+                                       level, level_offset_blocks_x,
+                                       level_offset_blocks_y, level_offset_z);
+      uint32_t level_offset_host_blocks_x =
+          texture_resolution_scale_x * level_offset_blocks_x;
+      uint32_t level_offset_host_blocks_y =
+          texture_resolution_scale_y * level_offset_blocks_y;
+      if (!host_format.block_compressed) {
+        level_offset_host_blocks_x *= block_width;
+        level_offset_host_blocks_y *= block_height;
+      }
+      copy_region.bufferOffset +=
+          load_shader_info.bytes_per_host_block *
+          (level_offset_host_blocks_x +
+           level_host_layout.x_pitch_blocks *
+               (level_offset_host_blocks_y +
+                level_host_layout.y_pitch_blocks *
+                    VkDeviceSize(level_offset_z)));
+    }
+    copy_region.bufferRowLength =
+        level_host_layout.x_pitch_blocks * host_block_width;
+    copy_region.bufferImageHeight =
+        level_host_layout.y_pitch_blocks * host_block_height;
+    copy_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    copy_region.imageSubresource.mipLevel = level;
+    copy_region.imageSubresource.baseArrayLayer = 0;
+    copy_region.imageSubresource.layerCount = array_size;
+    copy_region.imageOffset.x = 0;
+    copy_region.imageOffset.y = 0;
+    copy_region.imageOffset.z = 0;
+    copy_region.imageExtent.width =
+        std::max((width * texture_resolution_scale_x) >> level, UINT32_C(1));
+    copy_region.imageExtent.height =
+        std::max((height * texture_resolution_scale_y) >> level, UINT32_C(1));
+    copy_region.imageExtent.depth = std::max(depth >> level, UINT32_C(1));
+  }
+
+  return true;
+}
+
+void VulkanTextureCache::UpdateTextureBindingsImpl(
+    uint32_t fetch_constant_mask) {
+  uint32_t bindings_remaining = fetch_constant_mask;
+  uint32_t binding_index;
+  while (xe::bit_scan_forward(bindings_remaining, &binding_index)) {
+    bindings_remaining &= ~(UINT32_C(1) << binding_index);
+    VulkanTextureBinding& vulkan_binding =
+        vulkan_texture_bindings_[binding_index];
+    vulkan_binding.Reset();
+    const TextureBinding* binding = GetValidTextureBinding(binding_index);
+    if (!binding) {
+      continue;
+    }
+    if (IsSignedVersionSeparateForFormat(binding->key)) {
+      if (binding->texture &&
+          texture_util::IsAnySignNotSigned(binding->swizzled_signs)) {
+        vulkan_binding.image_view_unsigned =
+            static_cast<VulkanTexture*>(binding->texture)
+                ->GetView(false, binding->host_swizzle);
+      }
+      if (binding->texture_signed &&
+          texture_util::IsAnySignSigned(binding->swizzled_signs)) {
+        vulkan_binding.image_view_signed =
+            static_cast<VulkanTexture*>(binding->texture_signed)
+                ->GetView(true, binding->host_swizzle);
+      }
+    } else {
+      VulkanTexture* texture = static_cast<VulkanTexture*>(binding->texture);
+      if (texture) {
+        if (texture_util::IsAnySignNotSigned(binding->swizzled_signs)) {
+          vulkan_binding.image_view_unsigned =
+              texture->GetView(false, binding->host_swizzle);
+        }
+        if (texture_util::IsAnySignSigned(binding->swizzled_signs)) {
+          vulkan_binding.image_view_signed =
+              texture->GetView(true, binding->host_swizzle);
+        }
+      }
+    }
+  }
+}
+
+VulkanTextureCache::VulkanTexture::VulkanTexture(
+    VulkanTextureCache& texture_cache, const TextureKey& key, VkImage image,
+    VmaAllocation allocation)
+    : Texture(texture_cache, key), image_(image), allocation_(allocation) {
+  VmaAllocationInfo allocation_info;
+  vmaGetAllocationInfo(texture_cache.vma_allocator_, allocation_,
+                       &allocation_info);
+  SetHostMemoryUsage(uint64_t(allocation_info.size));
+}
+
+VulkanTextureCache::VulkanTexture::~VulkanTexture() {
+  const VulkanTextureCache& vulkan_texture_cache =
+      static_cast<const VulkanTextureCache&>(texture_cache());
+  const ui::vulkan::VulkanProvider& provider =
+      vulkan_texture_cache.command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  for (const auto& view_pair : views_) {
+    dfn.vkDestroyImageView(device, view_pair.second, nullptr);
+  }
+  vmaDestroyImage(vulkan_texture_cache.vma_allocator_, image_, allocation_);
+}
+
+VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed,
+                                                       uint32_t host_swizzle,
+                                                       bool is_array) {
+  xenos::DataDimension dimension = key().dimension;
+  if (dimension == xenos::DataDimension::k3D ||
+      dimension == xenos::DataDimension::kCube) {
+    is_array = false;
+  }
+
+  const VulkanTextureCache& vulkan_texture_cache =
+      static_cast<const VulkanTextureCache&>(texture_cache());
+
+  ViewKey view_key;
+
+  const HostFormatPair& host_format_pair =
+      vulkan_texture_cache.GetHostFormatPair(key());
+  VkFormat format = (is_signed ? host_format_pair.format_signed
+                               : host_format_pair.format_unsigned)
+                        .format;
+  if (format == VK_FORMAT_UNDEFINED) {
+    return VK_NULL_HANDLE;
+  }
+  // If not distinguishing between unsigned and signed formats for the same
+  // image, don't create two views. As this happens within an image, no need
+  // to care about whether unsigned and signed images are separate - if they
+  // are (or if there are only unsigned or only signed images), this image
+  // will have either all views unsigned or all views signed.
+  view_key.is_signed_separate_view =
+      is_signed && (host_format_pair.format_signed.format !=
+                    host_format_pair.format_unsigned.format);
+
+  const ui::vulkan::VulkanProvider& provider =
+      vulkan_texture_cache.command_processor_.GetVulkanProvider();
+  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+      device_portability_subset_features =
+          provider.device_portability_subset_features();
+  if (device_portability_subset_features &&
+      !device_portability_subset_features->imageViewFormatSwizzle) {
+    host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA;
+  }
+  view_key.host_swizzle = host_swizzle;
+
+  view_key.is_array = uint32_t(is_array);
+
+  // Try to find an existing view.
+  auto it = views_.find(view_key);
+  if (it != views_.end()) {
+    return it->second;
+  }
+
+  // Create a new view.
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  VkImageViewCreateInfo view_create_info;
+  view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+  view_create_info.pNext = nullptr;
+  view_create_info.flags = 0;
+  view_create_info.image = image();
+  view_create_info.format = format;
+  view_create_info.components.r = GetComponentSwizzle(host_swizzle, 0);
+  view_create_info.components.g = GetComponentSwizzle(host_swizzle, 1);
+  view_create_info.components.b = GetComponentSwizzle(host_swizzle, 2);
+  view_create_info.components.a = GetComponentSwizzle(host_swizzle, 3);
+  view_create_info.subresourceRange =
+      ui::vulkan::util::InitializeSubresourceRange();
+  switch (dimension) {
+    case xenos::DataDimension::k3D:
+      view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
+      break;
+    case xenos::DataDimension::kCube:
+      view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
+      break;
+    default:
+      if (is_array) {
+        view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+      } else {
+        view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+        view_create_info.subresourceRange.layerCount = 1;
+      }
+      break;
+  }
+  VkImageView view;
+  if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view) !=
+      VK_SUCCESS) {
+    XELOGE(
+        "VulkanTextureCache: Failed to create an image view for Vulkan format "
+        "{} ({}signed) with swizzle 0x{:3X}",
+        uint32_t(format), is_signed ? "" : "un", host_swizzle);
+    return VK_NULL_HANDLE;
+  }
+  views_.emplace(view_key, view);
+  return view;
+}
+
+VulkanTextureCache::VulkanTextureCache(
+    const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
+    uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
+    VulkanCommandProcessor& command_processor,
+    VkPipelineStageFlags guest_shader_pipeline_stages)
+    : TextureCache(register_file, shared_memory, draw_resolution_scale_x,
+                   draw_resolution_scale_y),
+      command_processor_(command_processor),
+      guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {
+  // TODO(Triang3l): Support draw resolution scaling.
+  assert_true(draw_resolution_scale_x == 1 && draw_resolution_scale_y == 1);
+}
+
+bool VulkanTextureCache::Initialize() {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn();
+  VkPhysicalDevice physical_device = provider.physical_device();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
+      device_portability_subset_features =
+          provider.device_portability_subset_features();
+
+  // Vulkan Memory Allocator.
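+  // A dedicated VMA allocator owns the memory of all guest texture images; it
+  // must outlive every VulkanTexture (see ~VulkanTextureCache).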
+
+  vma_allocator_ = ui::vulkan::CreateVmaAllocator(provider, true);
+  if (vma_allocator_ == VK_NULL_HANDLE) {
+    return false;
+  }
+
+  // Image formats.
+
+  // Initialize to the best formats.
+  for (size_t i = 0; i < xe::countof(host_formats_); ++i) {
+    host_formats_[i] = kBestHostFormats[i];
+  }
+
+  // Check format support and switch to fallbacks if needed.
+  constexpr VkFormatFeatureFlags kLinearFilterFeatures =
+      VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+      VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+  VkFormatProperties r16_unorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_R16_UNORM,
+                                          &r16_unorm_properties);
+  VkFormatProperties r16_snorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_R16_SNORM,
+                                          &r16_snorm_properties);
+  VkFormatProperties r16g16_unorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(
+      physical_device, VK_FORMAT_R16G16_UNORM, &r16g16_unorm_properties);
+  VkFormatProperties r16g16_snorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(
+      physical_device, VK_FORMAT_R16G16_SNORM, &r16g16_snorm_properties);
+  VkFormatProperties r16g16b16a16_unorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(physical_device,
+                                          VK_FORMAT_R16G16B16A16_UNORM,
+                                          &r16g16b16a16_unorm_properties);
+  VkFormatProperties r16g16b16a16_snorm_properties;
+  ifn.vkGetPhysicalDeviceFormatProperties(physical_device,
+                                          VK_FORMAT_R16G16B16A16_SNORM,
+                                          &r16g16b16a16_snorm_properties);
+  VkFormatProperties format_properties;
+  // TODO(Triang3l): k_2_10_10_10 signed -> filterable R16G16B16A16_SFLOAT
+  // (enough storage precision, possibly unwanted filtering precision change).
+  // k_Cr_Y1_Cb_Y0_REP, k_Y1_Cr_Y0_Cb_REP.
+  HostFormatPair& host_format_gbgr =
+      host_formats_[uint32_t(xenos::TextureFormat::k_Cr_Y1_Cb_Y0_REP)];
+  assert_true(host_format_gbgr.format_unsigned.format ==
+              VK_FORMAT_G8B8G8R8_422_UNORM_KHR);
+  assert_true(host_format_gbgr.format_signed.format ==
+              VK_FORMAT_R8G8B8A8_SNORM);
+  ifn.vkGetPhysicalDeviceFormatProperties(
+      physical_device, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, &format_properties);
+  if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) !=
+      kLinearFilterFeatures) {
+    host_format_gbgr.format_unsigned.load_shader = kLoadShaderIndexGBGR8ToRGB8;
+    host_format_gbgr.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM;
+    host_format_gbgr.format_unsigned.block_compressed = false;
+    host_format_gbgr.unsigned_signed_compatible = true;
+  }
+  HostFormatPair& host_format_bgrg =
+      host_formats_[uint32_t(xenos::TextureFormat::k_Y1_Cr_Y0_Cb_REP)];
+  assert_true(host_format_bgrg.format_unsigned.format ==
+              VK_FORMAT_B8G8R8G8_422_UNORM_KHR);
+  assert_true(host_format_bgrg.format_signed.format ==
+              VK_FORMAT_R8G8B8A8_SNORM);
+  ifn.vkGetPhysicalDeviceFormatProperties(
+      physical_device, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, &format_properties);
+  if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) !=
+      kLinearFilterFeatures) {
+    host_format_bgrg.format_unsigned.load_shader = kLoadShaderIndexBGRG8ToRGB8;
+    host_format_bgrg.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM;
+    host_format_bgrg.format_unsigned.block_compressed = false;
+    host_format_bgrg.unsigned_signed_compatible = true;
+  }
+  // TODO(Triang3l): k_10_11_11 -> filterable R16G16B16A16_SFLOAT (enough
+  // storage precision, possibly unwanted filtering precision change).
+  // TODO(Triang3l): k_11_11_10 -> filterable R16G16B16A16_SFLOAT (enough
+  // storage precision, possibly unwanted filtering precision change).
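+  // Each optional format below follows the same pattern: query
+  // vkGetPhysicalDeviceFormatProperties, and if optimal tiling doesn't expose
+  // sampled-image linear filtering, switch the load shader to one that
+  // decompresses into a mandatory linear-filterable format.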
+ // S3TC. + // Not checking the textureCompressionBC feature because its availability + // means that all BC formats are supported; however, the device may expose + // some BC formats without this feature. Xenia doesn't use BC6H and BC7 at + // all, and has fallbacks for each used format. + // TODO(Triang3l): Raise the host texture memory usage limit if S3TC has to be + // decompressed. + // TODO(Triang3l): S3TC -> 5551 or 4444 as an option. + // TODO(Triang3l): S3TC -> ETC2 / EAC (a huge research topic). + HostFormatPair& host_format_dxt1 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT1)]; + assert_true(host_format_dxt1.format_unsigned.format == + VK_FORMAT_BC1_RGBA_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxt1.format_unsigned.load_shader = kLoadShaderIndexDXT1ToRGBA8; + host_format_dxt1.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt1.format_unsigned.block_compressed = false; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT1_AS_16_16_16_16)] = + host_format_dxt1; + } + HostFormatPair& host_format_dxt2_3 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3)]; + assert_true(host_format_dxt2_3.format_unsigned.format == + VK_FORMAT_BC2_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC2_UNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxt2_3.format_unsigned.load_shader = + kLoadShaderIndexDXT3ToRGBA8; + host_format_dxt2_3.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt2_3.format_unsigned.block_compressed = false; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3_AS_16_16_16_16)] = + host_format_dxt2_3; + } + HostFormatPair& host_format_dxt4_5 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5)]; + assert_true(host_format_dxt4_5.format_unsigned.format == + VK_FORMAT_BC3_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC3_UNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxt4_5.format_unsigned.load_shader = + kLoadShaderIndexDXT5ToRGBA8; + host_format_dxt4_5.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt4_5.format_unsigned.block_compressed = false; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5_AS_16_16_16_16)] = + host_format_dxt4_5; + } + HostFormatPair& host_format_dxn = + host_formats_[uint32_t(xenos::TextureFormat::k_DXN)]; + assert_true(host_format_dxn.format_unsigned.format == + VK_FORMAT_BC5_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC5_UNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxn.format_unsigned.load_shader = kLoadShaderIndexDXNToRG8; + host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; + host_format_dxn.format_unsigned.block_compressed = false; + } + HostFormatPair& host_format_dxt5a = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT5A)]; + assert_true(host_format_dxt5a.format_unsigned.format == + VK_FORMAT_BC4_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC4_UNORM_BLOCK, &format_properties); + if
((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxt5a.format_unsigned.load_shader = kLoadShaderIndexDXT5AToR8; + host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; + host_format_dxt5a.format_unsigned.block_compressed = false; + } + // k_16, k_16_16, k_16_16_16_16 - UNORM / SNORM are optional, fall back to + // SFLOAT, which is mandatory and is always filterable (the guest 16-bit + // format is filterable, 16-bit fixed-point is the full texture filtering + // precision on the Xenos overall). Let the user choose what's more important, + // precision (use host UNORM / SNORM if available even if they're not + // filterable) or filterability (use host UNORM / SNORM only if they're + // available and filterable). + // TODO(Triang3l): Expose a cvar for selecting the preference (filterability + // or precision). + VkFormatFeatureFlags norm16_required_features = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + HostFormatPair& host_format_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16)]; + assert_true(host_format_16.format_unsigned.format == VK_FORMAT_R16_UNORM); + if ((r16_unorm_properties.optimalTilingFeatures & norm16_required_features) != + norm16_required_features) { + host_format_16.format_unsigned.load_shader = + kLoadShaderIndexR16UNormToFloat; + host_format_16.format_unsigned.format = VK_FORMAT_R16_SFLOAT; + } + assert_true(host_format_16.format_signed.format == VK_FORMAT_R16_SNORM); + if ((r16_snorm_properties.optimalTilingFeatures & norm16_required_features) != + norm16_required_features) { + host_format_16.format_signed.load_shader = kLoadShaderIndexR16SNormToFloat; + host_format_16.format_signed.format = VK_FORMAT_R16_SFLOAT; + } + host_format_16.unsigned_signed_compatible = + (host_format_16.format_unsigned.format == VK_FORMAT_R16_UNORM && + host_format_16.format_signed.format == VK_FORMAT_R16_SNORM) || + (host_format_16.format_unsigned.format == VK_FORMAT_R16_SFLOAT && + host_format_16.format_signed.format == VK_FORMAT_R16_SFLOAT); + HostFormatPair& host_format_16_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16_16)]; + assert_true(host_format_16_16.format_unsigned.format == + VK_FORMAT_R16G16_UNORM); + if ((r16g16_unorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16.format_unsigned.load_shader = + kLoadShaderIndexRG16UNormToFloat; + host_format_16_16.format_unsigned.format = VK_FORMAT_R16G16_SFLOAT; + } + assert_true(host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SNORM); + if ((r16g16_snorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16.format_signed.load_shader = + kLoadShaderIndexRG16SNormToFloat; + host_format_16_16.format_signed.format = VK_FORMAT_R16G16_SFLOAT; + } + host_format_16_16.unsigned_signed_compatible = + (host_format_16_16.format_unsigned.format == VK_FORMAT_R16G16_UNORM && + host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SNORM) || + (host_format_16_16.format_unsigned.format == VK_FORMAT_R16G16_SFLOAT && + host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SFLOAT); + HostFormatPair& host_format_16_16_16_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16_16_16_16)]; + assert_true(host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_UNORM); + if ((r16g16b16a16_unorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + 
host_format_16_16_16_16.format_unsigned.load_shader = + kLoadShaderIndexRGBA16UNormToFloat; + host_format_16_16_16_16.format_unsigned.format = + VK_FORMAT_R16G16B16A16_SFLOAT; + } + assert_true(host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SNORM); + if ((r16g16b16a16_snorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16_16_16.format_signed.load_shader = + kLoadShaderIndexRGBA16SNormToFloat; + host_format_16_16_16_16.format_signed.format = + VK_FORMAT_R16G16B16A16_SFLOAT; + } + host_format_16_16_16_16.unsigned_signed_compatible = + (host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_UNORM && + host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SNORM) || + (host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_SFLOAT && + host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SFLOAT); + + // Normalize format information structures. + for (size_t i = 0; i < xe::countof(host_formats_); ++i) { + HostFormatPair& host_format = host_formats_[i]; + // load_shader_index is left uninitialized for the tail (non-existent + // formats), kLoadShaderIndexUnknown may be non-zero, and format support may + // be disabled by setting the format to VK_FORMAT_UNDEFINED. + if (host_format.format_unsigned.format == VK_FORMAT_UNDEFINED) { + host_format.format_unsigned.load_shader = kLoadShaderIndexUnknown; + } + assert_false(host_format.format_unsigned.load_shader == + kLoadShaderIndexUnknown && + host_format.format_unsigned.format != VK_FORMAT_UNDEFINED); + if (host_format.format_unsigned.load_shader == kLoadShaderIndexUnknown) { + host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; + // With both fields cleared, the format is known to be unsupported. + host_format.format_unsigned.linear_filterable = false; + } + if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) { + host_format.format_signed.load_shader = kLoadShaderIndexUnknown; + } + assert_false(host_format.format_signed.load_shader == + kLoadShaderIndexUnknown && + host_format.format_signed.format != VK_FORMAT_UNDEFINED); + if (host_format.format_signed.load_shader == kLoadShaderIndexUnknown) { + host_format.format_signed.format = VK_FORMAT_UNDEFINED; + // With both fields cleared, the format is known to be unsupported. + host_format.format_signed.linear_filterable = false; + } + + // Check if the formats are supported and are linear-filterable.
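The normalization above makes the two fields of a HostFormat agree: a usable entry has both a defined Vulkan format and a known load shader, and anything else is forced into the fully-unsupported state. The invariant can be stated as a small predicate (hypothetical, for illustration only); the support checks announced by the comment above then clear both fields again for formats the device turns out not to support:

static bool IsHostFormatUsable(const HostFormat& host_format) {
  // After normalization, either both fields are set or both are cleared, and
  // linear_filterable is additionally false whenever the entry is unusable.
  return host_format.format != VK_FORMAT_UNDEFINED &&
         host_format.load_shader != kLoadShaderIndexUnknown;
}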
+ if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, host_format.format_unsigned.format, + &format_properties); + if (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) { + host_format.format_unsigned.linear_filterable = + (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; + } else { + host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; + host_format.format_unsigned.load_shader = kLoadShaderIndexUnknown; + host_format.format_unsigned.linear_filterable = false; + } + } + if (host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, + host_format.format_signed.format, + &format_properties); + if (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) { + host_format.format_signed.linear_filterable = + (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; + } else { + host_format.format_signed.format = VK_FORMAT_UNDEFINED; + host_format.format_signed.load_shader = kLoadShaderIndexUnknown; + host_format.format_signed.linear_filterable = false; + } + } + + // Log which formats are not supported or supported via fallbacks. + const HostFormatPair& best_host_format = kBestHostFormats[i]; + const char* guest_format_name = + FormatInfo::Get(xenos::TextureFormat(i))->name; + if (best_host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + assert_not_null(guest_format_name); + if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_unsigned.format != + best_host_format.format_unsigned.format) { + XELOGGPU( + "VulkanTextureCache: Format {} (unsigned) is supported via a " + "fallback format (using the Vulkan format {} instead of the " + "preferred {})", + guest_format_name, uint32_t(host_format.format_unsigned.format), + uint32_t(best_host_format.format_unsigned.format)); + } + } else { + XELOGGPU( + "VulkanTextureCache: Format {} (unsigned) is not supported by the " + "device (preferred Vulkan format is {})", + guest_format_name, + uint32_t(best_host_format.format_unsigned.format)); + } + } + if (best_host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + assert_not_null(guest_format_name); + if (host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_signed.format != + best_host_format.format_signed.format) { + XELOGGPU( + "VulkanTextureCache: Format {} (signed) is supported via a " + "fallback format (using the Vulkan format {} instead of the " + "preferred {})", + guest_format_name, uint32_t(host_format.format_signed.format), + uint32_t(best_host_format.format_signed.format)); + } + } else { + XELOGGPU( + "VulkanTextureCache: Format {} (signed) is not supported by the " + "device (preferred Vulkan format is {})", + guest_format_name, uint32_t(best_host_format.format_signed.format)); + } + } + + // Signednesses with different load shaders must have the data loaded + // differently, therefore can't share the image even if the format is the + // same. Also, if there's only one version, simplify the logic - there can't + // be compatibility between two formats when one of them is undefined. 
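When unsigned_signed_compatible ends up true for a pair, both signednesses can share a single VkImage with two differently formatted views. In Vulkan that generally means creating the image with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, optionally declaring the view formats up front via VK_KHR_image_format_list; a sketch of the idea, not the actual image creation code of this change (formats shown for the k_16 case):

VkFormat view_formats[] = {VK_FORMAT_R16_UNORM, VK_FORMAT_R16_SNORM};
VkImageFormatListCreateInfoKHR format_list_create_info = {};
format_list_create_info.sType =
    VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
format_list_create_info.viewFormatCount = 2;
format_list_create_info.pViewFormats = view_formats;
VkImageCreateInfo image_create_info = {};
image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
// Chain only if VK_KHR_image_format_list is available.
image_create_info.pNext = &format_list_create_info;
image_create_info.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
image_create_info.format = view_formats[0];
// ... the remaining fields as for any sampled image ...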
+ if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED && + host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_unsigned.load_shader == + host_format.format_signed.load_shader) { + if (host_format.format_unsigned.format == + host_format.format_signed.format) { + // Same format after all the fallbacks - force compatibility. + host_format.unsigned_signed_compatible = true; + } + } else { + host_format.unsigned_signed_compatible = false; + } + // Formats within the same compatibility class must have the same block + // size; if that's not the case, the fallbacks are configured incorrectly + // (such formats simply can't be in one compatibility class). + assert_false(host_format.unsigned_signed_compatible && + host_format.format_unsigned.block_compressed != + host_format.format_signed.block_compressed); + if (host_format.unsigned_signed_compatible && + host_format.format_unsigned.block_compressed != + host_format.format_signed.block_compressed) { + host_format.unsigned_signed_compatible = false; + } + } else { + host_format.unsigned_signed_compatible = false; + } + } + + // Load pipeline layout. + + VkDescriptorSetLayout load_descriptor_set_layouts[kLoadDescriptorSetCount] = + {}; + VkDescriptorSetLayout load_descriptor_set_layout_storage_buffer = + command_processor_.GetSingleTransientDescriptorLayout( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + assert_true(load_descriptor_set_layout_storage_buffer != VK_NULL_HANDLE); + load_descriptor_set_layouts[kLoadDescriptorSetIndexDestination] = + load_descriptor_set_layout_storage_buffer; + load_descriptor_set_layouts[kLoadDescriptorSetIndexSource] = + load_descriptor_set_layout_storage_buffer; + load_descriptor_set_layouts[kLoadDescriptorSetIndexConstants] = + command_processor_.GetSingleTransientDescriptorLayout( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kUniformBufferCompute); + assert_true(load_descriptor_set_layouts[kLoadDescriptorSetIndexConstants] != + VK_NULL_HANDLE); + VkPipelineLayoutCreateInfo load_pipeline_layout_create_info; + load_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + load_pipeline_layout_create_info.pNext = nullptr; + load_pipeline_layout_create_info.flags = 0; + load_pipeline_layout_create_info.setLayoutCount = kLoadDescriptorSetCount; + load_pipeline_layout_create_info.pSetLayouts = load_descriptor_set_layouts; + load_pipeline_layout_create_info.pushConstantRangeCount = 0; + load_pipeline_layout_create_info.pPushConstantRanges = nullptr; + if (dfn.vkCreatePipelineLayout(device, &load_pipeline_layout_create_info, + nullptr, &load_pipeline_layout_) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create the texture load pipeline layout"); + return false; + } + + // Load pipelines, only the ones needed for the formats that will be used.
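The load_shader_code table built next maps every load shader index to an embedded SPIR-V blob along with its size in bytes. The shaders:: arrays referenced below are compute shaders compiled offline and embedded into the binary as uint32_t arrays, conceptually like this (contents illustrative apart from 0x07230203, the standard SPIR-V magic number):

// In a generated header (illustrative):
namespace shaders {
const uint32_t texture_load_8bpb_cs[] = {
    0x07230203,  // SPIR-V magic number.
    // ... the rest of the compiled compute shader ...
};
}  // namespace shaders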
+ + bool load_shaders_needed[kLoadShaderCount] = {}; + for (size_t i = 0; i < xe::countof(host_formats_); ++i) { + const HostFormatPair& host_format = host_formats_[i]; + if (host_format.format_unsigned.load_shader != kLoadShaderIndexUnknown) { + load_shaders_needed[host_format.format_unsigned.load_shader] = true; + } + if (host_format.format_signed.load_shader != kLoadShaderIndexUnknown) { + load_shaders_needed[host_format.format_signed.load_shader] = true; + } + } + if (kHostFormatGBGRUnaligned.format_unsigned.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatGBGRUnaligned.format_unsigned.load_shader] = + true; + } + if (kHostFormatGBGRUnaligned.format_signed.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatGBGRUnaligned.format_signed.load_shader] = + true; + } + if (kHostFormatBGRGUnaligned.format_unsigned.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatBGRGUnaligned.format_unsigned.load_shader] = + true; + } + if (kHostFormatBGRGUnaligned.format_signed.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatBGRGUnaligned.format_signed.load_shader] = + true; + } + + std::pair<const uint32_t*, size_t> load_shader_code[kLoadShaderCount] = {}; + load_shader_code[kLoadShaderIndex8bpb] = std::make_pair( + shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs)); + load_shader_code[kLoadShaderIndex16bpb] = std::make_pair( + shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs)); + load_shader_code[kLoadShaderIndex32bpb] = std::make_pair( + shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs)); + load_shader_code[kLoadShaderIndex64bpb] = std::make_pair( + shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs)); + load_shader_code[kLoadShaderIndex128bpb] = std::make_pair( + shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs)); + load_shader_code[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + std::make_pair(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs)); + load_shader_code[kLoadShaderIndexR5G6B5ToB5G6R5] = + std::make_pair(shaders::texture_load_r5g6b5_b5g6r5_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs)); + load_shader_code[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + std::make_pair( + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, + sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs)); + load_shader_code[kLoadShaderIndexRGBA4ToARGB4] = + std::make_pair(shaders::texture_load_r4g4b4a4_a4r4g4b4_cs, + sizeof(shaders::texture_load_r4g4b4a4_a4r4g4b4_cs)); + load_shader_code[kLoadShaderIndexGBGR8ToRGB8] = + std::make_pair(shaders::texture_load_gbgr8_rgb8_cs, + sizeof(shaders::texture_load_gbgr8_rgb8_cs)); + load_shader_code[kLoadShaderIndexBGRG8ToRGB8] = + std::make_pair(shaders::texture_load_bgrg8_rgb8_cs, + sizeof(shaders::texture_load_bgrg8_rgb8_cs)); + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16] = + std::make_pair(shaders::texture_load_r10g11b11_rgba16_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_cs)); + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + std::make_pair(shaders::texture_load_r10g11b11_rgba16_snorm_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs)); + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16] = + std::make_pair(shaders::texture_load_r11g11b10_rgba16_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_cs)); + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = +
std::make_pair(shaders::texture_load_r11g11b10_rgba16_snorm_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs)); + load_shader_code[kLoadShaderIndexR16UNormToFloat] = + std::make_pair(shaders::texture_load_r16_unorm_float_cs, + sizeof(shaders::texture_load_r16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexR16SNormToFloat] = + std::make_pair(shaders::texture_load_r16_snorm_float_cs, + sizeof(shaders::texture_load_r16_snorm_float_cs)); + load_shader_code[kLoadShaderIndexRG16UNormToFloat] = + std::make_pair(shaders::texture_load_rg16_unorm_float_cs, + sizeof(shaders::texture_load_rg16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexRG16SNormToFloat] = + std::make_pair(shaders::texture_load_rg16_snorm_float_cs, + sizeof(shaders::texture_load_rg16_snorm_float_cs)); + load_shader_code[kLoadShaderIndexRGBA16UNormToFloat] = + std::make_pair(shaders::texture_load_rgba16_unorm_float_cs, + sizeof(shaders::texture_load_rgba16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexRGBA16SNormToFloat] = + std::make_pair(shaders::texture_load_rgba16_snorm_float_cs, + sizeof(shaders::texture_load_rgba16_snorm_float_cs)); + load_shader_code[kLoadShaderIndexDXT1ToRGBA8] = + std::make_pair(shaders::texture_load_dxt1_rgba8_cs, + sizeof(shaders::texture_load_dxt1_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXT3ToRGBA8] = + std::make_pair(shaders::texture_load_dxt3_rgba8_cs, + sizeof(shaders::texture_load_dxt3_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXT5ToRGBA8] = + std::make_pair(shaders::texture_load_dxt5_rgba8_cs, + sizeof(shaders::texture_load_dxt5_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXNToRG8] = + std::make_pair(shaders::texture_load_dxn_rg8_cs, + sizeof(shaders::texture_load_dxn_rg8_cs)); + load_shader_code[kLoadShaderIndexDXT3A] = std::make_pair( + shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs)); + load_shader_code[kLoadShaderIndexDXT3AAs1111ToARGB4] = + std::make_pair(shaders::texture_load_dxt3aas1111_argb4_cs, + sizeof(shaders::texture_load_dxt3aas1111_argb4_cs)); + load_shader_code[kLoadShaderIndexDXT5AToR8] = + std::make_pair(shaders::texture_load_dxt5a_r8_cs, + sizeof(shaders::texture_load_dxt5a_r8_cs)); + load_shader_code[kLoadShaderIndexCTX1] = std::make_pair( + shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs)); + load_shader_code[kLoadShaderIndexDepthUnorm] = + std::make_pair(shaders::texture_load_depth_unorm_cs, + sizeof(shaders::texture_load_depth_unorm_cs)); + load_shader_code[kLoadShaderIndexDepthFloat] = + std::make_pair(shaders::texture_load_depth_float_cs, + sizeof(shaders::texture_load_depth_float_cs)); + std::pair<const uint32_t*, size_t> load_shader_code_scaled[kLoadShaderCount] = + {}; + if (IsDrawResolutionScaled()) { + load_shader_code_scaled[kLoadShaderIndex8bpb] = + std::make_pair(shaders::texture_load_8bpb_scaled_cs, + sizeof(shaders::texture_load_8bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex16bpb] = + std::make_pair(shaders::texture_load_16bpb_scaled_cs, + sizeof(shaders::texture_load_16bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex32bpb] = + std::make_pair(shaders::texture_load_32bpb_scaled_cs, + sizeof(shaders::texture_load_32bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex64bpb] = + std::make_pair(shaders::texture_load_64bpb_scaled_cs, + sizeof(shaders::texture_load_64bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex128bpb] = + std::make_pair(shaders::texture_load_128bpb_scaled_cs, + sizeof(shaders::texture_load_128bpb_scaled_cs)); +
load_shader_code_scaled[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + std::make_pair( + shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR5G6B5ToB5G6R5] = + std::make_pair(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + std::make_pair( + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, + sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA4ToARGB4] = std::make_pair( + shaders::texture_load_r4g4b4a4_a4r4g4b4_scaled_cs, + sizeof(shaders::texture_load_r4g4b4a4_a4r4g4b4_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16] = std::make_pair( + shaders::texture_load_r10g11b11_rgba16_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + std::make_pair( + shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16] = std::make_pair( + shaders::texture_load_r11g11b10_rgba16_scaled_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = + std::make_pair( + shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR16UNormToFloat] = + std::make_pair(shaders::texture_load_r16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_r16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR16SNormToFloat] = + std::make_pair(shaders::texture_load_r16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_r16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRG16UNormToFloat] = std::make_pair( + shaders::texture_load_rg16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_rg16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRG16SNormToFloat] = std::make_pair( + shaders::texture_load_rg16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_rg16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA16UNormToFloat] = + std::make_pair( + shaders::texture_load_rgba16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_rgba16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA16SNormToFloat] = + std::make_pair( + shaders::texture_load_rgba16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_rgba16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexDepthUnorm] = + std::make_pair(shaders::texture_load_depth_unorm_scaled_cs, + sizeof(shaders::texture_load_depth_unorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexDepthFloat] = + std::make_pair(shaders::texture_load_depth_float_scaled_cs, + sizeof(shaders::texture_load_depth_float_scaled_cs)); + } + + for (size_t i = 0; i < kLoadShaderCount; ++i) { + if (!load_shaders_needed[i]) { + continue; + } + const std::pair<const uint32_t*, size_t>& current_load_shader_code = + load_shader_code[i]; + assert_not_null(current_load_shader_code.first); + load_pipelines_[i] = ui::vulkan::util::CreateComputePipeline( + provider, load_pipeline_layout_, current_load_shader_code.first, + current_load_shader_code.second); + if (load_pipelines_[i] == VK_NULL_HANDLE) { + XELOGE( +
"VulkanTextureCache: Failed to create the texture loading pipeline " + "for shader {}", + i); + return false; + } + if (IsDrawResolutionScaled()) { + const std::pair& + current_load_shader_code_scaled = load_shader_code_scaled[i]; + if (current_load_shader_code_scaled.first) { + load_pipelines_scaled_[i] = ui::vulkan::util::CreateComputePipeline( + provider, load_pipeline_layout_, + current_load_shader_code_scaled.first, + current_load_shader_code_scaled.second); + if (load_pipelines_scaled_[i] == VK_NULL_HANDLE) { + XELOGE( + "VulkanTextureCache: Failed to create the resolution-scaled " + "texture loading pipeline for shader {}", + i); + return false; + } + } + } + } + + // Null images as a replacement for unneeded bindings and for bindings for + // which the real image hasn't been created. + // TODO(Triang3l): Use VK_EXT_robustness2 null descriptors. + + VkImageCreateInfo null_image_create_info; + null_image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + null_image_create_info.pNext = nullptr; + null_image_create_info.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + null_image_create_info.imageType = VK_IMAGE_TYPE_2D; + // Four components to return (0, 0, 0, 0). + // TODO(Triang3l): Find the return value for invalid texture fetch constants + // on the real hardware. + null_image_create_info.format = VK_FORMAT_R8G8B8A8_UNORM; + null_image_create_info.extent.width = 1; + null_image_create_info.extent.height = 1; + null_image_create_info.extent.depth = 1; + null_image_create_info.mipLevels = 1; + null_image_create_info.arrayLayers = 6; + null_image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; + null_image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + null_image_create_info.usage = + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + null_image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + null_image_create_info.queueFamilyIndexCount = 0; + null_image_create_info.pQueueFamilyIndices = nullptr; + null_image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + if (dfn.vkCreateImage(device, &null_image_create_info, nullptr, + &null_image_2d_array_cube_) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create the null 2D array and cube " + "image"); + return false; + } + + null_image_create_info.flags &= ~VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + null_image_create_info.imageType = VK_IMAGE_TYPE_3D; + null_image_create_info.arrayLayers = 1; + if (dfn.vkCreateImage(device, &null_image_create_info, nullptr, + &null_image_3d_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 3D image"); + return false; + } + + VkMemoryRequirements null_image_memory_requirements_2d_array_cube_; + dfn.vkGetImageMemoryRequirements( + device, null_image_2d_array_cube_, + &null_image_memory_requirements_2d_array_cube_); + VkMemoryRequirements null_image_memory_requirements_3d_; + dfn.vkGetImageMemoryRequirements(device, null_image_3d_, + &null_image_memory_requirements_3d_); + uint32_t null_image_memory_type_common = ui::vulkan::util::ChooseMemoryType( + provider, + null_image_memory_requirements_2d_array_cube_.memoryTypeBits & + null_image_memory_requirements_3d_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + if (null_image_memory_type_common != UINT32_MAX) { + // Place both null images in one memory allocation because maximum total + // memory allocation count is limited. 
+ VkDeviceSize null_image_memory_offset_3d_ = + xe::align(null_image_memory_requirements_2d_array_cube_.size, + std::max(null_image_memory_requirements_3d_.alignment, + VkDeviceSize(1))); + VkMemoryAllocateInfo null_image_memory_allocate_info; + null_image_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + null_image_memory_allocate_info.pNext = nullptr; + null_image_memory_allocate_info.allocationSize = + null_image_memory_offset_3d_ + null_image_memory_requirements_3d_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_common; + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[0]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null " + "images"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_2d_array_cube_, + null_images_memory_[0], 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 2D array " + "and cube image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_3d_, null_images_memory_[0], + null_image_memory_offset_3d_) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 3D image"); + return false; + } + } else { + // Place each null image in separate allocations. + uint32_t null_image_memory_type_2d_array_cube_ = + ui::vulkan::util::ChooseMemoryType( + provider, + null_image_memory_requirements_2d_array_cube_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + uint32_t null_image_memory_type_3d_ = ui::vulkan::util::ChooseMemoryType( + provider, null_image_memory_requirements_3d_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + if (null_image_memory_type_2d_array_cube_ == UINT32_MAX || + null_image_memory_type_3d_ == UINT32_MAX) { + XELOGE( + "VulkanTextureCache: Failed to get the memory types for the null " + "images"); + return false; + } + + VkMemoryAllocateInfo null_image_memory_allocate_info; + VkMemoryAllocateInfo* null_image_memory_allocate_info_last = + &null_image_memory_allocate_info; + null_image_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + null_image_memory_allocate_info.pNext = nullptr; + null_image_memory_allocate_info.allocationSize = + null_image_memory_requirements_2d_array_cube_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_2d_array_cube_; + VkMemoryDedicatedAllocateInfoKHR null_image_memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + null_image_memory_allocate_info_last->pNext = + &null_image_memory_dedicated_allocate_info; + null_image_memory_allocate_info_last = + reinterpret_cast<VkMemoryAllocateInfo*>( + &null_image_memory_dedicated_allocate_info); + null_image_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + null_image_memory_dedicated_allocate_info.pNext = nullptr; + null_image_memory_dedicated_allocate_info.image = + null_image_2d_array_cube_; + null_image_memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE; + } + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[0]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null 2D " + "array and cube image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_2d_array_cube_, + null_images_memory_[0], 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null
2D array " + "and cube image"); + return false; + } + + null_image_memory_allocate_info.allocationSize = + null_image_memory_requirements_3d_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_3d_; + null_image_memory_dedicated_allocate_info.image = null_image_3d_; + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[1]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null 3D " + "image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_3d_, null_images_memory_[1], + 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 3D image"); + return false; + } + } + + VkImageViewCreateInfo null_image_view_create_info; + null_image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + null_image_view_create_info.pNext = nullptr; + null_image_view_create_info.flags = 0; + null_image_view_create_info.image = null_image_2d_array_cube_; + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + null_image_view_create_info.format = null_image_create_info.format; + // TODO(Triang3l): Find the return value for invalid texture fetch constants + // on the real hardware. + // As a micro-optimization (if this has any effect on the host GPU at all), + // use only constant components instead of the real texels. The image will be + // cleared to (0, 0, 0, 0) anyway. + VkComponentSwizzle null_image_view_swizzle = + (!device_portability_subset_features || + device_portability_subset_features->imageViewFormatSwizzle) + ? VK_COMPONENT_SWIZZLE_ZERO + : VK_COMPONENT_SWIZZLE_IDENTITY; + null_image_view_create_info.components.r = null_image_view_swizzle; + null_image_view_create_info.components.g = null_image_view_swizzle; + null_image_view_create_info.components.b = null_image_view_swizzle; + null_image_view_create_info.components.a = null_image_view_swizzle; + null_image_view_create_info.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, 1); + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_2d_array_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 2D array image view"); + return false; + } + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + null_image_view_create_info.subresourceRange.layerCount = 6; + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_cube_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null cube image view"); + return false; + } + null_image_view_create_info.image = null_image_3d_; + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + null_image_view_create_info.subresourceRange.layerCount = 1; + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_3d_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 3D image view"); + return false; + } + + null_images_cleared_ = false; + + // Samplers. + + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + + // Some MoltenVK devices have a maximum of 2048, 1024, or even 96 samplers, + // below Vulkan's minimum requirement of 4000.
+ // Assuming the current VulkanTextureCache is the only one on this VkDevice + // (true in a regular emulation scenario), take over all the allocation slots + // exclusively, leaving a few for use by things like overlay applications. + sampler_max_count_ = + device_limits.maxSamplerAllocationCount - + uint32_t(ui::vulkan::VulkanProvider::HostSampler::kCount) - 16; + + if (device_features.samplerAnisotropy) { + max_anisotropy_ = xenos::AnisoFilter( + uint32_t(xenos::AnisoFilter::kMax_1_1) + + (31 - + xe::lzcnt(uint32_t(std::min( + 16.0f, std::max(1.0f, device_limits.maxSamplerAnisotropy)))))); + } else { + max_anisotropy_ = xenos::AnisoFilter::kDisabled; + } + + return true; +} + +const VulkanTextureCache::HostFormatPair& VulkanTextureCache::GetHostFormatPair( + TextureKey key) const { + if (key.format == xenos::TextureFormat::k_Cr_Y1_Cb_Y0_REP && + (key.GetWidth() & 1)) { + return kHostFormatGBGRUnaligned; + } + if (key.format == xenos::TextureFormat::k_Y1_Cr_Y0_Cb_REP && + (key.GetWidth() & 1)) { + return kHostFormatBGRGUnaligned; + } + return host_formats_[uint32_t(key.format)]; +} + +void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask, + VkImageLayout& layout) { + stage_mask = 0; + access_mask = 0; + layout = VK_IMAGE_LAYOUT_UNDEFINED; + switch (usage) { + case VulkanTexture::Usage::kUndefined: + break; + case VulkanTexture::Usage::kTransferDestination: + stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + break; + case VulkanTexture::Usage::kGuestShaderSampled: + stage_mask = guest_shader_pipeline_stages_; + access_mask = VK_ACCESS_SHADER_READ_BIT; + layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VulkanTexture::Usage::kSwapSampled: + // The swap texture is likely to be used only for the presentation + // fragment shader, and not during emulation, where it'd be used in other + // stages. + stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_mask = VK_ACCESS_SHADER_READ_BIT; + layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + } +} + +xenos::ClampMode VulkanTextureCache::NormalizeClampMode( + xenos::ClampMode clamp_mode) const { + if (clamp_mode == xenos::ClampMode::kClampToHalfway) { + // No GL_CLAMP (clamp to half edge, half border) equivalent in Vulkan, but + // there's no Direct3D 9 equivalent anyway, and it's too weird to be + // suitable for intentional real usage. + return xenos::ClampMode::kClampToEdge; + } + if (clamp_mode == xenos::ClampMode::kMirrorClampToEdge || + clamp_mode == xenos::ClampMode::kMirrorClampToHalfway || + clamp_mode == xenos::ClampMode::kMirrorClampToBorder) { + // TODO(Triang3l): VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR if + // VK_KHR_sampler_mirror_clamp_to_edge (or Vulkan 1.2) and the + // samplerMirrorClampToEdge feature are supported. + return xenos::ClampMode::kMirroredRepeat; + } + return clamp_mode; } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index a7ddaf861..448e74d03 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik.
All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,22 +10,15 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_ #define XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_ -#include <algorithm> -#include <list> +#include <array> +#include <cstdint> #include <memory> -#include <vector> -#include "xenia/base/mutex.h" -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/texture_conversion.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/trace_writer.h" -#include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/circular_buffer.h" -#include "xenia/ui/vulkan/fenced_pools.h" +#include "xenia/base/hash.h" +#include "xenia/gpu/texture_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_mem_alloc.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -33,205 +26,334 @@ namespace xe { namespace gpu { namespace vulkan { -// -class VulkanTextureCache { +class VulkanCommandProcessor; + +class VulkanTextureCache final : public TextureCache { public: - struct TextureView; - - // This represents an uploaded Vulkan texture. - struct Texture { - TextureInfo texture_info; - std::vector<std::unique_ptr<TextureView>> views; - - VkFormat format; - VkImage image; - VkImageLayout image_layout; - VmaAllocation alloc; - VmaAllocationInfo alloc_info; - VkFramebuffer framebuffer; // Blit target frame buffer. - VkImageUsageFlags usage_flags; - - bool is_watched; - bool pending_invalidation; - - // Pointer to the latest usage fence. - VkFence in_flight_fence; - }; - - struct TextureView { - Texture* texture; - VkImageView view; - - union { - uint16_t swizzle; - struct { - // FIXME: This only applies on little-endian platforms! - uint16_t swiz_x : 3; - uint16_t swiz_y : 3; - uint16_t swiz_z : 3; - uint16_t swiz_w : 3; - uint16_t : 4; - }; + // Sampler parameters that can be directly converted to a host sampler or used + // for checking whether sampler bindings are up to date. + union SamplerParameters { + uint32_t value; + struct { + xenos::ClampMode clamp_x : 3; // 3 + xenos::ClampMode clamp_y : 3; // 6 + xenos::ClampMode clamp_z : 3; // 9 + xenos::BorderColor border_color : 2; // 11 + uint32_t mag_linear : 1; // 12 + uint32_t min_linear : 1; // 13 + uint32_t mip_linear : 1; // 14 + xenos::AnisoFilter aniso_filter : 3; // 17 + uint32_t mip_min_level : 4; // 21 + uint32_t mip_base_map : 1; // 22 + // Maximum mip level is in the texture resource itself, but mip_base_map + // can be used to limit fetching to mip_min_level. }; + + SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); } + struct Hasher { + size_t operator()(const SamplerParameters& parameters) const { + return std::hash<uint32_t>{}(parameters.value); + } + }; + bool operator==(const SamplerParameters& parameters) const { + return value == parameters.value; + } + bool operator!=(const SamplerParameters& parameters) const { + return value != parameters.value; + } }; - VulkanTextureCache(Memory* memory, RegisterFile* register_file, - TraceWriter* trace_writer, - ui::vulkan::VulkanProvider& provider); - ~VulkanTextureCache(); - - VkResult Initialize(); - void Shutdown(); - - // Descriptor set layout containing all possible texture bindings. - // The set contains one descriptor for each texture sampler [0-31].
- VkDescriptorSetLayout texture_descriptor_set_layout() const { - return texture_descriptor_set_layout_; + // Transient descriptor set layouts must be initialized in the command + // processor. + static std::unique_ptr<VulkanTextureCache> Create( + const RegisterFile& register_file, VulkanSharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, + VulkanCommandProcessor& command_processor, + VkPipelineStageFlags guest_shader_pipeline_stages) { + std::unique_ptr<VulkanTextureCache> texture_cache(new VulkanTextureCache( + register_file, shared_memory, draw_resolution_scale_x, + draw_resolution_scale_y, command_processor, + guest_shader_pipeline_stages)); + if (!texture_cache->Initialize()) { + return nullptr; + } + return std::move(texture_cache); } - // Prepares a descriptor set containing the samplers and images for all - // bindings. The textures will be uploaded/converted/etc as needed. - // Requires a fence to be provided that will be signaled when finished - // using the returned descriptor set. - VkDescriptorSet PrepareTextureSet( - VkCommandBuffer setup_command_buffer, VkFence completion_fence, - const std::vector<Shader::TextureBinding>& vertex_bindings, - const std::vector<Shader::TextureBinding>& pixel_bindings); + ~VulkanTextureCache(); - // TODO(benvanik): ReadTexture. + void BeginSubmission(uint64_t new_submission_index) override; - Texture* Lookup(const TextureInfo& texture_info); + // Must be called within a frame - creates and untiles textures needed by + // shaders, and enqueues transitioning them into the sampled usage. This may + // bind compute pipelines (notifying the command processor about that), and + // also, since it may insert deferred barriers, it must be called before + // flushing the barriers preceding host GPU work. + void RequestTextures(uint32_t used_texture_mask) override; - // Looks for a texture either containing or matching these parameters. - // Caller is responsible for checking if the texture returned is an exact - // match or just contains the texture given by the parameters. - // If offset_x and offset_y are not null, this may return a texture that - // contains this address at an offset. - Texture* LookupAddress(uint32_t guest_address, uint32_t width, - uint32_t height, xenos::TextureFormat format, - VkOffset2D* out_offset = nullptr); + VkImageView GetActiveBindingOrNullImageView(uint32_t fetch_constant_index, + xenos::FetchOpDimension dimension, + bool is_signed) const; - TextureView* DemandView(Texture* texture, uint16_t swizzle); + SamplerParameters GetSamplerParameters( + const VulkanShader::SamplerBinding& binding) const; - // Demands a texture for the purpose of resolving from EDRAM. This either - // creates a new texture or returns a previously created texture. - Texture* DemandResolveTexture(const TextureInfo& texture_info); + // Must be called for every used sampler at least once in a single submission, + // and a submission must be open for this to be callable. + // Returns: + // - The sampler, if obtained successfully - and increases its last usage + // submission index - and has_overflown_out = false. + // - VK_NULL_HANDLE and has_overflown_out = true if there's a total sampler + // count overflow in a submission that potentially hasn't completed yet. + // - VK_NULL_HANDLE and has_overflown_out = false in case of a general failure + // to create a sampler.
+ VkSampler UseSampler(SamplerParameters parameters, bool& has_overflown_out); + // Returns the submission index to await (may be the current submission in + // case of an overflow within a single submission - in this case, it must be + // ended, and a new one must be started) in case of sampler count overflow, so + // samplers may be freed, and UseSampler may take their slots. + uint64_t GetSubmissionToAwaitOnSamplerOverflow( + uint32_t overflowed_sampler_count) const; - // Clears all cached content. - void ClearCache(); + // Returns the 2D view of the front buffer texture (for fragment shader + // reading - the barrier will be pushed in the command processor if needed), + // or VK_NULL_HANDLE in case of failure. May call LoadTextureData. + VkImageView RequestSwapTexture(uint32_t& width_scaled_out, + uint32_t& height_scaled_out, + xenos::TextureFormat& format_out); - // Frees any unused resources. - void Scavenge(); + protected: + bool IsSignedVersionSeparateForFormat(TextureKey key) const override; + uint32_t GetHostFormatSwizzle(TextureKey key) const override; + + uint32_t GetMaxHostTextureWidthHeight( + xenos::DataDimension dimension) const override; + uint32_t GetMaxHostTextureDepthOrArraySize( + xenos::DataDimension dimension) const override; + + std::unique_ptr<Texture> CreateTexture(TextureKey key) override; + + bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, + bool load_mips) override; + + void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override; private: - struct UpdateSetInfo; + enum LoadDescriptorSetIndex { + kLoadDescriptorSetIndexDestination, + kLoadDescriptorSetIndexSource, + kLoadDescriptorSetIndexConstants, + kLoadDescriptorSetCount, + }; + + struct HostFormat { + LoadShaderIndex load_shader; + // Do NOT add integer formats to this - they are not filterable, can only be + // read with ImageFetch, not ImageSample! If any game is seen using + // num_format 1 for fixed-point formats (for floating-point, it's normally + // set to 1 though), add a constant buffer containing multipliers for the + // textures and multiplication to the tfetch implementation. + VkFormat format; + // Whether the format is block-compressed on the host (the host block size + // matches the guest format block size in this case), and isn't decompressed + // on load. + bool block_compressed; + + // Set up dynamically based on what's supported by the device. + bool linear_filterable; + }; + + struct HostFormatPair { + HostFormat format_unsigned; + HostFormat format_signed; + // Mapping of Xenos swizzle components to Vulkan format components. + uint32_t swizzle; + // Whether the unsigned and the signed formats are compatible for one image + // and the same image data (on a portability subset device, this should also + // take imageViewFormatReinterpretation into account). + bool unsigned_signed_compatible; + }; + + class VulkanTexture final : public Texture { + public: + enum class Usage { + kUndefined, + kTransferDestination, + kGuestShaderSampled, + kSwapSampled, + }; + + // Takes ownership of the image and its memory. + explicit VulkanTexture(VulkanTextureCache& texture_cache, + const TextureKey& key, VkImage image, + VmaAllocation allocation); + ~VulkanTexture(); + + VkImage image() const { return image_; } + + // Doesn't transition (the caller must insert the barrier).
+ Usage SetUsage(Usage new_usage) { + Usage old_usage = usage_; + usage_ = new_usage; + return old_usage; + } + + VkImageView GetView(bool is_signed, uint32_t host_swizzle, + bool is_array = true); + + private: + union ViewKey { + uint32_t key; + struct { + uint32_t is_signed_separate_view : 1; + uint32_t host_swizzle : 12; + uint32_t is_array : 1; + }; + + ViewKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const ViewKey& key) const { + return std::hash<uint32_t>{}(key.key); + } + }; + bool operator==(const ViewKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const ViewKey& other_key) const { + return !(*this == other_key); + } + }; + + static constexpr VkComponentSwizzle GetComponentSwizzle( + uint32_t texture_swizzle, uint32_t component_index) { + xenos::XE_GPU_TEXTURE_SWIZZLE texture_component_swizzle = + xenos::XE_GPU_TEXTURE_SWIZZLE( + (texture_swizzle >> (3 * component_index)) & 0b111); + if (texture_component_swizzle == + xenos::XE_GPU_TEXTURE_SWIZZLE(component_index)) { + // The portability subset requires all swizzles to be IDENTITY, return + // IDENTITY specifically, not R, G, B, A. + return VK_COMPONENT_SWIZZLE_IDENTITY; + } + switch (texture_component_swizzle) { + case xenos::XE_GPU_TEXTURE_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R; + case xenos::XE_GPU_TEXTURE_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_G; + case xenos::XE_GPU_TEXTURE_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_B; + case xenos::XE_GPU_TEXTURE_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_A; + case xenos::XE_GPU_TEXTURE_SWIZZLE_0: + return VK_COMPONENT_SWIZZLE_ZERO; + case xenos::XE_GPU_TEXTURE_SWIZZLE_1: + return VK_COMPONENT_SWIZZLE_ONE; + default: + // An invalid value. + return VK_COMPONENT_SWIZZLE_IDENTITY; + } + } + + VkImage image_; + VmaAllocation allocation_; + + Usage usage_ = Usage::kUndefined; + + std::unordered_map<ViewKey, VkImageView, ViewKey::Hasher> views_; + }; + + struct VulkanTextureBinding { + VkImageView image_view_unsigned; + VkImageView image_view_signed; + + VulkanTextureBinding() { Reset(); } + + void Reset() { + image_view_unsigned = VK_NULL_HANDLE; + image_view_signed = VK_NULL_HANDLE; + } + }; - // Cached Vulkan sampler. struct Sampler { - SamplerInfo sampler_info; VkSampler sampler; + uint64_t last_usage_submission; + std::pair<const SamplerParameters, Sampler>* used_previous; + std::pair<const SamplerParameters, Sampler>* used_next; }; - struct WatchedTexture { - Texture* texture; - bool is_mip; - }; + static constexpr bool AreDimensionsCompatible( + xenos::FetchOpDimension binding_dimension, + xenos::DataDimension resource_dimension) { + switch (binding_dimension) { + case xenos::FetchOpDimension::k1D: + case xenos::FetchOpDimension::k2D: + return resource_dimension == xenos::DataDimension::k1D || + resource_dimension == xenos::DataDimension::k2DOrStacked; + case xenos::FetchOpDimension::k3DOrStacked: + return resource_dimension == xenos::DataDimension::k3D; + case xenos::FetchOpDimension::kCube: + return resource_dimension == xenos::DataDimension::kCube; + default: + return false; + } + } - // Allocates a new texture and memory to back it on the GPU.
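The Sampler bookkeeping fields above (last_usage_submission plus the used_previous/used_next links, together with sampler_used_first_/sampler_used_last_ declared further below) form an intrusive LRU list over the samplers_ map: when sampler_max_count_ would be exceeded, the least recently used sampler whose submission has completed can be destroyed and its slot reused. A sketch of the touch operation UseSampler presumably performs on a cache hit (simplified; names taken from the surrounding declarations):

// Move the entry to the most-recently-used end of the intrusive list.
void TouchSampler(std::pair<const SamplerParameters, Sampler>* entry,
                  uint64_t current_submission) {
  Sampler& sampler = entry->second;
  sampler.last_usage_submission = current_submission;
  if (sampler_used_last_ == entry) {
    return;  // Already the most recently used.
  }
  // Unlink from the current position.
  if (sampler.used_previous) {
    sampler.used_previous->second.used_next = sampler.used_next;
  } else {
    sampler_used_first_ = sampler.used_next;
  }
  if (sampler.used_next) {
    sampler.used_next->second.used_previous = sampler.used_previous;
  }
  // Relink at the tail.
  sampler.used_previous = sampler_used_last_;
  sampler.used_next = nullptr;
  if (sampler_used_last_) {
    sampler_used_last_->second.used_next = entry;
  } else {
    sampler_used_first_ = entry;
  }
  sampler_used_last_ = entry;
}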
- Texture* AllocateTexture(const TextureInfo& texture_info, - VkFormatFeatureFlags required_flags = - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); - bool FreeTexture(Texture* texture); + explicit VulkanTextureCache( + const RegisterFile& register_file, VulkanSharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, + VulkanCommandProcessor& command_processor, + VkPipelineStageFlags guest_shader_pipeline_stages); - void WatchTexture(Texture* texture); - void TextureTouched(Texture* texture); - std::pair<uint32_t, uint32_t> MemoryInvalidationCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range); - static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk( - void* context_ptr, uint32_t physical_address_start, uint32_t length, - bool exact_range); + bool Initialize(); - // Demands a texture. If command_buffer is null and the texture hasn't been - // uploaded to graphics memory already, we will return null and bail. - Texture* Demand(const TextureInfo& texture_info, - VkCommandBuffer command_buffer = nullptr, - VkFence completion_fence = nullptr); - Sampler* Demand(const SamplerInfo& sampler_info); + const HostFormatPair& GetHostFormatPair(TextureKey key) const; - void FlushPendingCommands(VkCommandBuffer command_buffer, - VkFence completion_fence); + void GetTextureUsageMasks(VulkanTexture::Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask, VkImageLayout& layout); - bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region, - uint32_t mip, const TextureInfo& src); + xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const; - static const FormatInfo* GetFormatInfo(xenos::TextureFormat format); - static texture_conversion::CopyBlockCallback GetFormatCopyBlock( - xenos::TextureFormat format); - static TextureExtent GetMipExtent(const TextureInfo& src, uint32_t mip); - static uint32_t ComputeMipStorage(const FormatInfo* format_info, - uint32_t width, uint32_t height, - uint32_t depth, uint32_t mip); - static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip); - static uint32_t ComputeTextureStorage(const TextureInfo& src); + VulkanCommandProcessor& command_processor_; + VkPipelineStageFlags guest_shader_pipeline_stages_; - // Writes a texture back into guest memory. This call is (mostly) asynchronous - // but the texture must not be flagged for destruction. - void WritebackTexture(Texture* texture); + // Using the Vulkan Memory Allocator because texture count in games is + // naturally pretty much unbounded, while Vulkan implementations, especially + // on Windows versions before 10, may have an allocation count limit as low as + // 4096. + VmaAllocator vma_allocator_ = VK_NULL_HANDLE; - // Queues commands to upload a texture from system memory, applying any - // conversions necessary. This may flush the command buffer to the GPU if we - // run out of staging memory.
- bool UploadTexture(VkCommandBuffer command_buffer, VkFence completion_fence, - Texture* dest, const TextureInfo& src); + static const HostFormatPair kBestHostFormats[64]; + static const HostFormatPair kHostFormatGBGRUnaligned; + static const HostFormatPair kHostFormatBGRGUnaligned; + HostFormatPair host_formats_[64]; - void HashTextureBindings(XXH3_state_t* hash_state, uint32_t& fetch_mask, - const std::vector<Shader::TextureBinding>& bindings); - bool SetupTextureBindings( - VkCommandBuffer command_buffer, VkFence completion_fence, - UpdateSetInfo* update_set_info, - const std::vector<Shader::TextureBinding>& bindings); - bool SetupTextureBinding(VkCommandBuffer command_buffer, - VkFence completion_fence, - UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding); + VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE; + std::array<VkPipeline, kLoadShaderCount> load_pipelines_{}; + std::array<VkPipeline, kLoadShaderCount> load_pipelines_scaled_{}; - // Removes invalidated textures from the cache, queues them for delete. - void RemoveInvalidatedTextures(); + // If both images can be placed in the same allocation, it's one allocation, + // otherwise it's two separate. + std::array<VkDeviceMemory, 2> null_images_memory_{}; + VkImage null_image_2d_array_cube_ = VK_NULL_HANDLE; + VkImage null_image_3d_ = VK_NULL_HANDLE; + VkImageView null_image_view_2d_array_ = VK_NULL_HANDLE; + VkImageView null_image_view_cube_ = VK_NULL_HANDLE; + VkImageView null_image_view_3d_ = VK_NULL_HANDLE; + bool null_images_cleared_ = false; - Memory* memory_ = nullptr; + std::array<VulkanTextureBinding, xenos::kTextureFetchConstantCount> + vulkan_texture_bindings_; - RegisterFile* register_file_ = nullptr; - TraceWriter* trace_writer_ = nullptr; - ui::vulkan::VulkanProvider& provider_; + uint32_t sampler_max_count_; - std::unique_ptr<ui::vulkan::CommandBufferPool> wb_command_pool_ = nullptr; - std::unique_ptr<ui::vulkan::DescriptorPool> descriptor_pool_ = nullptr; - std::unordered_map<uint64_t, VkDescriptorSet> texture_sets_; - VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; + xenos::AnisoFilter max_anisotropy_; - VmaAllocator mem_allocator_ = nullptr; - - ui::vulkan::CircularBuffer staging_buffer_; - ui::vulkan::CircularBuffer wb_staging_buffer_; - std::unordered_map<uint64_t, Texture*> textures_; - std::unordered_map<uint64_t, Sampler*> samplers_; - std::list<Texture*> pending_delete_textures_; - - void* memory_invalidation_callback_handle_ = nullptr; - - xe::global_critical_region global_critical_region_; - std::list<WatchedTexture> watched_textures_; - std::unordered_set<Texture*>* invalidated_textures_; - std::unordered_set<Texture*> invalidated_textures_sets_[2]; - - struct UpdateSetInfo { - // Bitmap of all 32 fetch constants and whether they have been setup yet. - // This prevents duplication across the vertex and pixel shader. - uint32_t has_setup_fetch_mask; - uint32_t image_write_count = 0; - VkWriteDescriptorSet image_writes[32]; - VkDescriptorImageInfo image_infos[32]; - } update_set_info_; + std::unordered_map<SamplerParameters, Sampler, SamplerParameters::Hasher> + samplers_; + std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr; + std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc index e50abb41f..1c0616052 100644 --- a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc +++ b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2021 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details.
* ****************************************************************************** */ @@ -19,8 +19,6 @@ namespace xe { namespace gpu { namespace vulkan { -using namespace xe::gpu::xenos; - class VulkanTraceViewer final : public TraceViewer { public: static std::unique_ptr Create( @@ -35,36 +33,21 @@ class VulkanTraceViewer final : public TraceViewer { uintptr_t GetColorRenderTarget( uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, xenos::ColorRenderTargetFormat format) override { - auto command_processor = static_cast( - graphics_system()->command_processor()); - // return command_processor->GetColorRenderTarget(pitch, samples, base, - // format); + // TODO(Triang3l): EDRAM viewer. return 0; } uintptr_t GetDepthRenderTarget( uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, xenos::DepthRenderTargetFormat format) override { - auto command_processor = static_cast( - graphics_system()->command_processor()); - // return command_processor->GetDepthRenderTarget(pitch, samples, base, - // format); + // TODO(Triang3l): EDRAM viewer. return 0; } uintptr_t GetTextureEntry(const TextureInfo& texture_info, const SamplerInfo& sampler_info) override { - auto command_processor = static_cast( - graphics_system()->command_processor()); - - // auto entry_view = - // command_processor->texture_cache()->Demand(texture_info, - // sampler_info); - // if (!entry_view) { - // return 0; - //} - // auto texture = entry_view->texture; - // return static_cast(texture->handle); + // TODO(Triang3l): Textures, but from a fetch constant rather than + // TextureInfo/SamplerInfo which are going away. return 0; } diff --git a/src/xenia/ui/spirv/premake5.lua b/src/xenia/ui/spirv/premake5.lua deleted file mode 100644 index 9988a051a..000000000 --- a/src/xenia/ui/spirv/premake5.lua +++ /dev/null @@ -1,19 +0,0 @@ -project_root = "../../../.." -include(project_root.."/tools/build") - -group("src") -project("xenia-ui-spirv") - uuid("2323a069-5b29-44a3-b524-f35451a81978") - kind("StaticLib") - language("C++") - links({ - "glslang-spirv", - "spirv-tools", - "xenia-base", - }) - defines({ - }) - includedirs({ - project_root.."/third_party/spirv-tools/external/include", - }) - local_platform_files() diff --git a/src/xenia/ui/spirv/spirv_assembler.cc b/src/xenia/ui/spirv/spirv_assembler.cc deleted file mode 100644 index b7fc5c901..000000000 --- a/src/xenia/ui/spirv/spirv_assembler.cc +++ /dev/null @@ -1,78 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_assembler.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvAssembler::Result::Result(spv_binary binary, spv_diagnostic diagnostic) - : binary_(binary), diagnostic_(diagnostic) {} - -SpirvAssembler::Result::~Result() { - if (binary_) { - spvBinaryDestroy(binary_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvAssembler::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvAssembler::Result::error_source_line() const { - return diagnostic_ ? 
diagnostic_->position.line : 0; -} - -size_t SpirvAssembler::Result::error_source_column() const { - return diagnostic_ ? diagnostic_->position.column : 0; -} - -const char* SpirvAssembler::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const uint32_t* SpirvAssembler::Result::words() const { - return binary_ ? binary_->code : nullptr; -} - -size_t SpirvAssembler::Result::word_count() const { - return binary_ ? binary_->wordCount : 0; -} - -SpirvAssembler::SpirvAssembler() - : spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {} - -SpirvAssembler::~SpirvAssembler() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvAssembler::Assemble( - const char* source_text, size_t source_text_length) { - spv_binary binary = nullptr; - spv_diagnostic diagnostic = nullptr; - auto result_code = spvTextToBinary(spv_context_, source_text, - source_text_length, &binary, &diagnostic); - std::unique_ptr result(new Result(binary, diagnostic)); - if (result_code) { - XELOGE("Failed to assemble spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_assembler.h b/src/xenia/ui/spirv/spirv_assembler.h deleted file mode 100644 index 3fabc5d61..000000000 --- a/src/xenia/ui/spirv/spirv_assembler.h +++ /dev/null @@ -1,69 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ -#define XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ - -#include -#include - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvAssembler { - public: - class Result { - public: - Result(spv_binary binary, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Line of the error in the provided source text. - size_t error_source_line() const; - // Column of the error in the provided source text. - size_t error_source_column() const; - // Human-readable description of the error. - const char* error_string() const; - - // Assembled SPIRV binary. - // Returned pointer lifetime is tied to this Result instance. - const uint32_t* words() const; - // Size of the SPIRV binary, in words. - size_t word_count() const; - - private: - spv_binary binary_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvAssembler(); - ~SpirvAssembler(); - - // Assembles the given source text into a SPIRV binary. - // The return will be nullptr if assembly fails due to a library error. - // The return may have an error set on it if the source text is malformed. 
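Assemble, as implemented above, has a two-level error contract: nullptr for library-level failures, and a live Result whose has_error() returns true for malformed source. A sketch of the caller-side handling this implies (hypothetical usage, not code from this tree):

xe::ui::spirv::SpirvAssembler assembler;
auto result = assembler.Assemble(source_text);
if (!result) {
  // Library error; Assemble has already logged it.
  return false;
}
if (result->has_error()) {
  // Malformed source text; report where and why.
  XELOGE("SPIR-V assembly failed at {}:{}: {}", result->error_source_line(),
         result->error_source_column(), result->error_string());
  return false;
}
// The words remain valid only as long as the Result is alive.
const uint32_t* words = result->words();
size_t word_count = result->word_count();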
- std::unique_ptr Assemble(const char* source_text, - size_t source_text_length); - std::unique_ptr Assemble(const std::string_view source_text) { - return Assemble(source_text.data(), source_text.size()); - } - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ diff --git a/src/xenia/ui/spirv/spirv_disassembler.cc b/src/xenia/ui/spirv/spirv_disassembler.cc deleted file mode 100644 index a8401c8ce..000000000 --- a/src/xenia/ui/spirv/spirv_disassembler.cc +++ /dev/null @@ -1,82 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_disassembler.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvDisassembler::Result::Result(spv_text text, spv_diagnostic diagnostic) - : text_(text), diagnostic_(diagnostic) {} - -SpirvDisassembler::Result::~Result() { - if (text_) { - spvTextDestroy(text_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvDisassembler::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvDisassembler::Result::error_word_index() const { - return diagnostic_ ? diagnostic_->position.index : 0; -} - -const char* SpirvDisassembler::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const char* SpirvDisassembler::Result::text() const { - return text_ ? text_->str : ""; -} - -std::string SpirvDisassembler::Result::to_string() const { - return text_ ? std::string(text_->str, text_->length) : ""; -} - -void SpirvDisassembler::Result::AppendText(StringBuffer* target_buffer) const { - if (text_) { - target_buffer->AppendBytes(reinterpret_cast(text_->str), - text_->length); - } -} - -SpirvDisassembler::SpirvDisassembler() - : spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {} - -SpirvDisassembler::~SpirvDisassembler() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvDisassembler::Disassemble( - const uint32_t* words, size_t word_count) { - spv_text text = nullptr; - spv_diagnostic diagnostic = nullptr; - auto result_code = - spvBinaryToText(spv_context_, words, word_count, - SPV_BINARY_TO_TEXT_OPTION_INDENT, &text, &diagnostic); - std::unique_ptr result(new Result(text, diagnostic)); - if (result_code) { - XELOGE("Failed to disassemble spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_disassembler.h b/src/xenia/ui/spirv/spirv_disassembler.h deleted file mode 100644 index b779b9d75..000000000 --- a/src/xenia/ui/spirv/spirv_disassembler.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. 
* - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ -#define XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ - -#include -#include - -#include "xenia/base/string_buffer.h" -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvDisassembler { - public: - class Result { - public: - Result(spv_text text, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Index of the error in the provided binary word data. - size_t error_word_index() const; - // Human-readable description of the error. - const char* error_string() const; - - // Disassembled source text. - // Returned pointer lifetime is tied to this Result instance. - const char* text() const; - // Converts the disassembled source text to a string. - std::string to_string() const; - // Appends the disassembled source text to the given buffer. - void AppendText(StringBuffer* target_buffer) const; - - private: - spv_text text_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvDisassembler(); - ~SpirvDisassembler(); - - // Disassembles the given SPIRV binary. - // The return will be nullptr if disassembly fails due to a library error. - // The return may have an error set on it if the SPIRV binary is malformed. - std::unique_ptr Disassemble(const uint32_t* words, size_t word_count); - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ diff --git a/src/xenia/ui/spirv/spirv_util.cc b/src/xenia/ui/spirv/spirv_util.cc deleted file mode 100644 index a5a5da7a3..000000000 --- a/src/xenia/ui/spirv/spirv_util.cc +++ /dev/null @@ -1,20 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -// - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_util.h b/src/xenia/ui/spirv/spirv_util.h deleted file mode 100644 index b0555d7fa..000000000 --- a/src/xenia/ui/spirv/spirv_util.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_UTIL_H_ -#define XENIA_UI_SPIRV_SPIRV_UTIL_H_ - -#include "third_party/spirv-headers/include/spirv/1.1/spirv.hpp11" -#include "third_party/spirv/GLSL.std.450.hpp11" - -// Forward declarations from SPIRV-Tools so we don't pollute /so/ much. 
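The comment above refers to a standard C-interop trick: SPIRV-Tools handles are pointers to incomplete struct types, so an interface header can re-declare them instead of including libspirv.h everywhere. A sketch of why this compiles (illustrative only, not from this tree):

struct spv_context_t;                // never defined in this translation unit
typedef spv_context_t* spv_context;  // a pointer to an incomplete type is fine
class UsesContext {
 public:
  void set_context(spv_context context) { context_ = context; }
  spv_context context() const { return context_; }
 private:
  spv_context context_ = nullptr;  // storing a pointer needs no definition
};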
-struct spv_binary_t; -typedef spv_binary_t* spv_binary; -struct spv_context_t; -typedef spv_context_t* spv_context; -struct spv_diagnostic_t; -typedef spv_diagnostic_t* spv_diagnostic; -struct spv_text_t; -typedef spv_text_t* spv_text; - -namespace xe { -namespace ui { -namespace spirv { - -// - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_UTIL_H_ diff --git a/src/xenia/ui/spirv/spirv_validator.cc b/src/xenia/ui/spirv/spirv_validator.cc deleted file mode 100644 index 3d586d0ba..000000000 --- a/src/xenia/ui/spirv/spirv_validator.cc +++ /dev/null @@ -1,80 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_validator.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic) - : text_(text), diagnostic_(diagnostic) {} - -SpirvValidator::Result::~Result() { - if (text_) { - spvTextDestroy(text_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvValidator::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvValidator::Result::error_word_index() const { - return diagnostic_ ? diagnostic_->position.index : 0; -} - -const char* SpirvValidator::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const char* SpirvValidator::Result::text() const { - return text_ ? text_->str : ""; -} - -std::string SpirvValidator::Result::to_string() const { - return text_ ? std::string(text_->str, text_->length) : ""; -} - -void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const { - if (text_) { - target_buffer->AppendBytes(reinterpret_cast(text_->str), - text_->length); - } -} - -SpirvValidator::SpirvValidator() - : spv_context_(spvContextCreate(SPV_ENV_UNIVERSAL_1_1)) {} -SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvValidator::Validate( - const uint32_t* words, size_t word_count) { - spv_text text = nullptr; - spv_diagnostic diagnostic = nullptr; - spv_const_binary_t binary = {words, word_count}; - auto result_code = spvValidate(spv_context_, &binary, &diagnostic); - std::unique_ptr result(new Result(text, diagnostic)); - if (result_code) { - XELOGE("Failed to validate spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/spirv/spirv_validator.h b/src/xenia/ui/spirv/spirv_validator.h deleted file mode 100644 index 890843f27..000000000 --- a/src/xenia/ui/spirv/spirv_validator.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. 
* - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ -#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ - -#include -#include - -#include "xenia/base/string_buffer.h" -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvValidator { - public: - class Result { - public: - Result(spv_text text, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Index of the error in the provided binary word data. - size_t error_word_index() const; - // Human-readable description of the error. - const char* error_string() const; - - // Disassembled source text. - // Returned pointer lifetime is tied to this Result instance. - const char* text() const; - // Converts the disassembled source text to a string. - std::string to_string() const; - // Appends the disassembled source text to the given buffer. - void AppendText(StringBuffer* target_buffer) const; - - private: - spv_text text_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvValidator(); - ~SpirvValidator(); - - // Validates the given SPIRV binary. - // The return will be nullptr if validation fails due to a library error. - // The return may have an error set on it if the SPIRV binary is malformed. - std::unique_ptr Validate(const uint32_t* words, size_t word_count); - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ diff --git a/src/xenia/ui/vulkan/blitter.cc b/src/xenia/ui/vulkan/blitter.cc deleted file mode 100644 index a7c36b7ef..000000000 --- a/src/xenia/ui/vulkan/blitter.cc +++ /dev/null @@ -1,574 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/blitter.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/fenced_pools.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -using util::CheckResult; - -// Generated with `xb buildshaders`. 
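For context on the `xb buildshaders` comment above: the bytecode headers included below are plain arrays of SPIR-V words, which is why sizeof() on them directly yields the codeSize that vkCreateShaderModule expects. The shape is roughly the following (illustrative only, not the actual generated contents):

static const uint32_t blit_vs[] = {
    0x07230203u,  // SPIR-V magic number; the real words follow.
    // ...
};
// sizeof(shaders::blit_vs) is the byte size, shaders::blit_vs is pCode.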
-namespace shaders { -#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h" -#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h" -#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h" -} // namespace shaders - -Blitter::Blitter(const VulkanProvider& provider) : provider_(provider) {} -Blitter::~Blitter() { Shutdown(); } - -VkResult Blitter::Initialize() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Shaders - VkShaderModuleCreateInfo shader_create_info; - std::memset(&shader_create_info, 0, sizeof(shader_create_info)); - shader_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_create_info.codeSize = sizeof(shaders::blit_vs); - shader_create_info.pCode = shaders::blit_vs; - status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr, - &blit_vertex_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(blit_vertex_), "S(B): Vertex"); - - shader_create_info.codeSize = sizeof(shaders::blit_color_ps); - shader_create_info.pCode = shaders::blit_color_ps; - status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr, - &blit_color_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(blit_color_), "S(B): Color"); - - shader_create_info.codeSize = sizeof(shaders::blit_depth_ps); - shader_create_info.pCode = shaders::blit_depth_ps; - status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr, - &blit_depth_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE, - uint64_t(blit_depth_), "S(B): Depth"); - - // Create the descriptor set layout used for our texture sampler. - // As it changes almost every draw we cache it per texture. - VkDescriptorSetLayoutCreateInfo texture_set_layout_info; - texture_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - texture_set_layout_info.pNext = nullptr; - texture_set_layout_info.flags = 0; - texture_set_layout_info.bindingCount = 1; - VkDescriptorSetLayoutBinding texture_binding; - texture_binding.binding = 0; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - texture_binding.descriptorCount = 1; - texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - texture_binding.pImmutableSamplers = nullptr; - texture_set_layout_info.pBindings = &texture_binding; - status = dfn.vkCreateDescriptorSetLayout(device, &texture_set_layout_info, - nullptr, &descriptor_set_layout_); - CheckResult(status, "vkCreateDescriptorSetLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Create a descriptor pool - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].descriptorCount = 4096; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_pool_ = std::make_unique( - provider_, 4096, - std::vector(pool_sizes, std::end(pool_sizes))); - - // Create the pipeline layout used for our pipeline. 
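The three vkCreateShaderModule blocks above differ only in the bytecode array and the debug name. Had this code stayed, a small helper could have expressed the pattern once; a sketch using the provider accessors this file already relies on (helper name hypothetical):

VkShaderModule CreateSpirvModule(const VulkanProvider& provider,
                                 const uint32_t* code,
                                 size_t code_size_bytes) {
  VkShaderModuleCreateInfo info = {};
  info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  info.codeSize = code_size_bytes;  // e.g. sizeof(shaders::blit_vs)
  info.pCode = code;
  VkShaderModule shader_module = VK_NULL_HANDLE;
  if (provider.dfn().vkCreateShaderModule(provider.device(), &info, nullptr,
                                          &shader_module) != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return shader_module;
}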
- VkPipelineLayoutCreateInfo pipeline_layout_info; - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = nullptr; - pipeline_layout_info.flags = 0; - VkDescriptorSetLayout set_layouts[] = {descriptor_set_layout_}; - pipeline_layout_info.setLayoutCount = - static_cast(xe::countof(set_layouts)); - pipeline_layout_info.pSetLayouts = set_layouts; - VkPushConstantRange push_constant_ranges[2]; - - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = sizeof(VtxPushConstants); - push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[1].offset = sizeof(VtxPushConstants); - push_constant_ranges[1].size = sizeof(PixPushConstants); - - pipeline_layout_info.pushConstantRangeCount = - static_cast(xe::countof(push_constant_ranges)); - pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - status = dfn.vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr, - &pipeline_layout_); - CheckResult(status, "vkCreatePipelineLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Create two samplers. - VkSamplerCreateInfo sampler_create_info = { - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - nullptr, - 0, - VK_FILTER_NEAREST, - VK_FILTER_NEAREST, - VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - 0.f, - VK_FALSE, - 1.f, - VK_FALSE, - VK_COMPARE_OP_NEVER, - 0.f, - 0.f, - VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, - VK_FALSE, - }; - status = dfn.vkCreateSampler(device, &sampler_create_info, nullptr, - &samp_nearest_); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - sampler_create_info.minFilter = VK_FILTER_LINEAR; - sampler_create_info.magFilter = VK_FILTER_LINEAR; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - status = - dfn.vkCreateSampler(device, &sampler_create_info, nullptr, &samp_linear_); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -void Blitter::Shutdown() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_nearest_); - util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_linear_); - util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_vertex_); - util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_color_); - util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_depth_); - util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_color_); - util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_depth_); - util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, - pipeline_layout_); - util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_); - for (auto& pipeline : pipelines_) { - dfn.vkDestroyPipeline(device, pipeline.second, nullptr); - } - pipelines_.clear(); - - for (auto& pass : render_passes_) { - dfn.vkDestroyRenderPass(device, pass.second, nullptr); - } - render_passes_.clear(); -} - -void Blitter::Scavenge() { - if (descriptor_pool_->has_open_batch()) { - descriptor_pool_->EndBatch(); - } - - descriptor_pool_->Scavenge(); -} - -void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImageView 
src_image_view, VkRect2D src_rect, - VkExtent2D src_extents, VkFormat dst_image_format, - VkRect2D dst_rect, VkExtent2D dst_extents, - VkFramebuffer dst_framebuffer, VkViewport viewport, - VkRect2D scissor, VkFilter filter, - bool color_or_depth, bool swap_channels) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - // Do we need a full draw, or can we cheap out with a blit command? - bool full_draw = swap_channels || true; - if (full_draw) { - if (!descriptor_pool_->has_open_batch()) { - descriptor_pool_->BeginBatch(fence); - } - - // Acquire a render pass. - auto render_pass = GetRenderPass(dst_image_format, color_or_depth); - VkRenderPassBeginInfo render_pass_info = { - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - render_pass, - dst_framebuffer, - {{0, 0}, dst_extents}, - 0, - nullptr, - }; - - dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_info, - VK_SUBPASS_CONTENTS_INLINE); - - dfn.vkCmdSetViewport(command_buffer, 0, 1, &viewport); - dfn.vkCmdSetScissor(command_buffer, 0, 1, &scissor); - - // Acquire a pipeline. - auto pipeline = - GetPipeline(render_pass, color_or_depth ? blit_color_ : blit_depth_, - color_or_depth); - dfn.vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - - // Acquire and update a descriptor set for this image. - auto set = descriptor_pool_->AcquireEntry(descriptor_set_layout_); - if (!set) { - assert_always(); - descriptor_pool_->CancelBatch(); - return; - } - - VkWriteDescriptorSet write; - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.pNext = nullptr; - write.dstSet = set; - write.dstBinding = 0; - write.dstArrayElement = 0; - write.descriptorCount = 1; - write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - - VkDescriptorImageInfo image; - image.sampler = filter == VK_FILTER_NEAREST ? samp_nearest_ : samp_linear_; - image.imageView = src_image_view; - image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - - write.pImageInfo = ℑ - write.pBufferInfo = nullptr; - write.pTexelBufferView = nullptr; - dfn.vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); - - dfn.vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout_, 0, 1, &set, 0, nullptr); - - VtxPushConstants vtx_constants = { - { - float(src_rect.offset.x) / src_extents.width, - float(src_rect.offset.y) / src_extents.height, - float(src_rect.extent.width) / src_extents.width, - float(src_rect.extent.height) / src_extents.height, - }, - { - float(dst_rect.offset.x) / dst_extents.width, - float(dst_rect.offset.y) / dst_extents.height, - float(dst_rect.extent.width) / dst_extents.width, - float(dst_rect.extent.height) / dst_extents.height, - }, - }; - dfn.vkCmdPushConstants(command_buffer, pipeline_layout_, - VK_SHADER_STAGE_VERTEX_BIT, 0, - sizeof(VtxPushConstants), &vtx_constants); - - PixPushConstants pix_constants = { - 0, - 0, - 0, - swap_channels ? 
1 : 0, - }; - dfn.vkCmdPushConstants( - command_buffer, pipeline_layout_, VK_SHADER_STAGE_FRAGMENT_BIT, - sizeof(VtxPushConstants), sizeof(PixPushConstants), &pix_constants); - - dfn.vkCmdDraw(command_buffer, 4, 1, 0, 0); - dfn.vkCmdEndRenderPass(command_buffer); - } -} - -void Blitter::CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents, - VkFilter filter, bool swap_channels) {} - -void Blitter::CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents) { -} - -VkRenderPass Blitter::GetRenderPass(VkFormat format, bool color_or_depth) { - auto pass = render_passes_.find(format); - if (pass != render_passes_.end()) { - return pass->second; - } - - // Create and cache the render pass. - VkRenderPass render_pass = CreateRenderPass(format, color_or_depth); - if (render_pass) { - render_passes_[format] = render_pass; - } - - return render_pass; -} - -VkPipeline Blitter::GetPipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, - bool color_or_depth) { - auto it = pipelines_.find(std::make_pair(render_pass, frag_shader)); - if (it != pipelines_.end()) { - return it->second; - } - - // Create and cache the pipeline. - VkPipeline pipeline = - CreatePipeline(render_pass, frag_shader, color_or_depth); - if (pipeline) { - pipelines_[std::make_pair(render_pass, frag_shader)] = pipeline; - } - - return pipeline; -} - -VkRenderPass Blitter::CreateRenderPass(VkFormat output_format, - bool color_or_depth) { - VkAttachmentDescription attachments[1]; - std::memset(attachments, 0, sizeof(attachments)); - - // Output attachment - attachments[0].flags = 0; - attachments[0].format = output_format; - attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].initialLayout = - color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachments[0].finalLayout = attachments[0].initialLayout; - - VkAttachmentReference attach_refs[1]; - attach_refs[0].attachment = 0; - attach_refs[0].layout = - color_or_depth ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass = { - 0, VK_PIPELINE_BIND_POINT_GRAPHICS, - 0, nullptr, - 0, nullptr, - nullptr, nullptr, - 0, nullptr, - }; - - if (color_or_depth) { - subpass.colorAttachmentCount = 1; - subpass.pColorAttachments = attach_refs; - } else { - subpass.pDepthStencilAttachment = attach_refs; - } - - VkRenderPassCreateInfo renderpass_info = { - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - nullptr, - 0, - 1, - attachments, - 1, - &subpass, - 0, - nullptr, - }; - VkRenderPass renderpass = nullptr; - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult result = - dfn.vkCreateRenderPass(device, &renderpass_info, nullptr, &renderpass); - CheckResult(result, "vkCreateRenderPass"); - - return renderpass; -} - -VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, - bool color_or_depth) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult result = VK_SUCCESS; - - // Pipeline - VkGraphicsPipelineCreateInfo pipeline_info; - std::memset(&pipeline_info, 0, sizeof(VkGraphicsPipelineCreateInfo)); - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - - // Shaders - pipeline_info.stageCount = 2; - VkPipelineShaderStageCreateInfo stages[2]; - std::memset(stages, 0, sizeof(stages)); - stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - stages[0].module = blit_vertex_; - stages[0].pName = "main"; - stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - stages[1].module = frag_shader; - stages[1].pName = "main"; - - pipeline_info.pStages = stages; - - // Vertex input - VkPipelineVertexInputStateCreateInfo vtx_state; - std::memset(&vtx_state, 0, sizeof(vtx_state)); - vtx_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vtx_state.flags = 0; - vtx_state.vertexAttributeDescriptionCount = 0; - vtx_state.pVertexAttributeDescriptions = nullptr; - vtx_state.vertexBindingDescriptionCount = 0; - vtx_state.pVertexBindingDescriptions = nullptr; - - pipeline_info.pVertexInputState = &vtx_state; - - // Input Assembly - VkPipelineInputAssemblyStateCreateInfo input_info; - input_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_info.pNext = nullptr; - input_info.flags = 0; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - input_info.primitiveRestartEnable = VK_FALSE; - pipeline_info.pInputAssemblyState = &input_info; - pipeline_info.pTessellationState = nullptr; - VkPipelineViewportStateCreateInfo viewport_state_info; - viewport_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_info.pNext = nullptr; - viewport_state_info.flags = 0; - viewport_state_info.viewportCount = 1; - viewport_state_info.pViewports = nullptr; - viewport_state_info.scissorCount = 1; - viewport_state_info.pScissors = nullptr; - pipeline_info.pViewportState = &viewport_state_info; - VkPipelineRasterizationStateCreateInfo rasterization_info; - rasterization_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_info.pNext = nullptr; - rasterization_info.flags = 0; - rasterization_info.depthClampEnable = VK_FALSE; - rasterization_info.rasterizerDiscardEnable = VK_FALSE; - 
rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_info.cullMode = VK_CULL_MODE_NONE; - rasterization_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - rasterization_info.depthBiasEnable = VK_FALSE; - rasterization_info.depthBiasConstantFactor = 0; - rasterization_info.depthBiasClamp = 0; - rasterization_info.depthBiasSlopeFactor = 0; - rasterization_info.lineWidth = 1.0f; - pipeline_info.pRasterizationState = &rasterization_info; - VkPipelineMultisampleStateCreateInfo multisample_info; - multisample_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_info.pNext = nullptr; - multisample_info.flags = 0; - multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_info.sampleShadingEnable = VK_FALSE; - multisample_info.minSampleShading = 0; - multisample_info.pSampleMask = nullptr; - multisample_info.alphaToCoverageEnable = VK_FALSE; - multisample_info.alphaToOneEnable = VK_FALSE; - pipeline_info.pMultisampleState = &multisample_info; - VkPipelineDepthStencilStateCreateInfo depth_info = { - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - nullptr, - 0, - VK_TRUE, - VK_TRUE, - VK_COMPARE_OP_ALWAYS, - VK_FALSE, - VK_FALSE, - {}, - {}, - 0.f, - 1.f, - }; - pipeline_info.pDepthStencilState = &depth_info; - VkPipelineColorBlendStateCreateInfo blend_info; - blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.pNext = nullptr; - blend_info.flags = 0; - blend_info.logicOpEnable = VK_FALSE; - blend_info.logicOp = VK_LOGIC_OP_NO_OP; - - VkPipelineColorBlendAttachmentState blend_attachments[1]; - if (color_or_depth) { - blend_attachments[0].blendEnable = VK_FALSE; - blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; - blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].colorWriteMask = 0xF; - - blend_info.attachmentCount = - static_cast(xe::countof(blend_attachments)); - blend_info.pAttachments = blend_attachments; - } else { - blend_info.attachmentCount = 0; - blend_info.pAttachments = nullptr; - } - - std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); - pipeline_info.pColorBlendState = &blend_info; - VkPipelineDynamicStateCreateInfo dynamic_state_info; - dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - dynamic_state_info.flags = 0; - VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - dynamic_state_info.dynamicStateCount = - static_cast(xe::countof(dynamic_states)); - dynamic_state_info.pDynamicStates = dynamic_states; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = render_pass; - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = -1; - - VkPipeline pipeline = nullptr; - result = dfn.vkCreateGraphicsPipelines(device, nullptr, 1, &pipeline_info, - nullptr, &pipeline); - CheckResult(result, "vkCreateGraphicsPipelines"); - - return pipeline; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/blitter.h 
b/src/xenia/ui/vulkan/blitter.h deleted file mode 100644 index 817b5efc7..000000000 --- a/src/xenia/ui/vulkan/blitter.h +++ /dev/null @@ -1,100 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_BLITTER_H_ -#define XENIA_UI_VULKAN_BLITTER_H_ - -#include -#include - -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace ui { -namespace vulkan { - -class DescriptorPool; - -class Blitter { - public: - Blitter(const VulkanProvider& provider); - ~Blitter(); - - VkResult Initialize(); - void Scavenge(); - void Shutdown(); - - // Queues commands to blit a texture to another texture. - // - // src_rect is the rectangle of pixels to copy from the source - // src_extents is the actual size of the source image - // dst_rect is the rectangle of pixels that are replaced with the source - // dst_extents is the actual size of the destination image - // dst_framebuffer must only have one attachment, the target texture. - // viewport is the viewport rect (set to {0, 0, dst_w, dst_h} if unsure) - // scissor is the scissor rect for the dest (set to dst size if unsure) - void BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImageView src_image_view, VkRect2D src_rect, - VkExtent2D src_extents, VkFormat dst_image_format, - VkRect2D dst_rect, VkExtent2D dst_extents, - VkFramebuffer dst_framebuffer, VkViewport viewport, - VkRect2D scissor, VkFilter filter, bool color_or_depth, - bool swap_channels); - - void CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents, - VkFilter filter, bool swap_channels); - void CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents); - - // For framebuffer creation. 
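GetRenderPass below is documented as existing for framebuffer creation, and BlitTexture2D above requires dst_framebuffer to have exactly one attachment. A caller would therefore do roughly this (hypothetical names; dfn and device obtained from the provider as elsewhere in this code):

VkImageView attachments[] = {dst_image_view};  // single color or depth target
VkFramebufferCreateInfo framebuffer_info = {};
framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebuffer_info.renderPass =
    blitter.GetRenderPass(dst_image_format, /*color_or_depth=*/true);
framebuffer_info.attachmentCount = 1;
framebuffer_info.pAttachments = attachments;
framebuffer_info.width = dst_extents.width;
framebuffer_info.height = dst_extents.height;
framebuffer_info.layers = 1;
VkFramebuffer dst_framebuffer = VK_NULL_HANDLE;
dfn.vkCreateFramebuffer(device, &framebuffer_info, nullptr, &dst_framebuffer);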
- VkRenderPass GetRenderPass(VkFormat format, bool color_or_depth); - - private: - struct VtxPushConstants { - float src_uv[4]; // 0x00 - float dst_uv[4]; // 0x10 - }; - - struct PixPushConstants { - int _pad[3]; // 0x20 - int swap; // 0x2C - }; - - VkPipeline GetPipeline(VkRenderPass render_pass, VkShaderModule frag_shader, - bool color_or_depth); - VkRenderPass CreateRenderPass(VkFormat output_format, bool color_or_depth); - VkPipeline CreatePipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, bool color_or_depth); - - std::unique_ptr descriptor_pool_ = nullptr; - const VulkanProvider& provider_; - VkPipeline pipeline_color_ = nullptr; - VkPipeline pipeline_depth_ = nullptr; - VkPipelineLayout pipeline_layout_ = nullptr; - VkShaderModule blit_vertex_ = nullptr; - VkShaderModule blit_color_ = nullptr; - VkShaderModule blit_depth_ = nullptr; - VkSampler samp_linear_ = nullptr; - VkSampler samp_nearest_ = nullptr; - VkDescriptorSetLayout descriptor_set_layout_ = nullptr; - - std::map render_passes_; - std::map, VkPipeline> pipelines_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_BLITTER_H_ diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc deleted file mode 100644 index 30ba025cf..000000000 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ /dev/null @@ -1,314 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/circular_buffer.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -using util::CheckResult; - -CircularBuffer::CircularBuffer(const VulkanProvider& provider, - VkBufferUsageFlags usage, VkDeviceSize capacity, - VkDeviceSize alignment) - : provider_(provider), capacity_(capacity) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Create our internal buffer. - VkBufferCreateInfo buffer_info; - buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_info.pNext = nullptr; - buffer_info.flags = 0; - buffer_info.size = capacity; - buffer_info.usage = usage; - buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_info.queueFamilyIndexCount = 0; - buffer_info.pQueueFamilyIndices = nullptr; - status = dfn.vkCreateBuffer(device, &buffer_info, nullptr, &gpu_buffer_); - CheckResult(status, "vkCreateBuffer"); - if (status != VK_SUCCESS) { - assert_always(); - } - - VkMemoryRequirements reqs; - dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs); - alignment_ = xe::round_up(alignment, reqs.alignment); -} -CircularBuffer::~CircularBuffer() { Shutdown(); } - -VkResult CircularBuffer::Initialize(VkDeviceMemory memory, - VkDeviceSize offset) { - assert_true(offset % alignment_ == 0); - gpu_memory_ = memory; - gpu_base_ = offset; - - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - // Bind the buffer to its backing memory. 
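Initialize(memory, offset) above asserts that the caller's offset is already aligned. A caller carving the circular buffer out of a larger VkDeviceMemory block would query the requirements and round up first; a sketch with hypothetical names:

VkMemoryRequirements reqs;
circular_buffer.GetBufferMemoryRequirements(&reqs);  // reqs.size to reserve
VkDeviceSize offset =
    xe::round_up(next_free_block_offset, circular_buffer.alignment());
if (circular_buffer.Initialize(block_memory, offset) != VK_SUCCESS) {
  // Fall back to the self-allocating Initialize() overload.
}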
- status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); - Shutdown(); - return status; - } - - // Map the memory so we can access it. - status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0, - reinterpret_cast(&host_base_)); - CheckResult(status, "vkMapMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to map memory!"); - Shutdown(); - return status; - } - - return VK_SUCCESS; -} - -VkResult CircularBuffer::Initialize() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkResult status = VK_SUCCESS; - - VkMemoryRequirements reqs; - dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs); - - // Allocate memory from the device to back the buffer. - owns_gpu_memory_ = true; - VkMemoryAllocateInfo memory_allocate_info; - memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_allocate_info.pNext = nullptr; - memory_allocate_info.allocationSize = reqs.size; - memory_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType( - provider_, reqs.memoryTypeBits, false); - if (memory_allocate_info.memoryTypeIndex == UINT32_MAX) { - XELOGE("CircularBuffer::Initialize - Failed to get memory type!"); - Shutdown(); - return VK_ERROR_INITIALIZATION_FAILED; - } - status = dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, - &gpu_memory_); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to allocate memory!"); - Shutdown(); - return status; - } - - capacity_ = reqs.size; - gpu_base_ = 0; - - // Bind the buffer to its backing memory. - status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); - Shutdown(); - return status; - } - - // Map the memory so we can access it. - status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0, - reinterpret_cast(&host_base_)); - CheckResult(status, "vkMapMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to map memory!"); - Shutdown(); - return status; - } - - return VK_SUCCESS; -} - -void CircularBuffer::Shutdown() { - Clear(); - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - if (host_base_) { - dfn.vkUnmapMemory(device, gpu_memory_); - host_base_ = nullptr; - } - if (gpu_buffer_) { - dfn.vkDestroyBuffer(device, gpu_buffer_, nullptr); - gpu_buffer_ = nullptr; - } - if (gpu_memory_ && owns_gpu_memory_) { - dfn.vkFreeMemory(device, gpu_memory_, nullptr); - gpu_memory_ = nullptr; - } -} - -void CircularBuffer::GetBufferMemoryRequirements(VkMemoryRequirements* reqs) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, reqs); -} - -bool CircularBuffer::CanAcquire(VkDeviceSize length) { - // Make sure the length is aligned. - length = xe::round_up(length, alignment_); - if (allocations_.empty()) { - // Read head has caught up to write head (entire buffer available for write) - assert_true(read_head_ == write_head_); - return capacity_ >= length; - } else if (write_head_ < read_head_) { - // Write head wrapped around and is behind read head. 
- // | write |---- read ----| - return (read_head_ - write_head_) >= length; - } else if (write_head_ > read_head_) { - // Read head behind write head. - // 1. Check if there's enough room from write -> capacity - // | |---- read ----| write | - if ((capacity_ - write_head_) >= length) { - return true; - } - - // 2. Check if there's enough room from 0 -> read - // | write |---- read ----| | - if ((read_head_ - 0) >= length) { - return true; - } - } - - return false; -} - -CircularBuffer::Allocation* CircularBuffer::Acquire(VkDeviceSize length, - VkFence fence) { - VkDeviceSize aligned_length = xe::round_up(length, alignment_); - if (!CanAcquire(aligned_length)) { - return nullptr; - } - - assert_true(write_head_ % alignment_ == 0); - if (write_head_ < read_head_) { - // Write head behind read head. - assert_true(read_head_ - write_head_ >= aligned_length); - - Allocation alloc; - alloc.host_ptr = host_base_ + write_head_; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + write_head_; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ += aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } else { - // Write head equal to/after read head - if (capacity_ - write_head_ >= aligned_length) { - // Free space from write -> capacity - Allocation alloc; - alloc.host_ptr = host_base_ + write_head_; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + write_head_; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ += aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } else if ((read_head_ - 0) >= aligned_length) { - // Not enough space from write -> capacity, but there is enough free space - // from begin -> read - Allocation alloc; - alloc.host_ptr = host_base_ + 0; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + 0; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ = aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } - } - - return nullptr; -} - -void CircularBuffer::Flush(Allocation* allocation) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkMappedMemoryRange range; - range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range.pNext = nullptr; - range.memory = gpu_memory_; - range.offset = gpu_base_ + allocation->offset; - range.size = allocation->length; - dfn.vkFlushMappedMemoryRanges(device, 1, &range); -} - -void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkMappedMemoryRange range; - range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range.pNext = nullptr; - range.memory = gpu_memory_; - range.offset = gpu_base_ + offset; - range.size = length; - dfn.vkFlushMappedMemoryRanges(device, 1, &range); -} - -void CircularBuffer::Clear() { - allocations_ = std::queue{}; - write_head_ = read_head_ = 0; -} - -void CircularBuffer::Scavenge() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - // Stash the last signalled fence - VkFence fence = nullptr; - while (!allocations_.empty()) { - Allocation& alloc = allocations_.front(); - if (fence != alloc.fence && - dfn.vkGetFenceStatus(device, alloc.fence) != VK_SUCCESS) { - // Don't bother freeing following allocations to ensure 
proper ordering. - break; - } - - fence = alloc.fence; - if (capacity_ - read_head_ < alloc.aligned_length) { - // This allocation is stored at the beginning of the buffer. - read_head_ = alloc.aligned_length; - } else { - read_head_ += alloc.aligned_length; - } - - allocations_.pop(); - } - - if (allocations_.empty()) { - // Reset R/W heads to work around fragmentation issues. - read_head_ = write_head_ = 0; - } -} - -} // namespace vulkan -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h deleted file mode 100644 index 9f54b706a..000000000 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ /dev/null @@ -1,92 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ -#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ - -#include - -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// A circular buffer, intended to hold (fairly) temporary memory that will be -// released when a fence is signaled. Best used when allocations are taken -// in-order with command buffer submission. -// -// Allocations loop around the buffer in circles (but are not fragmented at the -// ends of the buffer), where trailing older allocations are freed after use. -class CircularBuffer { - public: - CircularBuffer(const VulkanProvider& provider, VkBufferUsageFlags usage, - VkDeviceSize capacity, VkDeviceSize alignment = 256); - ~CircularBuffer(); - - struct Allocation { - void* host_ptr; - VkDeviceMemory gpu_memory; - VkDeviceSize offset; - VkDeviceSize length; - VkDeviceSize aligned_length; - - // Allocation usage fence. This allocation will be deleted when the fence - // becomes signaled. - VkFence fence; - }; - - VkResult Initialize(VkDeviceMemory memory, VkDeviceSize offset); - VkResult Initialize(); - void Shutdown(); - - void GetBufferMemoryRequirements(VkMemoryRequirements* reqs); - - VkDeviceSize alignment() const { return alignment_; } - VkDeviceSize capacity() const { return capacity_; } - VkBuffer gpu_buffer() const { return gpu_buffer_; } - VkDeviceMemory gpu_memory() const { return gpu_memory_; } - uint8_t* host_base() const { return host_base_; } - - bool CanAcquire(VkDeviceSize length); - - // Acquires space to hold memory. This allocation is only freed when the fence - // reaches the signaled state. - Allocation* Acquire(VkDeviceSize length, VkFence fence); - void Flush(Allocation* allocation); - void Flush(VkDeviceSize offset, VkDeviceSize length); - - // Clears all allocations, regardless of whether they've been consumed or not. - void Clear(); - - // Frees any allocations whose fences have been signaled. 
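The interface above implies a per-frame rhythm: acquire against the submission fence, write, flush, submit, then scavenge once fences signal. A sketch of that flow (hypothetical caller, assuming non-coherent memory so the flush is required):

auto* allocation = staging_buffer.Acquire(upload_size, submission_fence);
if (!allocation) {
  // No contiguous space until older fenced allocations retire; callers
  // typically submit pending work, wait, and retry.
  return false;
}
std::memcpy(allocation->host_ptr, source_data, upload_size);
staging_buffer.Flush(allocation);
// ... record a copy out of staging_buffer.gpu_buffer() at allocation->offset,
// then vkQueueSubmit with submission_fence ...
staging_buffer.Scavenge();  // later: reclaims allocations whose fences signaled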
- void Scavenge(); - - private: - // All of these variables are relative to gpu_base - VkDeviceSize capacity_ = 0; - VkDeviceSize alignment_ = 0; - VkDeviceSize write_head_ = 0; - VkDeviceSize read_head_ = 0; - - const VulkanProvider& provider_; - bool owns_gpu_memory_ = false; - VkBuffer gpu_buffer_ = nullptr; - VkDeviceMemory gpu_memory_ = nullptr; - VkDeviceSize gpu_base_ = 0; - uint8_t* host_base_ = nullptr; - - std::queue allocations_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_GL_CIRCULAR_BUFFER_H_ diff --git a/src/xenia/ui/vulkan/fenced_pools.cc b/src/xenia/ui/vulkan/fenced_pools.cc deleted file mode 100644 index ef4794c71..000000000 --- a/src/xenia/ui/vulkan/fenced_pools.cc +++ /dev/null @@ -1,142 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/fenced_pools.h" - -#include "xenia/base/assert.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -using util::CheckResult; - -CommandBufferPool::CommandBufferPool(const VulkanProvider& provider, - uint32_t queue_family_index) - : BaseFencedPool(provider) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - // Create the pool used for allocating buffers. - // They are marked as transient (short-lived) and cycled frequently. - VkCommandPoolCreateInfo cmd_pool_info; - cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.pNext = nullptr; - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - cmd_pool_info.queueFamilyIndex = queue_family_index; - auto err = - dfn.vkCreateCommandPool(device, &cmd_pool_info, nullptr, &command_pool_); - CheckResult(err, "vkCreateCommandPool"); - - // Allocate a bunch of command buffers to start. - constexpr uint32_t kDefaultCount = 32; - VkCommandBufferAllocateInfo command_buffer_info; - command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_info.pNext = nullptr; - command_buffer_info.commandPool = command_pool_; - command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - command_buffer_info.commandBufferCount = kDefaultCount; - VkCommandBuffer command_buffers[kDefaultCount]; - err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info, - command_buffers); - CheckResult(err, "vkCreateCommandBuffer"); - for (size_t i = 0; i < xe::countof(command_buffers); ++i) { - PushEntry(command_buffers[i], nullptr); - } -} - -CommandBufferPool::~CommandBufferPool() { - FreeAllEntries(); - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkDestroyCommandPool(device, command_pool_, nullptr); - command_pool_ = nullptr; -} - -VkCommandBuffer CommandBufferPool::AllocateEntry(void* data) { - // TODO(benvanik): allocate a bunch at once? 
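The TODO above could be answered the same way the constructor already does it, by growing the free list several command buffers at a time; a sketch in the style of the surrounding code (simplified to primary buffers, not the original implementation):

constexpr uint32_t kGrowCount = 8;
VkCommandBufferAllocateInfo grow_info;
grow_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
grow_info.pNext = nullptr;
grow_info.commandPool = command_pool_;
grow_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
grow_info.commandBufferCount = kGrowCount;
VkCommandBuffer grown[kGrowCount];
if (dfn.vkAllocateCommandBuffers(device, &grow_info, grown) == VK_SUCCESS) {
  for (VkCommandBuffer cb : grown) {
    PushEntry(cb, nullptr);  // hand the spares to the fenced free list
  }
}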
- VkCommandBufferAllocateInfo command_buffer_info; - command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_info.pNext = nullptr; - command_buffer_info.commandPool = command_pool_; - command_buffer_info.level = - VkCommandBufferLevel(reinterpret_cast<uintptr_t>(data)); - command_buffer_info.commandBufferCount = 1; - VkCommandBuffer command_buffer; - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - auto err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info, - &command_buffer); - CheckResult(err, "vkCreateCommandBuffer"); - return command_buffer; -} - -void CommandBufferPool::FreeEntry(VkCommandBuffer handle) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkFreeCommandBuffers(device, command_pool_, 1, &handle); -} - -DescriptorPool::DescriptorPool(const VulkanProvider& provider, - uint32_t max_count, - std::vector<VkDescriptorPoolSize> pool_sizes) - : BaseFencedPool<DescriptorPool, VkDescriptorSet>(provider) { - VkDescriptorPoolCreateInfo descriptor_pool_info; - descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - descriptor_pool_info.pNext = nullptr; - descriptor_pool_info.flags = - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = max_count; - descriptor_pool_info.poolSizeCount = uint32_t(pool_sizes.size()); - descriptor_pool_info.pPoolSizes = pool_sizes.data(); - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - auto err = dfn.vkCreateDescriptorPool(device, &descriptor_pool_info, nullptr, - &descriptor_pool_); - CheckResult(err, "vkCreateDescriptorPool"); -} -DescriptorPool::~DescriptorPool() { - FreeAllEntries(); - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkDestroyDescriptorPool(device, descriptor_pool_, nullptr); - descriptor_pool_ = nullptr; -} - -VkDescriptorSet DescriptorPool::AllocateEntry(void* data) { - VkDescriptorSetLayout layout = reinterpret_cast<VkDescriptorSetLayout>(data); - - VkDescriptorSet descriptor_set = nullptr; - VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &layout; - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - auto err = - dfn.vkAllocateDescriptorSets(device, &set_alloc_info, &descriptor_set); - CheckResult(err, "vkAllocateDescriptorSets"); - - return descriptor_set; -} - -void DescriptorPool::FreeEntry(VkDescriptorSet handle) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - dfn.vkFreeDescriptorSets(device, descriptor_pool_, 1, &handle); -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h deleted file mode 100644 index 728b7aaae..000000000 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ /dev/null @@ -1,341 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details.
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_ -#define XENIA_UI_VULKAN_FENCED_POOLS_H_ - -#include <vector> - -#include "xenia/base/assert.h" -#include "xenia/ui/vulkan/vulkan_provider.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// Simple pool for Vulkan homogeneous objects that cannot be reused while -// in-flight. -// It batches pooled objects into groups and uses a vkQueueSubmit fence to -// indicate their availability. If no objects are free when one is requested, -// the caller is expected to create them. -template <typename T, typename HANDLE> -class BaseFencedPool { - public: - BaseFencedPool(const VulkanProvider& provider) : provider_(provider) {} - - virtual ~BaseFencedPool() { - // TODO(benvanik): wait on fence until done. - assert_null(pending_batch_list_head_); - - // Subclasses must call FreeAllEntries() to properly clean up things. - assert_null(free_batch_list_head_); - assert_null(free_entry_list_head_); - } - - // True if one or more batches are still pending on the GPU. - bool has_pending() const { return pending_batch_list_head_ != nullptr; } - // True if a batch is open. - bool has_open_batch() const { return open_batch_ != nullptr; } - - // Checks all pending batches for completion and scavenges their entries. - // This should be called as frequently as reasonable. - void Scavenge() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - while (pending_batch_list_head_) { - auto batch = pending_batch_list_head_; - assert_not_null(batch->fence); - - VkResult status = dfn.vkGetFenceStatus(device, batch->fence); - if (status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST) { - // Batch has completed. Reclaim. - pending_batch_list_head_ = batch->next; - if (batch == pending_batch_list_tail_) { - pending_batch_list_tail_ = nullptr; - } - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - batch->entry_list_tail->next = free_entry_list_head_; - free_entry_list_head_ = batch->entry_list_head; - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - } else { - // Batch is still in-flight. Since batches are executed in order, we know - // no others after it could have completed, so early-exit. - return; - } - } - } - - // Begins a new batch. - // All entries acquired within this batch will be marked as in-use until - // the fence returned is signaled. - // Pass in a fence to use an external fence. This assumes the fence has been - // reset. - VkFence BeginBatch(VkFence fence = nullptr) { - assert_null(open_batch_); - Batch* batch = nullptr; - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - if (free_batch_list_head_) { - // Reuse a batch. - batch = free_batch_list_head_; - free_batch_list_head_ = batch->next; - batch->next = nullptr; - - if (batch->flags & kBatchOwnsFence && !fence) { - // Reset owned fence.
- dfn.vkResetFences(device, 1, &batch->fence); - } else if ((batch->flags & kBatchOwnsFence) && fence) { - // Transfer owned -> external - dfn.vkDestroyFence(device, batch->fence, nullptr); - batch->fence = fence; - batch->flags &= ~kBatchOwnsFence; - } else if (!(batch->flags & kBatchOwnsFence) && !fence) { - // external -> owned - VkFenceCreateInfo info; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - info.pNext = nullptr; - info.flags = 0; - VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence); - if (res != VK_SUCCESS) { - assert_always(); - } - - batch->flags |= kBatchOwnsFence; - } else { - // external -> external - batch->fence = fence; - } - } else { - // Allocate new batch. - batch = new Batch(); - batch->next = nullptr; - batch->flags = 0; - - if (!fence) { - VkFenceCreateInfo info; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - info.pNext = nullptr; - info.flags = 0; - VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence); - if (res != VK_SUCCESS) { - assert_always(); - } - - batch->flags |= kBatchOwnsFence; - } else { - batch->fence = fence; - } - } - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - open_batch_ = batch; - - return batch->fence; - } - - // Cancels an open batch, and releases all entries acquired within. - void CancelBatch() { - assert_not_null(open_batch_); - - auto batch = open_batch_; - open_batch_ = nullptr; - - // Relink the batch back into the free batch list. - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - - // Relink entries back into free entries list. - batch->entry_list_tail->next = free_entry_list_head_; - free_entry_list_head_ = batch->entry_list_head; - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - } - - // Ends the current batch. - void EndBatch() { - assert_not_null(open_batch_); - - // Close and see if we have anything. - auto batch = open_batch_; - open_batch_ = nullptr; - if (!batch->entry_list_head) { - // Nothing to do. - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - return; - } - - // Append to the end of the batch list. - batch->next = nullptr; - if (!pending_batch_list_head_) { - pending_batch_list_head_ = batch; - } - if (pending_batch_list_tail_) { - pending_batch_list_tail_->next = batch; - pending_batch_list_tail_ = batch; - } else { - pending_batch_list_tail_ = batch; - } - } - - protected: - // Attempts to acquire an entry from the pool in the current batch. - // If none are available a new one will be allocated. - HANDLE AcquireEntry(void* data) { - Entry* entry = nullptr; - if (free_entry_list_head_) { - // Slice off an entry from the free list. - Entry* prev = nullptr; - Entry* cur = free_entry_list_head_; - while (cur != nullptr) { - if (cur->data == data) { - if (prev) { - prev->next = cur->next; - } else { - free_entry_list_head_ = cur->next; - } - - entry = cur; - break; - } - - prev = cur; - cur = cur->next; - } - } - - if (!entry) { - // No entry available; allocate new. 
- entry = new Entry(); - entry->data = data; - entry->handle = static_cast<T*>(this)->AllocateEntry(data); - if (!entry->handle) { - delete entry; - return nullptr; - } - } - entry->next = nullptr; - if (!open_batch_->entry_list_head) { - open_batch_->entry_list_head = entry; - } - if (open_batch_->entry_list_tail) { - open_batch_->entry_list_tail->next = entry; - } - open_batch_->entry_list_tail = entry; - return entry->handle; - } - - void PushEntry(HANDLE handle, void* data) { - auto entry = new Entry(); - entry->next = free_entry_list_head_; - entry->data = data; - entry->handle = handle; - free_entry_list_head_ = entry; - } - - void FreeAllEntries() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - // Run down free lists. - while (free_batch_list_head_) { - auto batch = free_batch_list_head_; - free_batch_list_head_ = batch->next; - - if (batch->flags & kBatchOwnsFence) { - dfn.vkDestroyFence(device, batch->fence, nullptr); - batch->fence = nullptr; - } - delete batch; - } - while (free_entry_list_head_) { - auto entry = free_entry_list_head_; - free_entry_list_head_ = entry->next; - static_cast<T*>(this)->FreeEntry(entry->handle); - delete entry; - } - } - - const VulkanProvider& provider_; - - private: - struct Entry { - Entry* next; - void* data; - HANDLE handle; - }; - struct Batch { - Batch* next; - Entry* entry_list_head; - Entry* entry_list_tail; - uint32_t flags; - VkFence fence; - }; - - static const uint32_t kBatchOwnsFence = 1; - - Batch* free_batch_list_head_ = nullptr; - Entry* free_entry_list_head_ = nullptr; - Batch* pending_batch_list_head_ = nullptr; - Batch* pending_batch_list_tail_ = nullptr; - Batch* open_batch_ = nullptr; -}; - -class CommandBufferPool - : public BaseFencedPool<CommandBufferPool, VkCommandBuffer> { - public: - typedef BaseFencedPool<CommandBufferPool, VkCommandBuffer> Base; - - CommandBufferPool(const VulkanProvider& provider, - uint32_t queue_family_index); - ~CommandBufferPool() override; - - VkCommandBuffer AcquireEntry( - VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - return Base::AcquireEntry(reinterpret_cast<void*>(uintptr_t(level))); - } - - protected: - friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>; - VkCommandBuffer AllocateEntry(void* data); - void FreeEntry(VkCommandBuffer handle); - - VkCommandPool command_pool_ = nullptr; -}; - -class DescriptorPool : public BaseFencedPool<DescriptorPool, VkDescriptorSet> { - public: - typedef BaseFencedPool<DescriptorPool, VkDescriptorSet> Base; - - DescriptorPool(const VulkanProvider& provider, uint32_t max_count, - std::vector<VkDescriptorPoolSize> pool_sizes); - ~DescriptorPool() override; - - VkDescriptorSet AcquireEntry(VkDescriptorSetLayout layout) { - return Base::AcquireEntry(layout); - } - - // WARNING: Allocating sets from the Vulkan pool will not be tracked!
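(The warning above is the contract these pools enforce: entries are meant to be taken through the fenced batch API rather than from the raw VkDescriptorPool. A minimal sketch of the expected lifecycle, with provider, queue_family_index and the submission itself as hypothetical stand-ins:

CommandBufferPool pool(provider, queue_family_index);
VkFence fence = pool.BeginBatch();          // Entries are now fence-tracked.
VkCommandBuffer cmd = pool.AcquireEntry();  // VK_COMMAND_BUFFER_LEVEL_PRIMARY.
// ... record cmd, then pass fence to vkQueueSubmit along with the work ...
pool.EndBatch();
pool.Scavenge();  // Called periodically; recycles batches whose fence signaled.

DescriptorPool follows the same pattern, with AcquireEntry(layout) returning a VkDescriptorSet.)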
- VkDescriptorPool descriptor_pool() { return descriptor_pool_; } - - protected: - friend class BaseFencedPool<DescriptorPool, VkDescriptorSet>; - VkDescriptorSet AllocateEntry(void* data); - void FreeEntry(VkDescriptorSet handle); - - VkDescriptorPool descriptor_pool_ = nullptr; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_ diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index 58e18bb7f..4e9eaa83a 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -10,32 +10,28 @@ XE_UI_VULKAN_FUNCTION(vkCmdBindDescriptorSets) XE_UI_VULKAN_FUNCTION(vkCmdBindIndexBuffer) XE_UI_VULKAN_FUNCTION(vkCmdBindPipeline) XE_UI_VULKAN_FUNCTION(vkCmdBindVertexBuffers) -XE_UI_VULKAN_FUNCTION(vkCmdBlitImage) XE_UI_VULKAN_FUNCTION(vkCmdClearAttachments) XE_UI_VULKAN_FUNCTION(vkCmdClearColorImage) -XE_UI_VULKAN_FUNCTION(vkCmdClearDepthStencilImage) XE_UI_VULKAN_FUNCTION(vkCmdCopyBuffer) XE_UI_VULKAN_FUNCTION(vkCmdCopyBufferToImage) XE_UI_VULKAN_FUNCTION(vkCmdCopyImageToBuffer) +XE_UI_VULKAN_FUNCTION(vkCmdDispatch) XE_UI_VULKAN_FUNCTION(vkCmdDraw) XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed) XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass) -XE_UI_VULKAN_FUNCTION(vkCmdExecuteCommands) -XE_UI_VULKAN_FUNCTION(vkCmdFillBuffer) XE_UI_VULKAN_FUNCTION(vkCmdPipelineBarrier) XE_UI_VULKAN_FUNCTION(vkCmdPushConstants) -XE_UI_VULKAN_FUNCTION(vkCmdResolveImage) XE_UI_VULKAN_FUNCTION(vkCmdSetBlendConstants) XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBias) -XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBounds) -XE_UI_VULKAN_FUNCTION(vkCmdSetLineWidth) XE_UI_VULKAN_FUNCTION(vkCmdSetScissor) XE_UI_VULKAN_FUNCTION(vkCmdSetStencilCompareMask) XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference) XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask) XE_UI_VULKAN_FUNCTION(vkCmdSetViewport) XE_UI_VULKAN_FUNCTION(vkCreateBuffer) +XE_UI_VULKAN_FUNCTION(vkCreateBufferView) XE_UI_VULKAN_FUNCTION(vkCreateCommandPool) +XE_UI_VULKAN_FUNCTION(vkCreateComputePipelines) XE_UI_VULKAN_FUNCTION(vkCreateDescriptorPool) XE_UI_VULKAN_FUNCTION(vkCreateDescriptorSetLayout) XE_UI_VULKAN_FUNCTION(vkCreateFence) @@ -43,13 +39,13 @@ XE_UI_VULKAN_FUNCTION(vkCreateFramebuffer) XE_UI_VULKAN_FUNCTION(vkCreateGraphicsPipelines) XE_UI_VULKAN_FUNCTION(vkCreateImage) XE_UI_VULKAN_FUNCTION(vkCreateImageView) -XE_UI_VULKAN_FUNCTION(vkCreatePipelineCache) XE_UI_VULKAN_FUNCTION(vkCreatePipelineLayout) XE_UI_VULKAN_FUNCTION(vkCreateRenderPass) XE_UI_VULKAN_FUNCTION(vkCreateSampler) XE_UI_VULKAN_FUNCTION(vkCreateSemaphore) XE_UI_VULKAN_FUNCTION(vkCreateShaderModule) XE_UI_VULKAN_FUNCTION(vkDestroyBuffer) +XE_UI_VULKAN_FUNCTION(vkDestroyBufferView) XE_UI_VULKAN_FUNCTION(vkDestroyCommandPool) XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorPool) XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorSetLayout) @@ -58,7 +54,6 @@ XE_UI_VULKAN_FUNCTION(vkDestroyFramebuffer) XE_UI_VULKAN_FUNCTION(vkDestroyImage) XE_UI_VULKAN_FUNCTION(vkDestroyImageView) XE_UI_VULKAN_FUNCTION(vkDestroyPipeline) -XE_UI_VULKAN_FUNCTION(vkDestroyPipelineCache) XE_UI_VULKAN_FUNCTION(vkDestroyPipelineLayout) XE_UI_VULKAN_FUNCTION(vkDestroyRenderPass) XE_UI_VULKAN_FUNCTION(vkDestroySampler) @@ -66,23 +61,18 @@ XE_UI_VULKAN_FUNCTION(vkDestroySemaphore) XE_UI_VULKAN_FUNCTION(vkDestroyShaderModule) XE_UI_VULKAN_FUNCTION(vkEndCommandBuffer) XE_UI_VULKAN_FUNCTION(vkFlushMappedMemoryRanges) -XE_UI_VULKAN_FUNCTION(vkFreeCommandBuffers) -XE_UI_VULKAN_FUNCTION(vkFreeDescriptorSets) XE_UI_VULKAN_FUNCTION(vkFreeMemory)
XE_UI_VULKAN_FUNCTION(vkGetBufferMemoryRequirements) XE_UI_VULKAN_FUNCTION(vkGetDeviceQueue) XE_UI_VULKAN_FUNCTION(vkGetFenceStatus) XE_UI_VULKAN_FUNCTION(vkGetImageMemoryRequirements) -XE_UI_VULKAN_FUNCTION(vkGetImageSubresourceLayout) -XE_UI_VULKAN_FUNCTION(vkGetPipelineCacheData) XE_UI_VULKAN_FUNCTION(vkInvalidateMappedMemoryRanges) XE_UI_VULKAN_FUNCTION(vkMapMemory) -XE_UI_VULKAN_FUNCTION(vkResetCommandBuffer) XE_UI_VULKAN_FUNCTION(vkResetCommandPool) XE_UI_VULKAN_FUNCTION(vkResetDescriptorPool) XE_UI_VULKAN_FUNCTION(vkResetFences) +XE_UI_VULKAN_FUNCTION(vkQueueBindSparse) XE_UI_VULKAN_FUNCTION(vkQueueSubmit) -XE_UI_VULKAN_FUNCTION(vkQueueWaitIdle) XE_UI_VULKAN_FUNCTION(vkUnmapMemory) XE_UI_VULKAN_FUNCTION(vkUpdateDescriptorSets) XE_UI_VULKAN_FUNCTION(vkWaitForFences) diff --git a/src/xenia/ui/vulkan/functions/device_amd_shader_info.inc b/src/xenia/ui/vulkan/functions/device_amd_shader_info.inc deleted file mode 100644 index 2da2b31b6..000000000 --- a/src/xenia/ui/vulkan/functions/device_amd_shader_info.inc +++ /dev/null @@ -1,2 +0,0 @@ -// VK_AMD_shader_info functions used in Xenia. -XE_UI_VULKAN_FUNCTION(vkGetShaderInfoAMD) diff --git a/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc b/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc new file mode 100644 index 000000000..ebefbc50f --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc @@ -0,0 +1,4 @@ +// VK_KHR_bind_memory2 functions used in Xenia. +// Promoted to Vulkan 1.1 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindBufferMemory2KHR, vkBindBufferMemory2) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindImageMemory2KHR, vkBindImageMemory2) diff --git a/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc b/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc new file mode 100644 index 000000000..11068c485 --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc @@ -0,0 +1,6 @@ +// VK_KHR_get_memory_requirements2 functions used in Xenia. +// Promoted to Vulkan 1.1 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetBufferMemoryRequirements2KHR, + vkGetBufferMemoryRequirements2) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetImageMemoryRequirements2KHR, + vkGetImageMemoryRequirements2) diff --git a/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc b/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc new file mode 100644 index 000000000..11c078792 --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc @@ -0,0 +1,6 @@ +// VK_KHR_maintenance4 functions used in Xenia. +// Promoted to Vulkan 1.3 core. 
+XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceBufferMemoryRequirementsKHR, + vkGetDeviceBufferMemoryRequirements) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceImageMemoryRequirementsKHR, + vkGetDeviceImageMemoryRequirements) diff --git a/src/xenia/ui/vulkan/functions/instance_1_0.inc b/src/xenia/ui/vulkan/functions/instance_1_0.inc index 0da71a57c..b4ad9344a 100644 --- a/src/xenia/ui/vulkan/functions/instance_1_0.inc +++ b/src/xenia/ui/vulkan/functions/instance_1_0.inc @@ -6,7 +6,6 @@ XE_UI_VULKAN_FUNCTION(vkEnumeratePhysicalDevices) XE_UI_VULKAN_FUNCTION(vkGetDeviceProcAddr) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFeatures) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFormatProperties) -XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceImageFormatProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceMemoryProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceQueueFamilyProperties) diff --git a/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc b/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc index 05b2fe800..45153db06 100644 --- a/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc +++ b/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc @@ -1,4 +1,6 @@ // VK_KHR_get_physical_device_properties2 functions used in Xenia. // Promoted to Vulkan 1.1 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR, + vkGetPhysicalDeviceMemoryProperties2) XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR, vkGetPhysicalDeviceProperties2) diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua index a8fd2f319..454007bcb 100644 --- a/src/xenia/ui/vulkan/premake5.lua +++ b/src/xenia/ui/vulkan/premake5.lua @@ -7,10 +7,8 @@ project("xenia-ui-vulkan") kind("StaticLib") language("C++") links({ - "fmt", "xenia-base", "xenia-ui", - "xenia-ui-spirv", }) includedirs({ project_root.."/third_party/Vulkan-Headers/include", @@ -19,9 +17,7 @@ project("xenia-ui-vulkan") local_platform_files("functions") files({ "../shaders/bytecode/vulkan_spirv/*.h", - "shaders/bytecode/vulkan_spirv/*.h", }) - removefiles({"*_demo.cc"}) group("demos") project("xenia-ui-window-vulkan-demo") @@ -33,7 +29,6 @@ project("xenia-ui-window-vulkan-demo") "imgui", "xenia-base", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", }) includedirs({ diff --git a/src/xenia/ui/vulkan/shaders/blit.vs.glsl b/src/xenia/ui/vulkan/shaders/blit.vs.glsl deleted file mode 100644 index 432a1395c..000000000 --- a/src/xenia/ui/vulkan/shaders/blit.vs.glsl +++ /dev/null @@ -1,31 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. 
- -#version 450 core -precision highp float; - -layout(push_constant) uniform PushConstants { - // normalized [x, y, w, h] - layout(offset = 0x00) vec4 src_uv; - layout(offset = 0x10) vec4 dst_uv; -} push_constants; - -layout(location = 0) out vec2 vtx_uv; - -void main() { - const vec2 vtx_arr[4]=vec2[4]( - vec2(0,0), - vec2(1,0), - vec2(0,1), - vec2(1,1) - ); - - vec2 vfetch_pos = vtx_arr[gl_VertexIndex]; - vec2 scaled_pos = vfetch_pos.xy * vec2(2.0, 2.0) - vec2(1.0, 1.0); - vec4 scaled_dst_uv = push_constants.dst_uv * vec4(2.0); - gl_Position = - vec4(scaled_dst_uv.xy - vec2(1.0) + vfetch_pos.xy * scaled_dst_uv.zw, 0.0, - 1.0); - - vtx_uv = vfetch_pos.xy * push_constants.src_uv.zw + push_constants.src_uv.xy; -} \ No newline at end of file diff --git a/src/xenia/ui/vulkan/shaders/blit_color.ps.glsl b/src/xenia/ui/vulkan/shaders/blit_color.ps.glsl deleted file mode 100644 index 682ae6af4..000000000 --- a/src/xenia/ui/vulkan/shaders/blit_color.ps.glsl +++ /dev/null @@ -1,20 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -precision highp float; - -layout(push_constant) uniform PushConstants { - layout(offset = 0x20) vec3 _pad; - layout(offset = 0x2C) int swap; -} push_constants; - -layout(set = 0, binding = 0) uniform sampler2D src_texture; - -layout(location = 0) in vec2 vtx_uv; -layout(location = 0) out vec4 oC; - -void main() { - oC = texture(src_texture, vtx_uv); - if (push_constants.swap != 0) oC = oC.bgra; -} \ No newline at end of file diff --git a/src/xenia/ui/vulkan/shaders/blit_depth.ps.glsl b/src/xenia/ui/vulkan/shaders/blit_depth.ps.glsl deleted file mode 100644 index a22f1e5a6..000000000 --- a/src/xenia/ui/vulkan/shaders/blit_depth.ps.glsl +++ /dev/null @@ -1,19 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -precision highp float; - -layout(push_constant) uniform PushConstants { - layout(offset = 0x20) vec3 _pad; - layout(offset = 0x2C) int swap; -} push_constants; - -layout(set = 0, binding = 0) uniform sampler2D src_texture; - -layout(location = 0) in vec2 vtx_uv; -layout(location = 0) out vec4 oC; - -void main() { - gl_FragDepth = texture(src_texture, vtx_uv).r; -} \ No newline at end of file diff --git a/src/xenia/ui/vulkan/shaders/bytecode/.clang-format b/src/xenia/ui/vulkan/shaders/bytecode/.clang-format deleted file mode 100644 index 9d159247d..000000000 --- a/src/xenia/ui/vulkan/shaders/bytecode/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -DisableFormat: true -SortIncludes: false diff --git a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h b/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h deleted file mode 100644 index 33c955fea..000000000 --- a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h +++ /dev/null @@ -1,99 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24608 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %5663 "main" %4841 %5592 - OpExecutionMode %5663 OriginUpperLeft - OpDecorate %4841 Location 0 - OpDecorate %5164 DescriptorSet 0 - OpDecorate %5164 Binding 0 - OpDecorate %5592 Location 0 - OpMemberDecorate %_struct_1019 0 Offset 32 - OpMemberDecorate %_struct_1019 1 Offset 44 - OpDecorate %_struct_1019 Block - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %4841 = OpVariable %_ptr_Output_v4float Output - %150 = OpTypeImage %float 2D 0 0 0 1 Unknown - %510 = OpTypeSampledImage %150 -%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510 - %5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant - %v2float = OpTypeVector %float 2 -%_ptr_Input_v2float = OpTypePointer Input %v2float - %5592 = OpVariable %_ptr_Input_v2float Input - %v3float = OpTypeVector %float 3 - %int = OpTypeInt 32 1 -%_struct_1019 = OpTypeStruct %v3float %int -%_ptr_PushConstant__struct_1019 = OpTypePointer PushConstant %_struct_1019 - %3463 = OpVariable %_ptr_PushConstant__struct_1019 PushConstant - %int_1 = OpConstant %int 1 -%_ptr_PushConstant_int = OpTypePointer PushConstant %int - %int_0 = OpConstant %int 0 - %bool = OpTypeBool - %5663 = OpFunction %void None %1282 - %24607 = OpLabel - %21248 = OpLoad %510 %5164 - %19293 = OpLoad %v2float %5592 - %8148 = OpImageSampleImplicitLod %v4float %21248 %19293 - OpStore %4841 %8148 - %20291 = OpAccessChain %_ptr_PushConstant_int %3463 %int_1 - %11639 = OpLoad %int %20291 - %12913 = OpINotEqual %bool %11639 %int_0 - OpSelectionMerge %19578 None - OpBranchConditional %12913 %13163 %19578 - %13163 = OpLabel - %9669 = OpLoad %v4float %4841 - %6737 = OpVectorShuffle %v4float %9669 %9669 2 1 0 3 - OpStore %4841 %6737 - OpBranch %19578 - %19578 = OpLabel - OpReturn - OpFunctionEnd -#endif - -const uint32_t blit_color_ps[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011, - 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0007000F, 0x00000004, - 0x0000161F, 0x6E69616D, 0x00000000, 0x000012E9, 0x000015D8, 0x00030010, - 0x0000161F, 0x00000007, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000, - 0x00040047, 0x0000142C, 0x00000022, 0x00000000, 0x00040047, 0x0000142C, - 0x00000021, 0x00000000, 0x00040047, 0x000015D8, 0x0000001E, 0x00000000, - 0x00050048, 0x000003FB, 0x00000000, 0x00000023, 0x00000020, 0x00050048, - 0x000003FB, 0x00000001, 0x00000023, 0x0000002C, 0x00030047, 0x000003FB, - 0x00000002, 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, - 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, - 0x00000004, 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B, - 0x0000029A, 0x000012E9, 0x00000003, 0x00090019, 0x00000096, 0x0000000D, - 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, - 0x0003001B, 0x000001FE, 0x00000096, 0x00040020, 0x0000047B, 0x00000000, - 0x000001FE, 0x0004003B, 0x0000047B, 0x0000142C, 0x00000000, 0x00040017, - 0x00000013, 0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000001, - 0x00000013, 0x0004003B, 0x00000290, 0x000015D8, 0x00000001, 0x00040017, - 0x00000018, 0x0000000D, 0x00000003, 0x00040015, 0x0000000C, 0x00000020, - 0x00000001, 
0x0004001E, 0x000003FB, 0x00000018, 0x0000000C, 0x00040020, - 0x00000678, 0x00000009, 0x000003FB, 0x0004003B, 0x00000678, 0x00000D87, - 0x00000009, 0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, - 0x00000289, 0x00000009, 0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B, - 0x00000000, 0x00020014, 0x00000009, 0x00050036, 0x00000008, 0x0000161F, - 0x00000000, 0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE, - 0x00005300, 0x0000142C, 0x0004003D, 0x00000013, 0x00004B5D, 0x000015D8, - 0x00050057, 0x0000001D, 0x00001FD4, 0x00005300, 0x00004B5D, 0x0003003E, - 0x000012E9, 0x00001FD4, 0x00050041, 0x00000289, 0x00004F43, 0x00000D87, - 0x00000A0E, 0x0004003D, 0x0000000C, 0x00002D77, 0x00004F43, 0x000500AB, - 0x00000009, 0x00003271, 0x00002D77, 0x00000A0B, 0x000300F7, 0x00004C7A, - 0x00000000, 0x000400FA, 0x00003271, 0x0000336B, 0x00004C7A, 0x000200F8, - 0x0000336B, 0x0004003D, 0x0000001D, 0x000025C5, 0x000012E9, 0x0009004F, - 0x0000001D, 0x00001A51, 0x000025C5, 0x000025C5, 0x00000002, 0x00000001, - 0x00000000, 0x00000003, 0x0003003E, 0x000012E9, 0x00001A51, 0x000200F9, - 0x00004C7A, 0x000200F8, 0x00004C7A, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h b/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h deleted file mode 100644 index 63baaf2e1..000000000 --- a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h +++ /dev/null @@ -1,70 +0,0 @@ -// Generated with `xb buildshaders`. -#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24608 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %5663 "main" %gl_FragDepth %5592 %4841 - OpExecutionMode %5663 OriginUpperLeft - OpExecutionMode %5663 DepthReplacing - OpDecorate %gl_FragDepth BuiltIn FragDepth - OpDecorate %5164 DescriptorSet 0 - OpDecorate %5164 Binding 0 - OpDecorate %5592 Location 0 - OpDecorate %4841 Location 0 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 -%_ptr_Output_float = OpTypePointer Output %float -%gl_FragDepth = OpVariable %_ptr_Output_float Output - %150 = OpTypeImage %float 2D 0 0 0 1 Unknown - %510 = OpTypeSampledImage %150 -%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510 - %5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant - %v2float = OpTypeVector %float 2 -%_ptr_Input_v2float = OpTypePointer Input %v2float - %5592 = OpVariable %_ptr_Input_v2float Input - %v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %4841 = OpVariable %_ptr_Output_v4float Output - %5663 = OpFunction %void None %1282 - %24607 = OpLabel - %21248 = OpLoad %510 %5164 - %19654 = OpLoad %v2float %5592 - %23875 = OpImageSampleImplicitLod %v4float %21248 %19654 - %15662 = OpCompositeExtract %float %23875 0 - OpStore %gl_FragDepth %15662 - OpReturn - OpFunctionEnd -#endif - -const uint32_t blit_depth_ps[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011, - 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000004, - 0x0000161F, 0x6E69616D, 0x00000000, 0x000011F3, 0x000015D8, 0x000012E9, - 0x00030010, 0x0000161F, 0x00000007, 0x00030010, 0x0000161F, 0x0000000C, - 0x00040047, 0x000011F3, 0x0000000B, 0x00000016, 0x00040047, 0x0000142C, - 0x00000022, 0x00000000, 0x00040047, 0x0000142C, 0x00000021, 0x00000000, - 0x00040047, 0x000015D8, 
0x0000001E, 0x00000000, 0x00040047, 0x000012E9, - 0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502, - 0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040020, 0x0000028A, - 0x00000003, 0x0000000D, 0x0004003B, 0x0000028A, 0x000011F3, 0x00000003, - 0x00090019, 0x00000096, 0x0000000D, 0x00000001, 0x00000000, 0x00000000, - 0x00000000, 0x00000001, 0x00000000, 0x0003001B, 0x000001FE, 0x00000096, - 0x00040020, 0x0000047B, 0x00000000, 0x000001FE, 0x0004003B, 0x0000047B, - 0x0000142C, 0x00000000, 0x00040017, 0x00000013, 0x0000000D, 0x00000002, - 0x00040020, 0x00000290, 0x00000001, 0x00000013, 0x0004003B, 0x00000290, - 0x000015D8, 0x00000001, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, - 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B, 0x0000029A, - 0x000012E9, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, - 0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE, 0x00005300, - 0x0000142C, 0x0004003D, 0x00000013, 0x00004CC6, 0x000015D8, 0x00050057, - 0x0000001D, 0x00005D43, 0x00005300, 0x00004CC6, 0x00050051, 0x0000000D, - 0x00003D2E, 0x00005D43, 0x00000000, 0x0003003E, 0x000011F3, 0x00003D2E, - 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h b/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h deleted file mode 100644 index c8dc3c96f..000000000 --- a/src/xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h +++ /dev/null @@ -1,149 +0,0 @@ -// Generated with `xb buildshaders`. -#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 25137 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Vertex %5663 "main" %gl_VertexIndex %4930 %5592 - OpDecorate %gl_VertexIndex BuiltIn VertexIndex - OpMemberDecorate %_struct_1080 0 Offset 0 - OpMemberDecorate %_struct_1080 1 Offset 16 - OpDecorate %_struct_1080 Block - OpMemberDecorate %_struct_1589 0 BuiltIn Position - OpMemberDecorate %_struct_1589 1 BuiltIn PointSize - OpMemberDecorate %_struct_1589 2 BuiltIn ClipDistance - OpMemberDecorate %_struct_1589 3 BuiltIn CullDistance - OpDecorate %_struct_1589 Block - OpDecorate %5592 Location 0 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 - %float_0 = OpConstant %float 0 - %1823 = OpConstantComposite %v2float %float_0 %float_0 - %float_1 = OpConstant %float 1 - %312 = OpConstantComposite %v2float %float_1 %float_0 - %889 = OpConstantComposite %v2float %float_0 %float_1 - %768 = OpConstantComposite %v2float %float_1 %float_1 - %809 = OpConstantComposite %_arr_v2float_uint_4 %1823 %312 %889 %768 - %int = OpTypeInt 32 1 -%_ptr_Input_int = OpTypePointer Input %int -%gl_VertexIndex = OpVariable %_ptr_Input_int Input -%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 - %float_2 = OpConstant %float 2 - %v4float = OpTypeVector %float 4 -%_struct_1080 = OpTypeStruct %v4float %v4float -%_ptr_PushConstant__struct_1080 = OpTypePointer PushConstant %_struct_1080 - %3463 = OpVariable %_ptr_PushConstant__struct_1080 PushConstant - %int_1 = OpConstant %int 1 -%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float - %2243 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 - %uint_1 = OpConstant %uint 1 
-%_arr_float_uint_1 = OpTypeArray %float %uint_1 -%_struct_1589 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 -%_ptr_Output__struct_1589 = OpTypePointer Output %_struct_1589 - %4930 = OpVariable %_ptr_Output__struct_1589 Output - %int_0 = OpConstant %int 0 -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Output_v2float = OpTypePointer Output %v2float - %5592 = OpVariable %_ptr_Output_v2float Output - %5663 = OpFunction %void None %1282 - %24953 = OpLabel - %5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function - %24173 = OpLoad %int %gl_VertexIndex - OpStore %5238 %809 - %16679 = OpAccessChain %_ptr_Function_v2float %5238 %24173 - %7372 = OpLoad %v2float %16679 - %21446 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_1 - %10986 = OpLoad %v4float %21446 - %7772 = OpFMul %v4float %10986 %2243 - %17065 = OpVectorShuffle %v2float %7772 %7772 0 1 - %22600 = OpFSub %v2float %17065 %768 - %7156 = OpVectorShuffle %v2float %7772 %7772 2 3 - %20491 = OpFMul %v2float %7372 %7156 - %18197 = OpFAdd %v2float %22600 %20491 - %10599 = OpCompositeExtract %float %18197 0 - %13956 = OpCompositeExtract %float %18197 1 - %18260 = OpCompositeConstruct %v4float %10599 %13956 %float_0 %float_1 - %8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %8483 %18260 - %20171 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0 - %6318 = OpLoad %v4float %20171 - %7688 = OpVectorShuffle %v2float %6318 %6318 2 3 - %18797 = OpFMul %v2float %7372 %7688 - %18691 = OpVectorShuffle %v2float %6318 %6318 0 1 - %25136 = OpFAdd %v2float %18797 %18691 - OpStore %5592 %25136 - OpReturn - OpFunctionEnd -#endif - -const uint32_t blit_vs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00006231, 0x00000000, 0x00020011, - 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000000, - 0x0000161F, 0x6E69616D, 0x00000000, 0x00001029, 0x00001342, 0x000015D8, - 0x00040047, 0x00001029, 0x0000000B, 0x0000002A, 0x00050048, 0x00000438, - 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000438, 0x00000001, - 0x00000023, 0x00000010, 0x00030047, 0x00000438, 0x00000002, 0x00050048, - 0x00000635, 0x00000000, 0x0000000B, 0x00000000, 0x00050048, 0x00000635, - 0x00000001, 0x0000000B, 0x00000001, 0x00050048, 0x00000635, 0x00000002, - 0x0000000B, 0x00000003, 0x00050048, 0x00000635, 0x00000003, 0x0000000B, - 0x00000004, 0x00030047, 0x00000635, 0x00000002, 0x00040047, 0x000015D8, - 0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502, - 0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x00000013, - 0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013, - 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, - 0x00000A16, 0x00000004, 0x0004001C, 0x00000276, 0x00000013, 0x00000A16, - 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, 0x0005002C, 0x00000013, - 0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B, 0x0000000D, 0x0000008A, - 0x3F800000, 0x0005002C, 0x00000013, 0x00000138, 0x0000008A, 0x00000A0C, - 0x0005002C, 0x00000013, 0x00000379, 0x00000A0C, 0x0000008A, 0x0005002C, - 0x00000013, 0x00000300, 0x0000008A, 0x0000008A, 0x0007002C, 0x00000276, - 0x00000329, 0x0000071F, 0x00000138, 0x00000379, 0x00000300, 0x00040015, - 0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000001, - 0x0000000C, 0x0004003B, 0x00000289, 0x00001029, 0x00000001, 0x00040020, - 0x000004F3, 0x00000007, 0x00000276, 0x0004002B, 0x0000000D, 0x00000018, - 0x40000000, 
0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E, - 0x00000438, 0x0000001D, 0x0000001D, 0x00040020, 0x000006B5, 0x00000009, - 0x00000438, 0x0004003B, 0x000006B5, 0x00000D87, 0x00000009, 0x0004002B, - 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x0000029A, 0x00000009, - 0x0000001D, 0x0007002C, 0x0000001D, 0x000008C3, 0x00000018, 0x00000018, - 0x00000018, 0x00000018, 0x0004002B, 0x0000000B, 0x00000A0D, 0x00000001, - 0x0004001C, 0x000002E3, 0x0000000D, 0x00000A0D, 0x0006001E, 0x00000635, - 0x0000001D, 0x0000000D, 0x000002E3, 0x000002E3, 0x00040020, 0x000008B2, - 0x00000003, 0x00000635, 0x0004003B, 0x000008B2, 0x00001342, 0x00000003, - 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, 0x0000029B, - 0x00000003, 0x0000001D, 0x00040020, 0x00000291, 0x00000003, 0x00000013, - 0x0004003B, 0x00000291, 0x000015D8, 0x00000003, 0x00050036, 0x00000008, - 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x00006179, 0x0004003B, - 0x000004F3, 0x00001476, 0x00000007, 0x0004003D, 0x0000000C, 0x00005E6D, - 0x00001029, 0x0003003E, 0x00001476, 0x00000329, 0x00050041, 0x00000290, - 0x00004127, 0x00001476, 0x00005E6D, 0x0004003D, 0x00000013, 0x00001CCC, - 0x00004127, 0x00050041, 0x0000029A, 0x000053C6, 0x00000D87, 0x00000A0E, - 0x0004003D, 0x0000001D, 0x00002AEA, 0x000053C6, 0x00050085, 0x0000001D, - 0x00001E5C, 0x00002AEA, 0x000008C3, 0x0007004F, 0x00000013, 0x000042A9, - 0x00001E5C, 0x00001E5C, 0x00000000, 0x00000001, 0x00050083, 0x00000013, - 0x00005848, 0x000042A9, 0x00000300, 0x0007004F, 0x00000013, 0x00001BF4, - 0x00001E5C, 0x00001E5C, 0x00000002, 0x00000003, 0x00050085, 0x00000013, - 0x0000500B, 0x00001CCC, 0x00001BF4, 0x00050081, 0x00000013, 0x00004715, - 0x00005848, 0x0000500B, 0x00050051, 0x0000000D, 0x00002967, 0x00004715, - 0x00000000, 0x00050051, 0x0000000D, 0x00003684, 0x00004715, 0x00000001, - 0x00070050, 0x0000001D, 0x00004754, 0x00002967, 0x00003684, 0x00000A0C, - 0x0000008A, 0x00050041, 0x0000029B, 0x00002123, 0x00001342, 0x00000A0B, - 0x0003003E, 0x00002123, 0x00004754, 0x00050041, 0x0000029A, 0x00004ECB, - 0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D, 0x000018AE, 0x00004ECB, - 0x0007004F, 0x00000013, 0x00001E08, 0x000018AE, 0x000018AE, 0x00000002, - 0x00000003, 0x00050085, 0x00000013, 0x0000496D, 0x00001CCC, 0x00001E08, - 0x0007004F, 0x00000013, 0x00004903, 0x000018AE, 0x000018AE, 0x00000000, - 0x00000001, 0x00050081, 0x00000013, 0x00006230, 0x0000496D, 0x00004903, - 0x0003003E, 0x000015D8, 0x00006230, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc new file mode 100644 index 000000000..5b07c0673 --- /dev/null +++ b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc @@ -0,0 +1,119 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h" + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace vulkan { + +SingleLayoutDescriptorSetPool::SingleLayoutDescriptorSetPool( + const VulkanProvider& provider, uint32_t pool_set_count, + uint32_t set_layout_descriptor_counts_count, + const VkDescriptorPoolSize* set_layout_descriptor_counts, + VkDescriptorSetLayout set_layout) + : provider_(provider), + pool_set_count_(pool_set_count), + set_layout_(set_layout) { + assert_not_zero(pool_set_count); + pool_descriptor_counts_.resize(set_layout_descriptor_counts_count); + for (uint32_t i = 0; i < set_layout_descriptor_counts_count; ++i) { + VkDescriptorPoolSize& pool_descriptor_type_count = + pool_descriptor_counts_[i]; + const VkDescriptorPoolSize& set_layout_descriptor_type_count = + set_layout_descriptor_counts[i]; + pool_descriptor_type_count.type = set_layout_descriptor_type_count.type; + pool_descriptor_type_count.descriptorCount = + set_layout_descriptor_type_count.descriptorCount * pool_set_count; + } +} + +SingleLayoutDescriptorSetPool::~SingleLayoutDescriptorSetPool() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + if (current_pool_ != VK_NULL_HANDLE) { + dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr); + } + for (VkDescriptorPool pool : full_pools_) { + dfn.vkDestroyDescriptorPool(device, pool, nullptr); + } +} + +size_t SingleLayoutDescriptorSetPool::Allocate() { + if (!descriptor_sets_free_.empty()) { + size_t free_index = descriptor_sets_free_.back(); + descriptor_sets_free_.pop_back(); + return free_index; + } + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + // Two iterations so if vkAllocateDescriptorSets fails even with a non-zero + // current_pool_sets_remaining_, another attempt will be made in a new pool. 
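+ // The first iteration tries the existing current_pool_ (if any); if the + // driver rejects the allocation there, that pool is retired to full_pools_ + // and the second iteration retries from a freshly created pool before + // giving up.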
+ for (uint32_t i = 0; i < 2; ++i) { + if (current_pool_ != VK_NULL_HANDLE && !current_pool_sets_remaining_) { + full_pools_.push_back(current_pool_); + current_pool_ = VK_NULL_HANDLE; + } + if (current_pool_ == VK_NULL_HANDLE) { + VkDescriptorPoolCreateInfo pool_create_info; + pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_create_info.pNext = nullptr; + pool_create_info.flags = 0; + pool_create_info.maxSets = pool_set_count_; + pool_create_info.poolSizeCount = uint32_t(pool_descriptor_counts_.size()); + pool_create_info.pPoolSizes = pool_descriptor_counts_.data(); + if (dfn.vkCreateDescriptorPool(device, &pool_create_info, nullptr, + &current_pool_) != VK_SUCCESS) { + XELOGE( + "SingleLayoutDescriptorSetPool: Failed to create a descriptor " + "pool"); + return SIZE_MAX; + } + current_pool_sets_remaining_ = pool_set_count_; + } + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorPool = current_pool_; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &set_layout_; + VkDescriptorSet descriptor_set; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleLayoutDescriptorSetPool: Failed to allocate a descriptor set"); + if (current_pool_sets_remaining_ >= pool_set_count_) { + // Failed to allocate in a new pool - something completely wrong, don't + // store empty pools as full. + dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr); + current_pool_ = VK_NULL_HANDLE; + return SIZE_MAX; + } + full_pools_.push_back(current_pool_); + current_pool_ = VK_NULL_HANDLE; + continue; + } + --current_pool_sets_remaining_; + descriptor_sets_.push_back(descriptor_set); + return descriptor_sets_.size() - 1; + } + + // Both attempts have failed. + return SIZE_MAX; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h new file mode 100644 index 000000000..c3f3eb080 --- /dev/null +++ b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h @@ -0,0 +1,63 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ +#define XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ + +#include <cstddef> +#include <cstdint> +#include <vector> + +#include "xenia/base/assert.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class SingleLayoutDescriptorSetPool { + public: + // set_layout_descriptor_counts must contain the numbers of descriptors of + // each type in a single set with the layout (the multiplication by the pool + // set count will be done internally). The descriptor set layout must not be + // destroyed until this object is also destroyed.
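+ // Allocate() returns a stable index into an internal array rather than the + // VkDescriptorSet itself; Get() maps the index to the set, and Free() pushes + // the index onto a free list for reuse (the underlying Vulkan pools are only + // destroyed by the destructor).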
+ SingleLayoutDescriptorSetPool( + const VulkanProvider& provider, uint32_t pool_set_count, + uint32_t set_layout_descriptor_counts_count, + const VkDescriptorPoolSize* set_layout_descriptor_counts, + VkDescriptorSetLayout set_layout); + ~SingleLayoutDescriptorSetPool(); + + // Returns SIZE_MAX in case of a failure. + size_t Allocate(); + void Free(size_t index) { + assert_true(index < descriptor_sets_.size()); + descriptor_sets_free_.push_back(index); + } + VkDescriptorSet Get(size_t index) const { return descriptor_sets_[index]; } + + private: + const VulkanProvider& provider_; + uint32_t pool_set_count_; + std::vector<VkDescriptorPoolSize> pool_descriptor_counts_; + VkDescriptorSetLayout set_layout_; + + std::vector<VkDescriptorPool> full_pools_; + VkDescriptorPool current_pool_ = VK_NULL_HANDLE; + uint32_t current_pool_sets_remaining_ = 0; + + std::vector<VkDescriptorSet> descriptor_sets_; + std::vector<size_t> descriptor_sets_free_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc new file mode 100644 index 000000000..44a3d31fe --- /dev/null +++ b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc @@ -0,0 +1,216 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" + +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +void SingleTypeDescriptorSetAllocator::Reset() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + page_usable_latest_.pool); + for (const std::pair<const uint32_t, Page>& page_pair : pages_usable_) { + dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr); + } + pages_usable_.clear(); + for (VkDescriptorPool pool : pages_full_) { + dfn.vkDestroyDescriptorPool(device, pool, nullptr); + } + pages_full_.clear(); +} + +VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate( + VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) { + assert_not_zero(descriptor_count); + if (descriptor_count == 0) { + return VK_NULL_HANDLE; + } + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout; + VkDescriptorSet descriptor_set; + + if (descriptor_count > descriptor_pool_size_.descriptorCount) { + // Can't allocate in the pool, need a dedicated allocation.
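+ // (The dedicated pool is created with maxSets = 1 and exactly the requested + // descriptor count, and is immediately retired to pages_full_ so that + // Reset() destroys it along with the regular pages.)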
+ VkDescriptorPoolSize dedicated_descriptor_pool_size; + dedicated_descriptor_pool_size.type = descriptor_pool_size_.type; + dedicated_descriptor_pool_size.descriptorCount = descriptor_count; + VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info; + dedicated_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dedicated_descriptor_pool_create_info.pNext = nullptr; + dedicated_descriptor_pool_create_info.flags = 0; + dedicated_descriptor_pool_create_info.maxSets = 1; + dedicated_descriptor_pool_create_info.poolSizeCount = 1; + dedicated_descriptor_pool_create_info.pPoolSizes = + &dedicated_descriptor_pool_size; + VkDescriptorPool dedicated_descriptor_pool; + if (dfn.vkCreateDescriptorPool( + device, &dedicated_descriptor_pool_create_info, nullptr, + &dedicated_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool " + "for {} descriptors", + dedicated_descriptor_pool_size.descriptorCount); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors " + "in a dedicated pool", + descriptor_count); + dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + pages_full_.push_back(dedicated_descriptor_pool); + return descriptor_set; + } + + // Try allocating from the latest page an allocation has happened from, to + // avoid detaching from the map and re-attaching for every allocation. + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + assert_not_zero(page_usable_latest_.descriptors_remaining); + assert_not_zero(page_usable_latest_.descriptor_sets_remaining); + if (page_usable_latest_.descriptors_remaining >= descriptor_count) { + descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) == VK_SUCCESS) { + page_usable_latest_.descriptors_remaining -= descriptor_count; + --page_usable_latest_.descriptor_sets_remaining; + if (!page_usable_latest_.descriptors_remaining || + !page_usable_latest_.descriptor_sets_remaining) { + pages_full_.push_back(page_usable_latest_.pool); + page_usable_latest_.pool = VK_NULL_HANDLE; + } + return descriptor_set; + } + // Failed to allocate internally even though there should be enough space, + // don't try to allocate from this pool again at all. + pages_full_.push_back(page_usable_latest_.pool); + page_usable_latest_.pool = VK_NULL_HANDLE; + } + } + + // If allocating from the latest pool wasn't possible, pick any that has free + // space. Prefer filling pages that have the most free space as they can more + // likely be used for more allocations later. + while (!pages_usable_.empty()) { + auto page_usable_last_it = std::prev(pages_usable_.cend()); + if (page_usable_last_it->second.descriptors_remaining < descriptor_count) { + // All other pages_usable_ entries have fewer free descriptors too (the + // remaining count is the map key). 
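+ // None of them can satisfy this request either, so fall through to + // creating a brand-new page below.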
+ break; + } + // Remove the page from the map unconditionally - in case of a successful + // allocation, it will have a different number of free descriptors, thus a + // new map key (but it will also become page_usable_latest_ instead even), + // or will become full, and in case of a failure to allocate internally even + // though there still should be enough space, it should never be allocated + // from again. + Page map_page = page_usable_last_it->second; + pages_usable_.erase(page_usable_last_it); + descriptor_set_allocate_info.descriptorPool = map_page.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + pages_full_.push_back(map_page.pool); + continue; + } + map_page.descriptors_remaining -= descriptor_count; + --map_page.descriptor_sets_remaining; + if (!map_page.descriptors_remaining || + !map_page.descriptor_sets_remaining) { + pages_full_.push_back(map_page.pool); + } else { + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Make the page with more free descriptors the next to allocate from. + if (map_page.descriptors_remaining > + page_usable_latest_.descriptors_remaining) { + pages_usable_.emplace(page_usable_latest_.descriptors_remaining, + page_usable_latest_); + page_usable_latest_ = map_page; + } else { + pages_usable_.emplace(map_page.descriptors_remaining, map_page); + } + } else { + page_usable_latest_ = map_page; + } + } + return descriptor_set; + } + + // Try allocating from a new page. + VkDescriptorPoolCreateInfo new_descriptor_pool_create_info; + new_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + new_descriptor_pool_create_info.pNext = nullptr; + new_descriptor_pool_create_info.flags = 0; + new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_; + new_descriptor_pool_create_info.poolSizeCount = 1; + new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_; + VkDescriptorPool new_descriptor_pool; + if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info, + nullptr, &new_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets " + "with {} descriptors", + descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = new_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors", + descriptor_count); + dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + Page new_page; + new_page.pool = new_descriptor_pool; + new_page.descriptors_remaining = + descriptor_pool_size_.descriptorCount - descriptor_count; + new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1; + if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) { + pages_full_.push_back(new_page.pool); + } else { + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Make the page with more free descriptors the next to allocate from. 
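+ // (page_usable_latest_ acts as a one-element cache in front of + // pages_usable_, holding the preferred page for the next allocation.)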
+ if (new_page.descriptors_remaining > + page_usable_latest_.descriptors_remaining) { + pages_usable_.emplace(page_usable_latest_.descriptors_remaining, + page_usable_latest_); + page_usable_latest_ = new_page; + } else { + pages_usable_.emplace(new_page.descriptors_remaining, new_page); + } + } else { + page_usable_latest_ = new_page; + } + } + return descriptor_set; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h new file mode 100644 index 000000000..7a21f6f35 --- /dev/null +++ b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h @@ -0,0 +1,84 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_ +#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_ + +#include <algorithm> +#include <cstdint> +#include <map> +#include <vector> + +#include "xenia/base/assert.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// Allocates multiple descriptors of a single type in descriptor set layouts +// consisting of descriptors of only that type. There's no way to free these +// descriptors within the SingleTypeDescriptorSetAllocator; per-layout free +// lists should be used externally. +class SingleTypeDescriptorSetAllocator { + public: + explicit SingleTypeDescriptorSetAllocator( + const ui::vulkan::VulkanProvider& provider, + VkDescriptorType descriptor_type, uint32_t descriptors_per_page, + uint32_t descriptor_sets_per_page) + : provider_(provider), + descriptor_sets_per_page_(descriptor_sets_per_page) { + assert_not_zero(descriptor_sets_per_page_); + descriptor_pool_size_.type = descriptor_type; + // Sets with 0 descriptors are not allocated using the allocator, so it + // would be pointless to have the descriptor count below the set count. + descriptor_pool_size_.descriptorCount = + std::max(descriptors_per_page, descriptor_sets_per_page); + } + SingleTypeDescriptorSetAllocator( + const SingleTypeDescriptorSetAllocator& allocator) = delete; + SingleTypeDescriptorSetAllocator& operator=( + const SingleTypeDescriptorSetAllocator& allocator) = delete; + ~SingleTypeDescriptorSetAllocator() { Reset(); } + + void Reset(); + + VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout, + uint32_t descriptor_count); + + private: + struct Page { + VkDescriptorPool pool; + uint32_t descriptors_remaining; + uint32_t descriptor_sets_remaining; + }; + + const ui::vulkan::VulkanProvider& provider_; + + VkDescriptorPoolSize descriptor_pool_size_; + uint32_t descriptor_sets_per_page_; + + std::vector<VkDescriptorPool> pages_full_; + // Because allocations must be contiguous, overflow may happen even if a page + // still has free descriptors, so multiple pages may have free space.
+  // To avoid removing and re-adding a page to the map that keeps pages sorted
+  // (the key is the number of free descriptors remaining, which changes at
+  // every allocation from a page), one page is kept outside the map, and
+  // allocation attempts are made from that page first instead of always
+  // searching the map for free space.
+  std::multimap<uint32_t, Page> pages_usable_;
+  // Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
+  Page page_usable_latest_ = {};
+};
+
+} // namespace vulkan
+} // namespace ui
+} // namespace xe
+
+#endif  // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
diff --git a/src/xenia/ui/vulkan/spirv_tools_context.cc b/src/xenia/ui/vulkan/spirv_tools_context.cc
new file mode 100644
index 000000000..0565e1f60
--- /dev/null
+++ b/src/xenia/ui/vulkan/spirv_tools_context.cc
@@ -0,0 +1,123 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/ui/vulkan/spirv_tools_context.h"
+
+#include <cstdlib>
+#include <filesystem>
+#include <string>
+
+#include "xenia/base/logging.h"
+#include "xenia/base/platform.h"
+
+#if XE_PLATFORM_LINUX
+#include <dlfcn.h>
+#elif XE_PLATFORM_WIN32
+#include "xenia/base/platform_win.h"
+#endif
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+bool SpirvToolsContext::Initialize(unsigned int spirv_version) {
+  const char* vulkan_sdk_env = std::getenv("VULKAN_SDK");
+  if (!vulkan_sdk_env) {
+    XELOGE("SPIRV-Tools: Failed to get the VULKAN_SDK environment variable");
+    Shutdown();
+    return false;
+  }
+  std::filesystem::path vulkan_sdk_path(vulkan_sdk_env);
+#if XE_PLATFORM_LINUX
+  library_ = dlopen((vulkan_sdk_path / "bin/libSPIRV-Tools-shared.so").c_str(),
+                    RTLD_NOW | RTLD_LOCAL);
+  if (!library_) {
+    XELOGE(
+        "SPIRV-Tools: Failed to load $VULKAN_SDK/bin/libSPIRV-Tools-shared.so");
+    Shutdown();
+    return false;
+  }
+#elif XE_PLATFORM_WIN32
+  library_ = LoadLibraryW(
+      (vulkan_sdk_path / "Bin/SPIRV-Tools-shared.dll").wstring().c_str());
+  if (!library_) {
+    XELOGE(
+        "SPIRV-Tools: Failed to load %VULKAN_SDK%/Bin/SPIRV-Tools-shared.dll");
+    Shutdown();
+    return false;
+  }
+#else
+#error No SPIRV-Tools library loading provided for the target platform.
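+// (Hypothetical sketch: a port to another POSIX platform would mirror the
+// Linux branch above - for instance, a macOS build, assuming the LunarG SDK
+// directory layout, might try
+//   library_ = dlopen((vulkan_sdk_path / "lib/libSPIRV-Tools-shared.dylib")
+//                         .c_str(),
+//                     RTLD_NOW | RTLD_LOCAL);
+// None of this is implemented here; only Linux and Win32 are supported.)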
+#endif
+  if (!LoadLibraryFunction(fn_spvContextCreate_, "spvContextCreate") ||
+      !LoadLibraryFunction(fn_spvContextDestroy_, "spvContextDestroy") ||
+      !LoadLibraryFunction(fn_spvValidateBinary_, "spvValidateBinary") ||
+      !LoadLibraryFunction(fn_spvDiagnosticDestroy_, "spvDiagnosticDestroy")) {
+    XELOGE("SPIRV-Tools: Failed to get library function pointers");
+    Shutdown();
+    return false;
+  }
+  spv_target_env target_env;
+  if (spirv_version >= 0x10500) {
+    target_env = SPV_ENV_VULKAN_1_2;
+  } else if (spirv_version >= 0x10400) {
+    target_env = SPV_ENV_VULKAN_1_1_SPIRV_1_4;
+  } else if (spirv_version >= 0x10300) {
+    target_env = SPV_ENV_VULKAN_1_1;
+  } else {
+    target_env = SPV_ENV_VULKAN_1_0;
+  }
+  context_ = fn_spvContextCreate_(target_env);
+  if (!context_) {
+    XELOGE("SPIRV-Tools: Failed to create a context for the target "
+           "environment");
+    Shutdown();
+    return false;
+  }
+  return true;
+}
+
+void SpirvToolsContext::Shutdown() {
+  if (context_) {
+    fn_spvContextDestroy_(context_);
+    context_ = nullptr;
+  }
+  if (library_) {
+#if XE_PLATFORM_LINUX
+    dlclose(library_);
+#elif XE_PLATFORM_WIN32
+    FreeLibrary(library_);
+#endif
+    library_ = nullptr;
+  }
+}
+
+spv_result_t SpirvToolsContext::Validate(const uint32_t* words,
+                                         size_t num_words,
+                                         std::string* error) const {
+  if (error) {
+    error->clear();
+  }
+  if (!context_) {
+    return SPV_UNSUPPORTED;
+  }
+  spv_diagnostic diagnostic = nullptr;
+  spv_result_t result =
+      fn_spvValidateBinary_(context_, words, num_words, &diagnostic);
+  if (diagnostic) {
+    if (error && diagnostic->error) {
+      *error = diagnostic->error;
+    }
+    fn_spvDiagnosticDestroy_(diagnostic);
+  }
+  return result;
+}
+
+} // namespace vulkan
+} // namespace ui
+} // namespace xe
diff --git a/src/xenia/ui/vulkan/spirv_tools_context.h b/src/xenia/ui/vulkan/spirv_tools_context.h
new file mode 100644
index 000000000..2ffea1ebd
--- /dev/null
+++ b/src/xenia/ui/vulkan/spirv_tools_context.h
@@ -0,0 +1,72 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
+#define XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
+
+#include <cstdint>
+#include <string>
+
+#include "third_party/SPIRV-Tools/include/spirv-tools/libspirv.h"
+#include "xenia/base/platform.h"
+
+#if XE_PLATFORM_LINUX
+#include <dlfcn.h>
+#elif XE_PLATFORM_WIN32
+#include "xenia/base/platform_win.h"
+#endif
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+class SpirvToolsContext {
+ public:
+  SpirvToolsContext() {}
+  SpirvToolsContext(const SpirvToolsContext& context) = delete;
+  SpirvToolsContext& operator=(const SpirvToolsContext& context) = delete;
+  ~SpirvToolsContext() { Shutdown(); }
+  bool Initialize(unsigned int spirv_version);
+  void Shutdown();
+
+  spv_result_t Validate(const uint32_t* words, size_t num_words,
+                        std::string* error) const;
+
+ private:
+#if XE_PLATFORM_LINUX
+  void* library_ = nullptr;
+#elif XE_PLATFORM_WIN32
+  HMODULE library_ = nullptr;
+#endif
+
+  template <typename FunctionPointer>
+  bool LoadLibraryFunction(FunctionPointer& function, const char* name) {
+#if XE_PLATFORM_LINUX
+    function = reinterpret_cast<FunctionPointer>(dlsym(library_, name));
+#elif XE_PLATFORM_WIN32
+    function =
+        reinterpret_cast<FunctionPointer>(GetProcAddress(library_, name));
+#else
+#error No SPIRV-Tools LoadLibraryFunction provided for the target platform.
+#endif
+    return function != nullptr;
+  }
+  decltype(&spvContextCreate) fn_spvContextCreate_ = nullptr;
+  decltype(&spvContextDestroy) fn_spvContextDestroy_ = nullptr;
+  decltype(&spvValidateBinary) fn_spvValidateBinary_ = nullptr;
+  decltype(&spvDiagnosticDestroy) fn_spvDiagnosticDestroy_ = nullptr;
+
+  spv_context context_ = nullptr;
+};
+
+} // namespace vulkan
+} // namespace ui
+} // namespace xe
+
+#endif  // XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.cc b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc
new file mode 100644
index 000000000..d3be16c5f
--- /dev/null
+++ b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc
@@ -0,0 +1,108 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2022 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+// Implementing VMA in this translation unit.
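+// VMA is distributed as a single header: exactly one translation unit in the
+// binary must define VMA_IMPLEMENTATION before including it so that the
+// function definitions are emitted once; all other includers only see the
+// declarations. A typical (illustrative) use of the helper defined below:
+//   VmaAllocator allocator =
+//       CreateVmaAllocator(provider, /*externally_synchronized=*/true);
+//   if (allocator != VK_NULL_HANDLE) {
+//     // ... vmaCreateBuffer / vmaDestroyBuffer, etc. ...
+//     vmaDestroyAllocator(allocator);
+//   }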
+#define VMA_IMPLEMENTATION +#include "xenia/ui/vulkan/vulkan_mem_alloc.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, + bool externally_synchronized) { + const VulkanProvider::LibraryFunctions& lfn = provider.lfn(); + const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + const VulkanProvider::InstanceExtensions& instance_extensions = + provider.instance_extensions(); + const VulkanProvider::DeviceExtensions& device_extensions = + provider.device_extensions(); + + VmaVulkanFunctions vma_vulkan_functions = {}; + VmaAllocatorCreateInfo allocator_create_info = {}; + + vma_vulkan_functions.vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr; + vma_vulkan_functions.vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr; + vma_vulkan_functions.vkGetPhysicalDeviceProperties = + ifn.vkGetPhysicalDeviceProperties; + vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties = + ifn.vkGetPhysicalDeviceMemoryProperties; + vma_vulkan_functions.vkAllocateMemory = dfn.vkAllocateMemory; + vma_vulkan_functions.vkFreeMemory = dfn.vkFreeMemory; + vma_vulkan_functions.vkMapMemory = dfn.vkMapMemory; + vma_vulkan_functions.vkUnmapMemory = dfn.vkUnmapMemory; + vma_vulkan_functions.vkFlushMappedMemoryRanges = + dfn.vkFlushMappedMemoryRanges; + vma_vulkan_functions.vkInvalidateMappedMemoryRanges = + dfn.vkInvalidateMappedMemoryRanges; + vma_vulkan_functions.vkBindBufferMemory = dfn.vkBindBufferMemory; + vma_vulkan_functions.vkBindImageMemory = dfn.vkBindImageMemory; + vma_vulkan_functions.vkGetBufferMemoryRequirements = + dfn.vkGetBufferMemoryRequirements; + vma_vulkan_functions.vkGetImageMemoryRequirements = + dfn.vkGetImageMemoryRequirements; + vma_vulkan_functions.vkCreateBuffer = dfn.vkCreateBuffer; + vma_vulkan_functions.vkDestroyBuffer = dfn.vkDestroyBuffer; + vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage; + vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage; + vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer; + if (device_extensions.khr_get_memory_requirements2) { + vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR = + dfn.vkGetBufferMemoryRequirements2KHR; + vma_vulkan_functions.vkGetImageMemoryRequirements2KHR = + dfn.vkGetImageMemoryRequirements2KHR; + if (device_extensions.khr_dedicated_allocation) { + allocator_create_info.flags |= + VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; + } + } + if (device_extensions.khr_bind_memory2) { + vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR; + vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR; + allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; + } + if (instance_extensions.khr_get_physical_device_properties2) { + vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR = + ifn.vkGetPhysicalDeviceMemoryProperties2KHR; + if (device_extensions.ext_memory_budget) { + allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + } + } + if (device_extensions.khr_maintenance4) { + vma_vulkan_functions.vkGetDeviceImageMemoryRequirements = + dfn.vkGetDeviceImageMemoryRequirementsKHR; + } + + if (externally_synchronized) { + allocator_create_info.flags |= + VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; + } + allocator_create_info.physicalDevice = provider.physical_device(); + allocator_create_info.device = 
provider.device(); + allocator_create_info.pVulkanFunctions = &vma_vulkan_functions; + allocator_create_info.instance = provider.instance(); + allocator_create_info.vulkanApiVersion = + provider.device_properties().apiVersion; + VmaAllocator allocator; + if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan Memory Allocator instance"); + return VK_NULL_HANDLE; + } + return allocator; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.h b/src/xenia/ui/vulkan/vulkan_mem_alloc.h index 5fe76b462..9ae9db16e 100644 --- a/src/xenia/ui/vulkan/vulkan_mem_alloc.h +++ b/src/xenia/ui/vulkan/vulkan_mem_alloc.h @@ -29,33 +29,8 @@ namespace xe { namespace ui { namespace vulkan { -inline void FillVMAVulkanFunctions(VmaVulkanFunctions* vma_funcs, - const VulkanProvider& provider) { - const VulkanProvider::LibraryFunctions& lfn = provider.lfn(); - const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); - const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - vma_funcs->vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr; - vma_funcs->vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr; - vma_funcs->vkGetPhysicalDeviceProperties = ifn.vkGetPhysicalDeviceProperties; - vma_funcs->vkGetPhysicalDeviceMemoryProperties = - ifn.vkGetPhysicalDeviceMemoryProperties; - vma_funcs->vkAllocateMemory = dfn.vkAllocateMemory; - vma_funcs->vkFreeMemory = dfn.vkFreeMemory; - vma_funcs->vkMapMemory = dfn.vkMapMemory; - vma_funcs->vkUnmapMemory = dfn.vkUnmapMemory; - vma_funcs->vkFlushMappedMemoryRanges = dfn.vkFlushMappedMemoryRanges; - vma_funcs->vkInvalidateMappedMemoryRanges = - dfn.vkInvalidateMappedMemoryRanges; - vma_funcs->vkBindBufferMemory = dfn.vkBindBufferMemory; - vma_funcs->vkBindImageMemory = dfn.vkBindImageMemory; - vma_funcs->vkGetBufferMemoryRequirements = dfn.vkGetBufferMemoryRequirements; - vma_funcs->vkGetImageMemoryRequirements = dfn.vkGetImageMemoryRequirements; - vma_funcs->vkCreateBuffer = dfn.vkCreateBuffer; - vma_funcs->vkDestroyBuffer = dfn.vkDestroyBuffer; - vma_funcs->vkCreateImage = dfn.vkCreateImage; - vma_funcs->vkDestroyImage = dfn.vkDestroyImage; - vma_funcs->vkCmdCopyBuffer = dfn.vkCmdCopyBuffer; -} +VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, + bool externally_synchronized); } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 893b0ea7c..3a30220fb 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -29,13 +29,8 @@ #include "xenia/base/platform_win.h" #endif -// Implement AMD's VMA here. -#define VMA_IMPLEMENTATION -#include "xenia/ui/vulkan/vulkan_mem_alloc.h" - -// TODO(Triang3l): Disable Vulkan validation before releasing a stable version. DEFINE_bool( - vulkan_validation, true, + vulkan_validation, false, "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be " "written to the OS debug log without vulkan_debug_messenger or to the " "Xenia log with it.", @@ -548,22 +543,10 @@ bool VulkanProvider::Initialize() { ++i) { VkPhysicalDevice physical_device_current = physical_devices[i]; - // Get physical device features and check if the needed ones are supported. - // Need this before obtaining the queues as sparse binding is an optional - // feature. + // Get physical device features. Need this before obtaining the queues as + // sparse binding is an optional feature. 
     ifn_.vkGetPhysicalDeviceFeatures(physical_device_current,
                                      &device_features_);
-    // Passing indices directly from guest memory, where they are big-endian; a
-    // workaround using fetch from shared memory for 32-bit indices that need
-    // swapping isn't implemented yet. Not supported only Qualcomm Adreno 4xx.
-    if (!device_features_.fullDrawIndexUint32) {
-      continue;
-    }
-    // TODO(Triang3l): Make geometry shaders optional by providing compute
-    // shader fallback (though that would require vertex shader stores).
-    if (!device_features_.geometryShader) {
-      continue;
-    }

     // Get the needed queues:
     // - Graphics and compute.
@@ -704,11 +687,17 @@
   }
   std::memset(&device_extensions_, 0, sizeof(device_extensions_));
   if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
+    device_extensions_.khr_bind_memory2 = true;
     device_extensions_.khr_dedicated_allocation = true;
+    device_extensions_.khr_get_memory_requirements2 = true;
+    device_extensions_.khr_sampler_ycbcr_conversion = true;
     if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
       device_extensions_.khr_image_format_list = true;
       device_extensions_.khr_shader_float_controls = true;
       device_extensions_.khr_spirv_1_4 = true;
+      if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
+        device_extensions_.khr_maintenance4 = true;
+      }
     }
   }
   device_extensions_enabled.clear();
@@ -717,15 +706,28 @@
   // core to device_extensions_enabled. Adding literals to
   // device_extensions_enabled for the most C string lifetime safety.
   static const std::pair<const char*, size_t> kUsedDeviceExtensions[] = {
-      {"VK_AMD_shader_info", offsetof(DeviceExtensions, amd_shader_info)},
       {"VK_EXT_fragment_shader_interlock",
       offsetof(DeviceExtensions, ext_fragment_shader_interlock)},
+      {"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)},
+      {"VK_EXT_shader_stencil_export",
+       offsetof(DeviceExtensions, ext_shader_stencil_export)},
+      {"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)},
       {"VK_KHR_dedicated_allocation",
       offsetof(DeviceExtensions, khr_dedicated_allocation)},
+      {"VK_KHR_get_memory_requirements2",
+       offsetof(DeviceExtensions, khr_get_memory_requirements2)},
       {"VK_KHR_image_format_list",
       offsetof(DeviceExtensions, khr_image_format_list)},
+      {"VK_KHR_maintenance4", offsetof(DeviceExtensions, khr_maintenance4)},
       {"VK_KHR_portability_subset",
       offsetof(DeviceExtensions, khr_portability_subset)},
+      // While vkGetPhysicalDeviceFormatProperties should be used to check the
+      // format support (device support for Y'CbCr formats is not required by
+      // this extension or by Vulkan 1.1), still adding
+      // VK_KHR_sampler_ycbcr_conversion to this list to enable this extension
+      // on the device on Vulkan 1.0.
+      {"VK_KHR_sampler_ycbcr_conversion",
+       offsetof(DeviceExtensions, khr_sampler_ycbcr_conversion)},
       {"VK_KHR_shader_float_controls",
       offsetof(DeviceExtensions, khr_shader_float_controls)},
       {"VK_KHR_spirv_1_4", offsetof(DeviceExtensions, khr_spirv_1_4)},
@@ -917,10 +919,47 @@
     }
   }
   // Extensions - disable the specific extension if failed to get its functions.
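+  // Each device_khr_*.inc file lists entry points through the
+  // XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) macro; the
+  // PROMOTE / DONT_PROMOTE selection below chooses which of the two names is
+  // resolved. Roughly (a sketch - the exact macro bodies live elsewhere):
+  //   PROMOTE:      dfn_.vkBindBufferMemory2KHR <- "vkBindBufferMemory2"
+  //   DONT_PROMOTE: dfn_.vkBindBufferMemory2KHR <- "vkBindBufferMemory2KHR"
+  // so on API versions where the extension is core, the core entry point name
+  // is used, while the KHR-suffixed pointer field stays the same.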
- if (device_extensions_.amd_shader_info) { + if (device_extensions_.khr_bind_memory2) { bool functions_loaded = true; -#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc" - device_extensions_.amd_shader_info = functions_loaded; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_bind_memory2 = functions_loaded; + } + if (device_extensions_.khr_get_memory_requirements2) { + bool functions_loaded = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_get_memory_requirements2 = functions_loaded; + // VK_KHR_dedicated_allocation can still work without the dedicated + // allocation preference getter even though it requires + // VK_KHR_get_memory_requirements2 to be supported and enabled. + } + if (device_extensions_.khr_maintenance4) { + bool functions_loaded = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_maintenance4 = functions_loaded; } if (device_extensions_.khr_swapchain) { bool functions_loaded = true; @@ -954,14 +993,22 @@ bool VulkanProvider::Initialize() { VK_VERSION_MINOR(device_properties_.apiVersion), VK_VERSION_PATCH(device_properties_.apiVersion)); XELOGVK("Vulkan device extensions:"); - XELOGVK("* VK_AMD_shader_info: {}", - device_extensions_.amd_shader_info ? "yes" : "no"); XELOGVK("* VK_EXT_fragment_shader_interlock: {}", device_extensions_.ext_fragment_shader_interlock ? "yes" : "no"); + XELOGVK("* VK_EXT_memory_budget: {}", + device_extensions_.ext_memory_budget ? "yes" : "no"); + XELOGVK("* VK_EXT_shader_stencil_export: {}", + device_extensions_.ext_shader_stencil_export ? "yes" : "no"); + XELOGVK("* VK_KHR_bind_memory2: {}", + device_extensions_.khr_bind_memory2 ? "yes" : "no"); XELOGVK("* VK_KHR_dedicated_allocation: {}", device_extensions_.khr_dedicated_allocation ? "yes" : "no"); + XELOGVK("* VK_KHR_get_memory_requirements2: {}", + device_extensions_.khr_get_memory_requirements2 ? "yes" : "no"); XELOGVK("* VK_KHR_image_format_list: {}", device_extensions_.khr_image_format_list ? "yes" : "no"); + XELOGVK("* VK_KHR_maintenance4: {}", + device_extensions_.khr_maintenance4 ? "yes" : "no"); XELOGVK("* VK_KHR_portability_subset: {}", device_extensions_.khr_portability_subset ? 
"yes" : "no"); if (device_extensions_.khr_portability_subset) { @@ -990,6 +1037,8 @@ bool VulkanProvider::Initialize() { XELOGVK(" * Triangle fans: {}", device_portability_subset_features_.triangleFans ? "yes" : "no"); } + XELOGVK("* VK_KHR_sampler_ycbcr_conversion: {}", + device_extensions_.khr_sampler_ycbcr_conversion ? "yes" : "no"); XELOGVK("* VK_KHR_shader_float_controls: {}", device_extensions_.khr_shader_float_controls ? "yes" : "no"); if (device_extensions_.khr_shader_float_controls) { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 01cc69916..8dc83283c 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -131,14 +131,23 @@ class VulkanProvider : public GraphicsProvider { return device_features_; } struct DeviceExtensions { - bool amd_shader_info; bool ext_fragment_shader_interlock; + bool ext_memory_budget; + bool ext_shader_stencil_export; + // Core since 1.1.0. + bool khr_bind_memory2; // Core since 1.1.0. bool khr_dedicated_allocation; + // Core since 1.1.0. + bool khr_get_memory_requirements2; // Core since 1.2.0. bool khr_image_format_list; + // Core since 1.3.0. + bool khr_maintenance4; // Requires the VK_KHR_get_physical_device_properties2 instance extension. bool khr_portability_subset; + // Core since 1.1.0. + bool khr_sampler_ycbcr_conversion; // Core since 1.2.0. bool khr_shader_float_controls; // Core since 1.2.0. @@ -215,9 +224,14 @@ class VulkanProvider : public GraphicsProvider { VkDevice device() const { return device_; } struct DeviceFunctions { #define XE_UI_VULKAN_FUNCTION(name) PFN_##name name; +#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \ + PFN_##extension_name extension_name; #include "xenia/ui/vulkan/functions/device_1_0.inc" -#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc" +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" #include "xenia/ui/vulkan/functions/device_khr_swapchain.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED #undef XE_UI_VULKAN_FUNCTION }; const DeviceFunctions& dfn() const { return dfn_; } diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc index f8dd5846e..b4eb02c3f 100644 --- a/src/xenia/ui/vulkan/vulkan_util.cc +++ b/src/xenia/ui/vulkan/vulkan_util.cc @@ -189,6 +189,53 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider, return true; } +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + VkShaderModule shader, const VkSpecializationInfo* specialization_info, + const char* entry_point) { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkComputePipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + pipeline_create_info.stage.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipeline_create_info.stage.pNext = nullptr; + pipeline_create_info.stage.flags = 0; + pipeline_create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + pipeline_create_info.stage.module = shader; + pipeline_create_info.stage.pName = entry_point; + pipeline_create_info.stage.pSpecializationInfo = specialization_info; + pipeline_create_info.layout = layout; + 
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = -1; + VkPipeline pipeline; + if (dfn.vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipeline) != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + return pipeline; +} + +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + const uint32_t* shader_code, size_t shader_code_size_bytes, + const VkSpecializationInfo* specialization_info, const char* entry_point) { + VkShaderModule shader = + CreateShaderModule(provider, shader_code, shader_code_size_bytes); + if (shader == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkPipeline pipeline = CreateComputePipeline(provider, layout, shader, + specialization_info, entry_point); + dfn.vkDestroyShaderModule(device, shader, nullptr); + return pipeline; +} + } // namespace util } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index fc828e4c2..ca3eb60b1 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -13,7 +13,6 @@ #include #include -#include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -22,13 +21,6 @@ namespace ui { namespace vulkan { namespace util { -inline void CheckResult(VkResult result, const char* action) { - if (result != VK_SUCCESS) { - XELOGE("Vulkan check: {} returned 0x{:X}", action, uint32_t(result)); - } - assert_true(result == VK_SUCCESS, action); -} - template inline bool DestroyAndNullHandle(F* destroy_function, T& handle) { if (handle != VK_NULL_HANDLE) { @@ -174,6 +166,17 @@ inline VkShaderModule CreateShaderModule(const VulkanProvider& provider, : VK_NULL_HANDLE; } +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + VkShaderModule shader, + const VkSpecializationInfo* specialization_info = nullptr, + const char* entry_point = "main"); +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + const uint32_t* shader_code, size_t shader_code_size_bytes, + const VkSpecializationInfo* specialization_info = nullptr, + const char* entry_point = "main"); + } // namespace util } // namespace vulkan } // namespace ui diff --git a/third_party/SPIRV-Tools b/third_party/SPIRV-Tools new file mode 160000 index 000000000..dd534e877 --- /dev/null +++ b/third_party/SPIRV-Tools @@ -0,0 +1 @@ +Subproject commit dd534e877e725c9bb6f751c427442456a05384e4 diff --git a/third_party/VulkanMemoryAllocator b/third_party/VulkanMemoryAllocator index fd82bc7b6..51c8b5601 160000 --- a/third_party/VulkanMemoryAllocator +++ b/third_party/VulkanMemoryAllocator @@ -1 +1 @@ -Subproject commit fd82bc7b6daa58ff3ac9f581a9399cd22a24285d +Subproject commit 51c8b56011303e94840370089f816b19dbe7edf0 diff --git a/third_party/glslang b/third_party/glslang new file mode 160000 index 000000000..f4f1d8a35 --- /dev/null +++ b/third_party/glslang @@ -0,0 +1 @@ +Subproject commit f4f1d8a352ca1908943aea2ad8c54b39b4879080 diff --git a/third_party/glslang-spirv.lua b/third_party/glslang-spirv.lua index 77895361b..19a04c71e 100644 --- a/third_party/glslang-spirv.lua +++ b/third_party/glslang-spirv.lua @@ -11,27 +11,33 @@ project("glslang-spirv") includedirs({ }) files({ - "glslang-spirv/bitutils.h", - "glslang-spirv/disassemble.cpp", - 
"glslang-spirv/disassemble.h", - "glslang-spirv/doc.cpp", - "glslang-spirv/doc.h", - "glslang-spirv/GLSL.ext.AMD.h", - "glslang-spirv/GLSL.ext.EXT.h", - "glslang-spirv/GLSL.ext.KHR.h", - "glslang-spirv/GLSL.ext.NV.h", - "glslang-spirv/GLSL.std.450.h", - -- Disabled until required. - -- "glslang-spirv/GlslangToSpv.cpp", - -- "glslang-spirv/GlslangToSpv.h", - "glslang-spirv/hex_float.h", - "glslang-spirv/InReadableOrder.cpp", - "glslang-spirv/Logger.cpp", - "glslang-spirv/Logger.h", - "glslang-spirv/spirv.hpp", - "glslang-spirv/SpvBuilder.cpp", - "glslang-spirv/SpvBuilder.h", - "glslang-spirv/spvIR.h", - "glslang-spirv/SPVRemapper.cpp", - "glslang-spirv/SPVRemapper.h", + "glslang/SPIRV/bitutils.h", + "glslang/SPIRV/disassemble.cpp", + "glslang/SPIRV/disassemble.h", + "glslang/SPIRV/doc.cpp", + "glslang/SPIRV/doc.h", + "glslang/SPIRV/GLSL.ext.AMD.h", + "glslang/SPIRV/GLSL.ext.EXT.h", + "glslang/SPIRV/GLSL.ext.KHR.h", + "glslang/SPIRV/GLSL.ext.NV.h", + "glslang/SPIRV/GLSL.std.450.h", + -- Disabled because GLSL is not used. + -- "glslang/SPIRV/GlslangToSpv.cpp", + -- "glslang/SPIRV/GlslangToSpv.h", + "glslang/SPIRV/hex_float.h", + "glslang/SPIRV/InReadableOrder.cpp", + "glslang/SPIRV/Logger.cpp", + "glslang/SPIRV/Logger.h", + "glslang/SPIRV/NonSemanticDebugPrintf.h", + "glslang/SPIRV/spirv.hpp", + "glslang/SPIRV/SpvBuilder.cpp", + "glslang/SPIRV/SpvBuilder.h", + "glslang/SPIRV/spvIR.h", + -- Disabled because of spirv-tools dependency. + -- "glslang/SPIRV/SpvPostProcess.cpp", + "glslang/SPIRV/SPVRemapper.cpp", + "glslang/SPIRV/SPVRemapper.h", + -- Disabled because of spirv-tools dependency. + -- "glslang/SPIRV/SpvTools.cpp", + -- "glslang/SPIRV/SpvTools.h", }) diff --git a/third_party/glslang-spirv/GLSL.ext.AMD.h b/third_party/glslang-spirv/GLSL.ext.AMD.h deleted file mode 100644 index d4f57efdc..000000000 --- a/third_party/glslang-spirv/GLSL.ext.AMD.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLextAMD_H -#define GLSLextAMD_H - -enum BuiltIn; -enum Capability; -enum Decoration; -enum Op; - -static const int GLSLextAMDVersion = 100; -static const int GLSLextAMDRevision = 6; - -// SPV_AMD_shader_ballot -static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; - -enum ShaderBallotAMD { - ShaderBallotBadAMD = 0, // Don't use - - SwizzleInvocationsAMD = 1, - SwizzleInvocationsMaskedAMD = 2, - WriteInvocationAMD = 3, - MbcntAMD = 4, - - ShaderBallotCountAMD -}; - -// SPV_AMD_shader_trinary_minmax -static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; - -enum ShaderTrinaryMinMaxAMD { - ShaderTrinaryMinMaxBadAMD = 0, // Don't use - - FMin3AMD = 1, - UMin3AMD = 2, - SMin3AMD = 3, - FMax3AMD = 4, - UMax3AMD = 5, - SMax3AMD = 6, - FMid3AMD = 7, - UMid3AMD = 8, - SMid3AMD = 9, - - ShaderTrinaryMinMaxCountAMD -}; - -// SPV_AMD_shader_explicit_vertex_parameter -static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; - -enum ShaderExplicitVertexParameterAMD { - ShaderExplicitVertexParameterBadAMD = 0, // Don't use - - InterpolateAtVertexAMD = 1, - - ShaderExplicitVertexParameterCountAMD -}; - -// SPV_AMD_gcn_shader -static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; - -enum GcnShaderAMD { - GcnShaderBadAMD = 0, // Don't use - - CubeFaceIndexAMD = 1, - CubeFaceCoordAMD = 2, - TimeAMD = 3, - - GcnShaderCountAMD -}; - -// SPV_AMD_gpu_shader_half_float -static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; - -// SPV_AMD_texture_gather_bias_lod -static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; - -// SPV_AMD_gpu_shader_int16 -static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; - -// SPV_AMD_shader_image_load_store_lod -static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; - -// SPV_AMD_shader_fragment_mask -static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; - -#endif // #ifndef GLSLextAMD_H diff --git a/third_party/glslang-spirv/GLSL.ext.EXT.h b/third_party/glslang-spirv/GLSL.ext.EXT.h deleted file mode 100644 index e879714d0..000000000 --- a/third_party/glslang-spirv/GLSL.ext.EXT.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLextEXT_H -#define GLSLextEXT_H - -enum BuiltIn; -enum Op; -enum Capability; - -static const int GLSLextEXTVersion = 100; -static const int GLSLextEXTRevision = 1; - -static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; - -#endif // #ifndef GLSLextEXT_H diff --git a/third_party/glslang-spirv/GLSL.ext.KHR.h b/third_party/glslang-spirv/GLSL.ext.KHR.h deleted file mode 100644 index 2eb10ae62..000000000 --- a/third_party/glslang-spirv/GLSL.ext.KHR.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLextKHR_H -#define GLSLextKHR_H - -enum BuiltIn; -enum Op; -enum Capability; - -static const int GLSLextKHRVersion = 100; -static const int GLSLextKHRRevision = 2; - -static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; -static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; -static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; -static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; -static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; -static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; -static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; -static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; -static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; -static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; - -#endif // #ifndef GLSLextKHR_H diff --git a/third_party/glslang-spirv/GLSL.ext.NV.h b/third_party/glslang-spirv/GLSL.ext.NV.h deleted file mode 100644 index c01858be4..000000000 --- a/third_party/glslang-spirv/GLSL.ext.NV.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -** Copyright (c) 2014-2017 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLextNV_H -#define GLSLextNV_H - -enum BuiltIn; -enum Decoration; -enum Op; -enum Capability; - -static const int GLSLextNVVersion = 100; -static const int GLSLextNVRevision = 5; - -//SPV_NV_sample_mask_override_coverage -const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; - -//SPV_NV_geometry_shader_passthrough -const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; - -//SPV_NV_viewport_array2 -const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; -const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; - -//SPV_NV_stereo_view_rendering -const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; - -//SPV_NVX_multiview_per_view_attributes -const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; - -#endif // #ifndef GLSLextNV_H \ No newline at end of file diff --git a/third_party/glslang-spirv/GLSL.std.450.h b/third_party/glslang-spirv/GLSL.std.450.h deleted file mode 100644 index df31092be..000000000 --- a/third_party/glslang-spirv/GLSL.std.450.h +++ /dev/null @@ -1,131 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -static const int GLSLstd450Version = 100; -static const int GLSLstd450Revision = 1; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, // Reserved - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450NMin = 79, - GLSLstd450NMax = 80, - GLSLstd450NClamp = 81, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/third_party/glslang-spirv/GlslangToSpv.cpp b/third_party/glslang-spirv/GlslangToSpv.cpp deleted file mode 100644 index 6e9fb38bb..000000000 --- a/third_party/glslang-spirv/GlslangToSpv.cpp +++ /dev/null @@ -1,6146 +0,0 @@ -// -// Copyright (C) 2014-2016 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Visit the nodes in the glslang intermediate tree representation to -// translate them to SPIR-V. -// - -#include "spirv.hpp" -#include "GlslangToSpv.h" -#include "SpvBuilder.h" -namespace spv { - #include "GLSL.std.450.h" - #include "GLSL.ext.KHR.h" - #include "GLSL.ext.EXT.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif -} - -#ifdef ENABLE_OPT - #include "spirv-tools/optimizer.hpp" - #include "message.h" - #include "SPVRemapper.h" -#endif - -#ifdef ENABLE_OPT -using namespace spvtools; -#endif - -// Glslang includes -#include "../glslang/MachineIndependent/localintermediate.h" -#include "../glslang/MachineIndependent/SymbolTable.h" -#include "../glslang/Include/Common.h" -#include "../glslang/Include/revision.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace { - -namespace { -class SpecConstantOpModeGuard { -public: - SpecConstantOpModeGuard(spv::Builder* builder) - : builder_(builder) { - previous_flag_ = builder->isInSpecConstCodeGenMode(); - } - ~SpecConstantOpModeGuard() { - previous_flag_ ? builder_->setToSpecConstCodeGenMode() - : builder_->setToNormalCodeGenMode(); - } - void turnOnSpecConstantOpMode() { - builder_->setToSpecConstCodeGenMode(); - } - -private: - spv::Builder* builder_; - bool previous_flag_; -}; -} - -// -// The main holder of information for translating glslang to SPIR-V. -// -// Derives from the AST walking base class. 
-// -class TGlslangToSpvTraverser : public glslang::TIntermTraverser { -public: - TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate*, spv::SpvBuildLogger* logger, - glslang::SpvOptions& options); - virtual ~TGlslangToSpvTraverser() { } - - bool visitAggregate(glslang::TVisit, glslang::TIntermAggregate*); - bool visitBinary(glslang::TVisit, glslang::TIntermBinary*); - void visitConstantUnion(glslang::TIntermConstantUnion*); - bool visitSelection(glslang::TVisit, glslang::TIntermSelection*); - bool visitSwitch(glslang::TVisit, glslang::TIntermSwitch*); - void visitSymbol(glslang::TIntermSymbol* symbol); - bool visitUnary(glslang::TVisit, glslang::TIntermUnary*); - bool visitLoop(glslang::TVisit, glslang::TIntermLoop*); - bool visitBranch(glslang::TVisit visit, glslang::TIntermBranch*); - - void finishSpv(); - void dumpSpv(std::vector& out); - -protected: - spv::Decoration TranslateInterpolationDecoration(const glslang::TQualifier& qualifier); - spv::Decoration TranslateAuxiliaryStorageDecoration(const glslang::TQualifier& qualifier); - spv::BuiltIn TranslateBuiltInDecoration(glslang::TBuiltInVariable, bool memberDeclaration); - spv::ImageFormat TranslateImageFormat(const glslang::TType& type); - spv::SelectionControlMask TranslateSelectionControl(const glslang::TIntermSelection&) const; - spv::SelectionControlMask TranslateSwitchControl(const glslang::TIntermSwitch&) const; - spv::LoopControlMask TranslateLoopControl(const glslang::TIntermLoop&, unsigned int& dependencyLength) const; - spv::StorageClass TranslateStorageClass(const glslang::TType&); - spv::Id createSpvVariable(const glslang::TIntermSymbol*); - spv::Id getSampledType(const glslang::TSampler&); - spv::Id getInvertedSwizzleType(const glslang::TIntermTyped&); - spv::Id createInvertedSwizzle(spv::Decoration precision, const glslang::TIntermTyped&, spv::Id parentResult); - void convertSwizzle(const glslang::TIntermAggregate&, std::vector& swizzle); - spv::Id convertGlslangToSpvType(const glslang::TType& type); - spv::Id convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking, const glslang::TQualifier&); - bool filterMember(const glslang::TType& member); - spv::Id convertGlslangStructToSpvType(const glslang::TType&, const glslang::TTypeList* glslangStruct, - glslang::TLayoutPacking, const glslang::TQualifier&); - void decorateStructType(const glslang::TType&, const glslang::TTypeList* glslangStruct, glslang::TLayoutPacking, - const glslang::TQualifier&, spv::Id); - spv::Id makeArraySizeId(const glslang::TArraySizes&, int dim); - spv::Id accessChainLoad(const glslang::TType& type); - void accessChainStore(const glslang::TType& type, spv::Id rvalue); - void multiTypeStore(const glslang::TType&, spv::Id rValue); - glslang::TLayoutPacking getExplicitLayout(const glslang::TType& type) const; - int getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking, glslang::TLayoutMatrix); - int getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking, glslang::TLayoutMatrix); - void updateMemberOffset(const glslang::TType& structType, const glslang::TType& memberType, int& currentOffset, int& nextOffset, glslang::TLayoutPacking, glslang::TLayoutMatrix); - void declareUseOfStructMember(const glslang::TTypeList& members, int glslangMember); - - bool isShaderEntryPoint(const glslang::TIntermAggregate* node); - bool writableParam(glslang::TStorageQualifier); - bool originalParam(glslang::TStorageQualifier, const glslang::TType&, bool implicitThisParam); - void 
makeFunctions(const glslang::TIntermSequence&);
-    void makeGlobalInitializers(const glslang::TIntermSequence&);
-    void visitFunctions(const glslang::TIntermSequence&);
-    void handleFunctionEntry(const glslang::TIntermAggregate* node);
-    void translateArguments(const glslang::TIntermAggregate& node, std::vector<spv::Id>& arguments);
-    void translateArguments(glslang::TIntermUnary& node, std::vector<spv::Id>& arguments);
-    spv::Id createImageTextureFunctionCall(glslang::TIntermOperator* node);
-    spv::Id handleUserFunctionCall(const glslang::TIntermAggregate*);
-
-    spv::Id createBinaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right, glslang::TBasicType typeProxy, bool reduceComparison = true);
-    spv::Id createBinaryMatrixOperation(spv::Op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right);
-    spv::Id createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy);
-    spv::Id createUnaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy);
-    spv::Id createConversion(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id destTypeId, spv::Id operand, glslang::TBasicType typeProxy);
-    spv::Id makeSmearedConstant(spv::Id constant, int vectorSize);
-    spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
-    spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
-    spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector<spv::Id>& operands);
-    spv::Id createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy);
-    spv::Id createNoArgOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId);
-    spv::Id getSymbolId(const glslang::TIntermSymbol* node);
-    void addDecoration(spv::Id id, spv::Decoration dec);
-    void addDecoration(spv::Id id, spv::Decoration dec, unsigned value);
-    void addMemberDecoration(spv::Id id, int member, spv::Decoration dec);
-    void addMemberDecoration(spv::Id id, int member, spv::Decoration dec, unsigned value);
-    spv::Id createSpvConstant(const glslang::TIntermTyped&);
-    spv::Id createSpvConstantFromConstUnionArray(const glslang::TType& type, const glslang::TConstUnionArray&, int& nextConst, bool specConstant);
-    bool isTrivialLeaf(const glslang::TIntermTyped* node);
-    bool isTrivial(const glslang::TIntermTyped* node);
-    spv::Id createShortCircuit(glslang::TOperator, glslang::TIntermTyped& left, glslang::TIntermTyped& right);
-#ifdef AMD_EXTENSIONS
-    spv::Id getExtBuiltins(const char* name);
-#endif
-
-    glslang::SpvOptions& options;
-    spv::Function* shaderEntry;
-    spv::Function* currentFunction;
-    spv::Instruction* entryPoint;
-    int sequenceDepth;
-
-    spv::SpvBuildLogger* logger;
-
-    // There is a 1:1 mapping between a spv builder and a module; this is thread safe
-    spv::Builder builder;
-    bool inEntryPoint;
-    bool entryPointTerminated;
-    bool linkageOnly;  // true when visiting the set of objects in the AST present only for establishing interface, whether or not they were statically used
-    std::set<spv::Id> iOSet;  // all input/output variables from either static use or declaration of interface
-    const glslang::TIntermediate* glslangIntermediate;
-    spv::Id stdBuiltins;
-    std::unordered_map<const char*, spv::Id> extBuiltinMap;
-
-    std::unordered_map<int, spv::Id> symbolValues;
-    std::unordered_set<int> rValueParameters;  // set of formal function parameters passed as rValues, rather than a pointer
-    std::unordered_map<std::string, spv::Function*> functionMap;
-    std::unordered_map<const glslang::TTypeList*, spv::Id> structMap[glslang::ElpCount][glslang::ElmCount];
-    std::unordered_map<const glslang::TTypeList*, std::vector<int> > memberRemapper;  // for mapping glslang block indices to spv indices (e.g., due to hidden members)
-    std::stack<bool> breakForLoop;  // false means break for switch
-};
-
-//
-// Helper functions for translating glslang representations to SPIR-V enumerants.
-//
-
-// Translate glslang profile to SPIR-V source language.
-spv::SourceLanguage TranslateSourceLanguage(glslang::EShSource source, EProfile profile)
-{
-    switch (source) {
-    case glslang::EShSourceGlsl:
-        switch (profile) {
-        case ENoProfile:
-        case ECoreProfile:
-        case ECompatibilityProfile:
-            return spv::SourceLanguageGLSL;
-        case EEsProfile:
-            return spv::SourceLanguageESSL;
-        default:
-            return spv::SourceLanguageUnknown;
-        }
-    case glslang::EShSourceHlsl:
-        return spv::SourceLanguageHLSL;
-    default:
-        return spv::SourceLanguageUnknown;
-    }
-}
-
-// Translate glslang language (stage) to SPIR-V execution model.
-spv::ExecutionModel TranslateExecutionModel(EShLanguage stage)
-{
-    switch (stage) {
-    case EShLangVertex:         return spv::ExecutionModelVertex;
-    case EShLangTessControl:    return spv::ExecutionModelTessellationControl;
-    case EShLangTessEvaluation: return spv::ExecutionModelTessellationEvaluation;
-    case EShLangGeometry:       return spv::ExecutionModelGeometry;
-    case EShLangFragment:       return spv::ExecutionModelFragment;
-    case EShLangCompute:        return spv::ExecutionModelGLCompute;
-    default:
-        assert(0);
-        return spv::ExecutionModelFragment;
-    }
-}
-
-// Translate glslang sampler type to SPIR-V dimensionality.
-spv::Dim TranslateDimensionality(const glslang::TSampler& sampler)
-{
-    switch (sampler.dim) {
-    case glslang::Esd1D:      return spv::Dim1D;
-    case glslang::Esd2D:      return spv::Dim2D;
-    case glslang::Esd3D:      return spv::Dim3D;
-    case glslang::EsdCube:    return spv::DimCube;
-    case glslang::EsdRect:    return spv::DimRect;
-    case glslang::EsdBuffer:  return spv::DimBuffer;
-    case glslang::EsdSubpass: return spv::DimSubpassData;
-    default:
-        assert(0);
-        return spv::Dim2D;
-    }
-}
-
-// Translate glslang precision to SPIR-V precision decorations.
-spv::Decoration TranslatePrecisionDecoration(glslang::TPrecisionQualifier glslangPrecision)
-{
-    switch (glslangPrecision) {
-    case glslang::EpqLow:    return spv::DecorationRelaxedPrecision;
-    case glslang::EpqMedium: return spv::DecorationRelaxedPrecision;
-    default:
-        return spv::NoPrecision;
-    }
-}
-
-// Translate glslang type to SPIR-V precision decorations.
-spv::Decoration TranslatePrecisionDecoration(const glslang::TType& type)
-{
-    return TranslatePrecisionDecoration(type.getQualifier().precision);
-}
-
-// Translate glslang type to SPIR-V block decorations.
-spv::Decoration TranslateBlockDecoration(const glslang::TType& type, bool useStorageBuffer)
-{
-    if (type.getBasicType() == glslang::EbtBlock) {
-        switch (type.getQualifier().storage) {
-        case glslang::EvqUniform:    return spv::DecorationBlock;
-        case glslang::EvqBuffer:     return useStorageBuffer ? spv::DecorationBlock : spv::DecorationBufferBlock;
-        case glslang::EvqVaryingIn:  return spv::DecorationBlock;
-        case glslang::EvqVaryingOut: return spv::DecorationBlock;
-        default:
-            assert(0);
-            break;
-        }
-    }
-
-    return spv::DecorationMax;
-}
-
-// Translate glslang type to SPIR-V memory decorations.
-void TranslateMemoryDecoration(const glslang::TQualifier& qualifier, std::vector<spv::Decoration>& memory)
-{
-    if (qualifier.coherent)
-        memory.push_back(spv::DecorationCoherent);
-    if (qualifier.volatil)
-        memory.push_back(spv::DecorationVolatile);
-    if (qualifier.restrict)
-        memory.push_back(spv::DecorationRestrict);
-    if (qualifier.readonly)
-        memory.push_back(spv::DecorationNonWritable);
-    if (qualifier.writeonly)
-        memory.push_back(spv::DecorationNonReadable);
-}
-
-// Translate glslang type to SPIR-V layout decorations.
-spv::Decoration TranslateLayoutDecoration(const glslang::TType& type, glslang::TLayoutMatrix matrixLayout)
-{
-    if (type.isMatrix()) {
-        switch (matrixLayout) {
-        case glslang::ElmRowMajor:
-            return spv::DecorationRowMajor;
-        case glslang::ElmColumnMajor:
-            return spv::DecorationColMajor;
-        default:
-            // opaque layouts don't need a majorness
-            return spv::DecorationMax;
-        }
-    } else {
-        switch (type.getBasicType()) {
-        default:
-            return spv::DecorationMax;
-            break;
-        case glslang::EbtBlock:
-            switch (type.getQualifier().storage) {
-            case glslang::EvqUniform:
-            case glslang::EvqBuffer:
-                switch (type.getQualifier().layoutPacking) {
-                case glslang::ElpShared: return spv::DecorationGLSLShared;
-                case glslang::ElpPacked: return spv::DecorationGLSLPacked;
-                default:
-                    return spv::DecorationMax;
-                }
-            case glslang::EvqVaryingIn:
-            case glslang::EvqVaryingOut:
-                assert(type.getQualifier().layoutPacking == glslang::ElpNone);
-                return spv::DecorationMax;
-            default:
-                assert(0);
-                return spv::DecorationMax;
-            }
-        }
-    }
-}
-
-// Translate glslang type to SPIR-V interpolation decorations.
-// Returns spv::DecorationMax when no decoration
-// should be applied.
-spv::Decoration TGlslangToSpvTraverser::TranslateInterpolationDecoration(const glslang::TQualifier& qualifier)
-{
-    if (qualifier.smooth)
-        // Smooth decoration doesn't exist in SPIR-V 1.0
-        return spv::DecorationMax;
-    else if (qualifier.nopersp)
-        return spv::DecorationNoPerspective;
-    else if (qualifier.flat)
-        return spv::DecorationFlat;
-#ifdef AMD_EXTENSIONS
-    else if (qualifier.explicitInterp) {
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::DecorationExplicitInterpAMD;
-    }
-#endif
-    else
-        return spv::DecorationMax;
-}
-
-// Translate glslang type to SPIR-V auxiliary storage decorations.
-// Returns spv::DecorationMax when no decoration
-// should be applied.
-spv::Decoration TGlslangToSpvTraverser::TranslateAuxiliaryStorageDecoration(const glslang::TQualifier& qualifier)
-{
-    if (qualifier.patch)
-        return spv::DecorationPatch;
-    else if (qualifier.centroid)
-        return spv::DecorationCentroid;
-    else if (qualifier.sample) {
-        builder.addCapability(spv::CapabilitySampleRateShading);
-        return spv::DecorationSample;
-    } else
-        return spv::DecorationMax;
-}
-
-// If glslang type is invariant, return SPIR-V invariant decoration.
-spv::Decoration TranslateInvariantDecoration(const glslang::TQualifier& qualifier)
-{
-    if (qualifier.invariant)
-        return spv::DecorationInvariant;
-    else
-        return spv::DecorationMax;
-}
-
-// If glslang type is noContraction, return SPIR-V NoContraction decoration.
-spv::Decoration TranslateNoContractionDecoration(const glslang::TQualifier& qualifier)
-{
-    if (qualifier.noContraction)
-        return spv::DecorationNoContraction;
-    else
-        return spv::DecorationMax;
-}
-
-// Translate a glslang built-in variable to a SPIR-V built in decoration.  Also generate
-// associated capabilities when required.  For some built-in variables, a capability
-// is generated only when using the variable in an executable instruction, but not when
-// just declaring a struct member variable with it.  This is true for PointSize,
-// ClipDistance, and CullDistance.
-spv::BuiltIn TGlslangToSpvTraverser::TranslateBuiltInDecoration(glslang::TBuiltInVariable builtIn, bool memberDeclaration)
-{
-    switch (builtIn) {
-    case glslang::EbvPointSize:
-        // Defer adding the capability until the built-in is actually used.
-        if (! memberDeclaration) {
-            switch (glslangIntermediate->getStage()) {
-            case EShLangGeometry:
-                builder.addCapability(spv::CapabilityGeometryPointSize);
-                break;
-            case EShLangTessControl:
-            case EShLangTessEvaluation:
-                builder.addCapability(spv::CapabilityTessellationPointSize);
-                break;
-            default:
-                break;
-            }
-        }
-        return spv::BuiltInPointSize;
-
-    // These *Distance capabilities logically belong here, but if the member is declared and
-    // then never used, consumers of SPIR-V prefer the capability not be declared.
-    // They are now generated when used, rather than here when declared.
-    // Potentially, the specification should be more clear what the minimum
-    // use needed is to trigger the capability.
-    //
-    case glslang::EbvClipDistance:
-        if (!memberDeclaration)
-            builder.addCapability(spv::CapabilityClipDistance);
-        return spv::BuiltInClipDistance;
-
-    case glslang::EbvCullDistance:
-        if (!memberDeclaration)
-            builder.addCapability(spv::CapabilityCullDistance);
-        return spv::BuiltInCullDistance;
-
-    case glslang::EbvViewportIndex:
-        builder.addCapability(spv::CapabilityMultiViewport);
-        if (glslangIntermediate->getStage() == EShLangVertex ||
-            glslangIntermediate->getStage() == EShLangTessControl ||
-            glslangIntermediate->getStage() == EShLangTessEvaluation) {
-
-            builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer);
-            builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
-        }
-        return spv::BuiltInViewportIndex;
-
-    case glslang::EbvSampleId:
-        builder.addCapability(spv::CapabilitySampleRateShading);
-        return spv::BuiltInSampleId;
-
-    case glslang::EbvSamplePosition:
-        builder.addCapability(spv::CapabilitySampleRateShading);
-        return spv::BuiltInSamplePosition;
-
-    case glslang::EbvSampleMask:
-        return spv::BuiltInSampleMask;
-
-    case glslang::EbvLayer:
-        builder.addCapability(spv::CapabilityGeometry);
-        if (glslangIntermediate->getStage() == EShLangVertex ||
-            glslangIntermediate->getStage() == EShLangTessControl ||
-            glslangIntermediate->getStage() == EShLangTessEvaluation) {
-
-            builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer);
-            builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
-        }
-        return spv::BuiltInLayer;
-
-    case glslang::EbvPosition:      return spv::BuiltInPosition;
-    case glslang::EbvVertexId:      return spv::BuiltInVertexId;
-    case glslang::EbvInstanceId:    return spv::BuiltInInstanceId;
-    case glslang::EbvVertexIndex:   return spv::BuiltInVertexIndex;
-    case glslang::EbvInstanceIndex: return spv::BuiltInInstanceIndex;
-
-    case glslang::EbvBaseVertex:
-        builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters);
-        builder.addCapability(spv::CapabilityDrawParameters);
-        return spv::BuiltInBaseVertex;
-
-    case glslang::EbvBaseInstance:
-        builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters);
-        builder.addCapability(spv::CapabilityDrawParameters);
-        return spv::BuiltInBaseInstance;
-
-    case glslang::EbvDrawId:
-        builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters);
-        builder.addCapability(spv::CapabilityDrawParameters);
-        return spv::BuiltInDrawIndex;
-
-    case glslang::EbvPrimitiveId:
-        if (glslangIntermediate->getStage() == EShLangFragment)
-            builder.addCapability(spv::CapabilityGeometry);
-        return spv::BuiltInPrimitiveId;
-
-    case glslang::EbvFragStencilRef:
-        builder.addExtension(spv::E_SPV_EXT_shader_stencil_export);
-        builder.addCapability(spv::CapabilityStencilExportEXT);
-        return spv::BuiltInFragStencilRefEXT;
-
-    case glslang::EbvInvocationId:         return spv::BuiltInInvocationId;
-    case glslang::EbvTessLevelInner:       return spv::BuiltInTessLevelInner;
-    case glslang::EbvTessLevelOuter:       return spv::BuiltInTessLevelOuter;
-    case glslang::EbvTessCoord:            return spv::BuiltInTessCoord;
-    case glslang::EbvPatchVertices:        return spv::BuiltInPatchVertices;
-    case glslang::EbvFragCoord:            return spv::BuiltInFragCoord;
-    case glslang::EbvPointCoord:           return spv::BuiltInPointCoord;
-    case glslang::EbvFace:                 return spv::BuiltInFrontFacing;
-    case glslang::EbvFragDepth:            return spv::BuiltInFragDepth;
-    case glslang::EbvHelperInvocation:     return spv::BuiltInHelperInvocation;
-    case glslang::EbvNumWorkGroups:        return spv::BuiltInNumWorkgroups;
-    case glslang::EbvWorkGroupSize:        return spv::BuiltInWorkgroupSize;
-    case glslang::EbvWorkGroupId:          return spv::BuiltInWorkgroupId;
-    case glslang::EbvLocalInvocationId:    return spv::BuiltInLocalInvocationId;
-    case glslang::EbvLocalInvocationIndex: return spv::BuiltInLocalInvocationIndex;
-    case glslang::EbvGlobalInvocationId:   return spv::BuiltInGlobalInvocationId;
-
-    case glslang::EbvSubGroupSize:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupSize;
-
-    case glslang::EbvSubGroupInvocation:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupLocalInvocationId;
-
-    case glslang::EbvSubGroupEqMask:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupEqMaskKHR;
-
-    case glslang::EbvSubGroupGeMask:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupGeMaskKHR;
-
-    case glslang::EbvSubGroupGtMask:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupGtMaskKHR;
-
-    case glslang::EbvSubGroupLeMask:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupLeMaskKHR;
-
-    case glslang::EbvSubGroupLtMask:
-        builder.addExtension(spv::E_SPV_KHR_shader_ballot);
-        builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupLtMaskKHR;
-
-#ifdef AMD_EXTENSIONS
-    case glslang::EbvBaryCoordNoPersp:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordNoPerspAMD;
-
-    case glslang::EbvBaryCoordNoPerspCentroid:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordNoPerspCentroidAMD;
-
-    case glslang::EbvBaryCoordNoPerspSample:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordNoPerspSampleAMD;
-
-    case glslang::EbvBaryCoordSmooth:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordSmoothAMD;
-
-    case glslang::EbvBaryCoordSmoothCentroid:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordSmoothCentroidAMD;
-
-    case glslang::EbvBaryCoordSmoothSample:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordSmoothSampleAMD;
-
-    case glslang::EbvBaryCoordPullModel:
-        builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
-        return spv::BuiltInBaryCoordPullModelAMD;
-#endif
-
-    case glslang::EbvDeviceIndex:
-        builder.addExtension(spv::E_SPV_KHR_device_group);
-        builder.addCapability(spv::CapabilityDeviceGroup);
-        return spv::BuiltInDeviceIndex;
-
-    case glslang::EbvViewIndex:
-        builder.addExtension(spv::E_SPV_KHR_multiview);
-        builder.addCapability(spv::CapabilityMultiView);
-        return spv::BuiltInViewIndex;
-
-#ifdef NV_EXTENSIONS
-    case glslang::EbvViewportMaskNV:
-        if (!memberDeclaration) {
-            builder.addExtension(spv::E_SPV_NV_viewport_array2);
-            builder.addCapability(spv::CapabilityShaderViewportMaskNV);
-        }
-        return spv::BuiltInViewportMaskNV;
-    case glslang::EbvSecondaryPositionNV:
-        if (!memberDeclaration) {
-            builder.addExtension(spv::E_SPV_NV_stereo_view_rendering);
-            builder.addCapability(spv::CapabilityShaderStereoViewNV);
-        }
-        return spv::BuiltInSecondaryPositionNV;
-    case glslang::EbvSecondaryViewportMaskNV:
-        if (!memberDeclaration) {
-            builder.addExtension(spv::E_SPV_NV_stereo_view_rendering);
-            builder.addCapability(spv::CapabilityShaderStereoViewNV);
-        }
-        return spv::BuiltInSecondaryViewportMaskNV;
-    case glslang::EbvPositionPerViewNV:
-        if (!memberDeclaration) {
-            builder.addExtension(spv::E_SPV_NVX_multiview_per_view_attributes);
-            builder.addCapability(spv::CapabilityPerViewAttributesNV);
-        }
-        return spv::BuiltInPositionPerViewNV;
-    case glslang::EbvViewportMaskPerViewNV:
-        if (!memberDeclaration) {
-            builder.addExtension(spv::E_SPV_NVX_multiview_per_view_attributes);
-            builder.addCapability(spv::CapabilityPerViewAttributesNV);
-        }
-        return spv::BuiltInViewportMaskPerViewNV;
-    case glslang::EbvFragFullyCoveredNV:
-        builder.addExtension(spv::E_SPV_EXT_fragment_fully_covered);
-        builder.addCapability(spv::CapabilityFragmentFullyCoveredEXT);
-        return spv::BuiltInFullyCoveredEXT;
-#endif
-    default:
-        return spv::BuiltInMax;
-    }
-}
-
-// Translate glslang image layout format to SPIR-V image format.
-spv::ImageFormat TGlslangToSpvTraverser::TranslateImageFormat(const glslang::TType& type)
-{
-    assert(type.getBasicType() == glslang::EbtSampler);
-
-    // Check for capabilities
-    switch (type.getQualifier().layoutFormat) {
-    case glslang::ElfRg32f:
-    case glslang::ElfRg16f:
-    case glslang::ElfR11fG11fB10f:
-    case glslang::ElfR16f:
-    case glslang::ElfRgba16:
-    case glslang::ElfRgb10A2:
-    case glslang::ElfRg16:
-    case glslang::ElfRg8:
-    case glslang::ElfR16:
-    case glslang::ElfR8:
-    case glslang::ElfRgba16Snorm:
-    case glslang::ElfRg16Snorm:
-    case glslang::ElfRg8Snorm:
-    case glslang::ElfR16Snorm:
-    case glslang::ElfR8Snorm:
-
-    case glslang::ElfRg32i:
-    case glslang::ElfRg16i:
-    case glslang::ElfRg8i:
-    case glslang::ElfR16i:
-    case glslang::ElfR8i:
-
-    case glslang::ElfRgb10a2ui:
-    case glslang::ElfRg32ui:
-    case glslang::ElfRg16ui:
-    case glslang::ElfRg8ui:
-    case glslang::ElfR16ui:
-    case glslang::ElfR8ui:
-        builder.addCapability(spv::CapabilityStorageImageExtendedFormats);
-        break;
-
-    default:
-        break;
-    }
-
-    // do the translation
-    switch (type.getQualifier().layoutFormat) {
-    case glslang::ElfNone:         return spv::ImageFormatUnknown;
-    case glslang::ElfRgba32f:      return spv::ImageFormatRgba32f;
-    case glslang::ElfRgba16f:      return spv::ImageFormatRgba16f;
-    case glslang::ElfR32f:         return spv::ImageFormatR32f;
-    case glslang::ElfRgba8:        return spv::ImageFormatRgba8;
-    case glslang::ElfRgba8Snorm:   return spv::ImageFormatRgba8Snorm;
-    case glslang::ElfRg32f:        return spv::ImageFormatRg32f;
-    case glslang::ElfRg16f:        return spv::ImageFormatRg16f;
-    case glslang::ElfR11fG11fB10f: return spv::ImageFormatR11fG11fB10f;
-    case glslang::ElfR16f:         return spv::ImageFormatR16f;
-    case glslang::ElfRgba16:       return spv::ImageFormatRgba16;
-    case glslang::ElfRgb10A2:      return spv::ImageFormatRgb10A2;
-    case glslang::ElfRg16:         return spv::ImageFormatRg16;
-    case glslang::ElfRg8:          return spv::ImageFormatRg8;
-    case glslang::ElfR16:          return spv::ImageFormatR16;
-    case glslang::ElfR8:           return spv::ImageFormatR8;
-    case glslang::ElfRgba16Snorm:  return spv::ImageFormatRgba16Snorm;
-    case glslang::ElfRg16Snorm:    return spv::ImageFormatRg16Snorm;
-    case glslang::ElfRg8Snorm:     return spv::ImageFormatRg8Snorm;
-    case glslang::ElfR16Snorm:     return spv::ImageFormatR16Snorm;
-    case glslang::ElfR8Snorm:      return spv::ImageFormatR8Snorm;
-    case glslang::ElfRgba32i:      return spv::ImageFormatRgba32i;
-    case glslang::ElfRgba16i:      return spv::ImageFormatRgba16i;
-    case glslang::ElfRgba8i:       return spv::ImageFormatRgba8i;
-    case glslang::ElfR32i:         return spv::ImageFormatR32i;
-    case glslang::ElfRg32i:        return spv::ImageFormatRg32i;
-    case glslang::ElfRg16i:        return spv::ImageFormatRg16i;
-    case glslang::ElfRg8i:         return spv::ImageFormatRg8i;
-    case glslang::ElfR16i:         return spv::ImageFormatR16i;
-    case glslang::ElfR8i:          return spv::ImageFormatR8i;
-    case glslang::ElfRgba32ui:     return spv::ImageFormatRgba32ui;
-    case glslang::ElfRgba16ui:     return spv::ImageFormatRgba16ui;
-    case glslang::ElfRgba8ui:      return spv::ImageFormatRgba8ui;
-    case glslang::ElfR32ui:        return spv::ImageFormatR32ui;
-    case glslang::ElfRg32ui:       return spv::ImageFormatRg32ui;
-    case glslang::ElfRg16ui:       return spv::ImageFormatRg16ui;
-    case glslang::ElfRgb10a2ui:    return spv::ImageFormatRgb10a2ui;
-    case glslang::ElfRg8ui:        return spv::ImageFormatRg8ui;
-    case glslang::ElfR16ui:        return spv::ImageFormatR16ui;
-    case glslang::ElfR8ui:         return spv::ImageFormatR8ui;
-    default:                       return spv::ImageFormatMax;
-    }
-}
-
-spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSelectionControl(const glslang::TIntermSelection& selectionNode) const
-{
-    if (selectionNode.getFlatten())
-        return spv::SelectionControlFlattenMask;
-    if (selectionNode.getDontFlatten())
-        return spv::SelectionControlDontFlattenMask;
-    return spv::SelectionControlMaskNone;
-}
-
-spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSwitchControl(const glslang::TIntermSwitch& switchNode) const
-{
-    if (switchNode.getFlatten())
-        return spv::SelectionControlFlattenMask;
-    if (switchNode.getDontFlatten())
-        return spv::SelectionControlDontFlattenMask;
-    return spv::SelectionControlMaskNone;
-}
-
-// return a non-0 dependency if the dependency argument must be set
-spv::LoopControlMask TGlslangToSpvTraverser::TranslateLoopControl(const glslang::TIntermLoop& loopNode,
-    unsigned int& dependencyLength) const
-{
-    spv::LoopControlMask control = spv::LoopControlMaskNone;
-
-    if (loopNode.getDontUnroll())
-        control = control | spv::LoopControlDontUnrollMask;
-    if (loopNode.getUnroll())
-        control = control | spv::LoopControlUnrollMask;
-    if (loopNode.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite)
-        control = control | spv::LoopControlDependencyInfiniteMask;
-    else if (loopNode.getLoopDependency() > 0) {
-        control = control | spv::LoopControlDependencyLengthMask;
-        dependencyLength = loopNode.getLoopDependency();
-    }
-
-    return control;
-}
-
-// Translate glslang type to SPIR-V storage class.
-spv::StorageClass TGlslangToSpvTraverser::TranslateStorageClass(const glslang::TType& type)
-{
-    if (type.getQualifier().isPipeInput())
-        return spv::StorageClassInput;
-    if (type.getQualifier().isPipeOutput())
-        return spv::StorageClassOutput;
-
-    if (glslangIntermediate->getSource() != glslang::EShSourceHlsl ||
-        type.getQualifier().storage == glslang::EvqUniform) {
-        if (type.getBasicType() == glslang::EbtAtomicUint)
-            return spv::StorageClassAtomicCounter;
-        if (type.containsOpaque())
-            return spv::StorageClassUniformConstant;
-    }
-
-    if (glslangIntermediate->usingStorageBuffer() && type.getQualifier().storage == glslang::EvqBuffer) {
-        builder.addExtension(spv::E_SPV_KHR_storage_buffer_storage_class);
-        return spv::StorageClassStorageBuffer;
-    }
-
-    if (type.getQualifier().isUniformOrBuffer()) {
-        if (type.getQualifier().layoutPushConstant)
-            return spv::StorageClassPushConstant;
-        if (type.getBasicType() == glslang::EbtBlock)
-            return spv::StorageClassUniform;
-        return spv::StorageClassUniformConstant;
-    }
-
-    switch (type.getQualifier().storage) {
-    case glslang::EvqShared:        return spv::StorageClassWorkgroup;
-    case glslang::EvqGlobal:        return spv::StorageClassPrivate;
-    case glslang::EvqConstReadOnly: return spv::StorageClassFunction;
-    case glslang::EvqTemporary:     return spv::StorageClassFunction;
-    default:
-        assert(0);
-        break;
-    }
-
-    return spv::StorageClassFunction;
-}
-
-// Return whether or not the given type is something that should be tied to a
-// descriptor set.
-bool IsDescriptorResource(const glslang::TType& type)
-{
-    // uniform and buffer blocks are included, unless it is a push_constant
-    if (type.getBasicType() == glslang::EbtBlock)
-        return type.getQualifier().isUniformOrBuffer() && ! type.getQualifier().layoutPushConstant;
-
-    // non block...
-    // basically samplerXXX/subpass/sampler/texture are all included
-    // if they are the global-scope-class, not the function parameter
-    // (or local, if they ever exist) class.
-    if (type.getBasicType() == glslang::EbtSampler)
-        return type.getQualifier().isUniformOrBuffer();
-
-    // None of the above.
-    return false;
-}
-
-void InheritQualifiers(glslang::TQualifier& child, const glslang::TQualifier& parent)
-{
-    if (child.layoutMatrix == glslang::ElmNone)
-        child.layoutMatrix = parent.layoutMatrix;
-
-    if (parent.invariant)
-        child.invariant = true;
-    if (parent.nopersp)
-        child.nopersp = true;
-#ifdef AMD_EXTENSIONS
-    if (parent.explicitInterp)
-        child.explicitInterp = true;
-#endif
-    if (parent.flat)
-        child.flat = true;
-    if (parent.centroid)
-        child.centroid = true;
-    if (parent.patch)
-        child.patch = true;
-    if (parent.sample)
-        child.sample = true;
-    if (parent.coherent)
-        child.coherent = true;
-    if (parent.volatil)
-        child.volatil = true;
-    if (parent.restrict)
-        child.restrict = true;
-    if (parent.readonly)
-        child.readonly = true;
-    if (parent.writeonly)
-        child.writeonly = true;
-}
-
-bool HasNonLayoutQualifiers(const glslang::TType& type, const glslang::TQualifier& qualifier)
-{
-    // This should list qualifiers that simultaneous satisfy:
-    // - struct members might inherit from a struct declaration
-    //   (note that non-block structs don't explicitly inherit,
-    //    only implicitly, meaning no decoration involved)
-    // - affect decorations on the struct members
-    //   (note smooth does not, and expecting something like volatile
-    //    to effect the whole object)
-    // - are not part of the offset/st430/etc or row/column-major layout
-    return qualifier.invariant || (qualifier.hasLocation() && type.getBasicType() == glslang::EbtBlock);
-}
-
-//
-// Implement the TGlslangToSpvTraverser class.
-//
-
-TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate* glslangIntermediate,
-                                               spv::SpvBuildLogger* buildLogger, glslang::SpvOptions& options)
-    : TIntermTraverser(true, false, true),
-      options(options),
-      shaderEntry(nullptr), currentFunction(nullptr),
-      sequenceDepth(0), logger(buildLogger),
-      builder(spvVersion, (glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger),
-      inEntryPoint(false), entryPointTerminated(false), linkageOnly(false),
-      glslangIntermediate(glslangIntermediate)
-{
-    spv::ExecutionModel executionModel = TranslateExecutionModel(glslangIntermediate->getStage());
-
-    builder.clearAccessChain();
-    builder.setSource(TranslateSourceLanguage(glslangIntermediate->getSource(), glslangIntermediate->getProfile()),
-                      glslangIntermediate->getVersion());
-
-    if (options.generateDebugInfo) {
-        builder.setEmitOpLines();
-        builder.setSourceFile(glslangIntermediate->getSourceFile());
-
-        // Set the source shader's text. If for SPV version 1.0, include
-        // a preamble in comments stating the OpModuleProcessed instructions.
-        // Otherwise, emit those as actual instructions.
-        std::string text;
-        const std::vector<std::string>& processes = glslangIntermediate->getProcesses();
-        for (int p = 0; p < (int)processes.size(); ++p) {
-            if (glslangIntermediate->getSpv().spv < 0x00010100) {
-                text.append("// OpModuleProcessed ");
-                text.append(processes[p]);
-                text.append("\n");
-            } else
-                builder.addModuleProcessed(processes[p]);
-        }
-        if (glslangIntermediate->getSpv().spv < 0x00010100 && (int)processes.size() > 0)
-            text.append("#line 1\n");
-        text.append(glslangIntermediate->getSourceText());
-        builder.setSourceText(text);
-    }
-    stdBuiltins = builder.import("GLSL.std.450");
-    builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450);
-    shaderEntry = builder.makeEntryPoint(glslangIntermediate->getEntryPointName().c_str());
-    entryPoint = builder.addEntryPoint(executionModel, shaderEntry, glslangIntermediate->getEntryPointName().c_str());
-
-    // Add the source extensions
-    const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions();
-    for (auto it = sourceExtensions.begin(); it != sourceExtensions.end(); ++it)
-        builder.addSourceExtension(it->c_str());
-
-    // Add the top-level modes for this shader.
-
-    if (glslangIntermediate->getXfbMode()) {
-        builder.addCapability(spv::CapabilityTransformFeedback);
-        builder.addExecutionMode(shaderEntry, spv::ExecutionModeXfb);
-    }
-
-    unsigned int mode;
-    switch (glslangIntermediate->getStage()) {
-    case EShLangVertex:
-        builder.addCapability(spv::CapabilityShader);
-        break;
-
-    case EShLangTessEvaluation:
-    case EShLangTessControl:
-        builder.addCapability(spv::CapabilityTessellation);
-
-        glslang::TLayoutGeometry primitive;
-
-        if (glslangIntermediate->getStage() == EShLangTessControl) {
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices());
-            primitive = glslangIntermediate->getOutputPrimitive();
-        } else {
-            primitive = glslangIntermediate->getInputPrimitive();
-        }
-
-        switch (primitive) {
-        case glslang::ElgTriangles: mode = spv::ExecutionModeTriangles; break;
-        case glslang::ElgQuads:     mode = spv::ExecutionModeQuads;     break;
-        case glslang::ElgIsolines:  mode = spv::ExecutionModeIsolines;  break;
-        default:                    mode = spv::ExecutionModeMax;       break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-
-        switch (glslangIntermediate->getVertexSpacing()) {
-        case glslang::EvsEqual:          mode = spv::ExecutionModeSpacingEqual;          break;
-        case glslang::EvsFractionalEven: mode = spv::ExecutionModeSpacingFractionalEven; break;
-        case glslang::EvsFractionalOdd:  mode = spv::ExecutionModeSpacingFractionalOdd;  break;
-        default:                         mode = spv::ExecutionModeMax;                   break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-
-        switch (glslangIntermediate->getVertexOrder()) {
-        case glslang::EvoCw:  mode = spv::ExecutionModeVertexOrderCw;  break;
-        case glslang::EvoCcw: mode = spv::ExecutionModeVertexOrderCcw; break;
-        default:              mode = spv::ExecutionModeMax;            break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-
-        if (glslangIntermediate->getPointMode())
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModePointMode);
-        break;
-
-    case EShLangGeometry:
-        builder.addCapability(spv::CapabilityGeometry);
-        switch (glslangIntermediate->getInputPrimitive()) {
-        case glslang::ElgPoints:             mode = spv::ExecutionModeInputPoints;             break;
-        case glslang::ElgLines:              mode = spv::ExecutionModeInputLines;              break;
-        case glslang::ElgLinesAdjacency:     mode = spv::ExecutionModeInputLinesAdjacency;     break;
-        case glslang::ElgTriangles:          mode = spv::ExecutionModeTriangles;               break;
-        case glslang::ElgTrianglesAdjacency: mode = spv::ExecutionModeInputTrianglesAdjacency; break;
-        default:                             mode = spv::ExecutionModeMax;                     break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-
-        builder.addExecutionMode(shaderEntry, spv::ExecutionModeInvocations, glslangIntermediate->getInvocations());
-
-        switch (glslangIntermediate->getOutputPrimitive()) {
-        case glslang::ElgPoints:        mode = spv::ExecutionModeOutputPoints;        break;
-        case glslang::ElgLineStrip:     mode = spv::ExecutionModeOutputLineStrip;     break;
-        case glslang::ElgTriangleStrip: mode = spv::ExecutionModeOutputTriangleStrip; break;
-        default:                        mode = spv::ExecutionModeMax;                 break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-        builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices());
-        break;
-
-    case EShLangFragment:
-        builder.addCapability(spv::CapabilityShader);
-        if (glslangIntermediate->getPixelCenterInteger())
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModePixelCenterInteger);
-
-        if (glslangIntermediate->getOriginUpperLeft())
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginUpperLeft);
-        else
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginLowerLeft);
-
-        if (glslangIntermediate->getEarlyFragmentTests())
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModeEarlyFragmentTests);
-
-        if (glslangIntermediate->getPostDepthCoverage()) {
-            builder.addCapability(spv::CapabilitySampleMaskPostDepthCoverage);
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModePostDepthCoverage);
-            builder.addExtension(spv::E_SPV_KHR_post_depth_coverage);
-        }
-
-        switch(glslangIntermediate->getDepth()) {
-        case glslang::EldGreater: mode = spv::ExecutionModeDepthGreater; break;
-        case glslang::EldLess:    mode = spv::ExecutionModeDepthLess;    break;
-        default:                  mode = spv::ExecutionModeMax;          break;
-        }
-        if (mode != spv::ExecutionModeMax)
-            builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode);
-
-        if (glslangIntermediate->getDepth() != glslang::EldUnchanged && glslangIntermediate->isDepthReplacing())
-            builder.addExecutionMode(shaderEntry, spv::ExecutionModeDepthReplacing);
-        break;
-
-    case EShLangCompute:
-        builder.addCapability(spv::CapabilityShader);
-        builder.addExecutionMode(shaderEntry, spv::ExecutionModeLocalSize, glslangIntermediate->getLocalSize(0),
-                                                                           glslangIntermediate->getLocalSize(1),
-                                                                           glslangIntermediate->getLocalSize(2));
-        break;
-
-    default:
-        break;
-    }
-}
-
-// Finish creating SPV, after the traversal is complete.
-void TGlslangToSpvTraverser::finishSpv()
-{
-    if (! entryPointTerminated) {
-        builder.setBuildPoint(shaderEntry->getLastBlock());
-        builder.leaveFunction();
-    }
-
-    // finish off the entry-point SPV instruction by adding the Input/Output <id>s
-    for (auto it = iOSet.cbegin(); it != iOSet.cend(); ++it)
-        entryPoint->addIdOperand(*it);
-
-    builder.eliminateDeadDecorations();
-}
-
-// Write the SPV into 'out'.
-void TGlslangToSpvTraverser::dumpSpv(std::vector<unsigned int>& out)
-{
-    builder.dump(out);
-}
-
-//
-// Implement the traversal functions.
-//
-// Return true from interior nodes to have the external traversal
-// continue on to children.  Return false if children were
-// already processed.
-//
-
-//
-// Symbols can turn into
-//  - uniform/input reads
-//  - output writes
-//  - complex lvalue base setups:  foo.bar[3]....  , where we see foo and start up an access chain
-//  - something simple that degenerates into the last bullet
-//
-void TGlslangToSpvTraverser::visitSymbol(glslang::TIntermSymbol* symbol)
-{
-    SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder);
-    if (symbol->getType().getQualifier().isSpecConstant())
-        spec_constant_op_mode_setter.turnOnSpecConstantOpMode();
-
-    // getSymbolId() will set up all the IO decorations on the first call.
-    // Formal function parameters were mapped during makeFunctions().
-    spv::Id id = getSymbolId(symbol);
-
-    // Include all "static use" and "linkage only" interface variables on the OpEntryPoint instruction
-    if (builder.isPointer(id)) {
-        spv::StorageClass sc = builder.getStorageClass(id);
-        if (sc == spv::StorageClassInput || sc == spv::StorageClassOutput) {
-            if (!symbol->getType().isStruct() || symbol->getType().getStruct()->size() > 0)
-                iOSet.insert(id);
-        }
-    }
-
-    // Only process non-linkage-only nodes for generating actual static uses
-    if (! linkageOnly || symbol->getQualifier().isSpecConstant()) {
-        // Prepare to generate code for the access
-
-        // L-value chains will be computed left to right.  We're on the symbol now,
-        // which is the left-most part of the access chain, so now is "clear" time,
-        // followed by setting the base.
-        builder.clearAccessChain();
-
-        // For now, we consider all user variables as being in memory, so they are pointers,
-        // except for
-        // A) R-Value arguments to a function, which are an intermediate object.
-        //    See comments in handleUserFunctionCall().
-        // B) Specialization constants (normal constants don't even come in as a variable),
-        //    These are also pure R-values.
-        glslang::TQualifier qualifier = symbol->getQualifier();
-        if (qualifier.isSpecConstant() || rValueParameters.find(symbol->getId()) != rValueParameters.end())
-            builder.setAccessChainRValue(id);
-        else
-            builder.setAccessChainLValue(id);
-    }
-}
-
-bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::TIntermBinary* node)
-{
-    builder.setLine(node->getLoc().line);
-
-    SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder);
-    if (node->getType().getQualifier().isSpecConstant())
-        spec_constant_op_mode_setter.turnOnSpecConstantOpMode();
-
-    // First, handle special cases
-    switch (node->getOp()) {
-    case glslang::EOpAssign:
-    case glslang::EOpAddAssign:
-    case glslang::EOpSubAssign:
-    case glslang::EOpMulAssign:
-    case glslang::EOpVectorTimesMatrixAssign:
-    case glslang::EOpVectorTimesScalarAssign:
-    case glslang::EOpMatrixTimesScalarAssign:
-    case glslang::EOpMatrixTimesMatrixAssign:
-    case glslang::EOpDivAssign:
-    case glslang::EOpModAssign:
-    case glslang::EOpAndAssign:
-    case glslang::EOpInclusiveOrAssign:
-    case glslang::EOpExclusiveOrAssign:
-    case glslang::EOpLeftShiftAssign:
-    case glslang::EOpRightShiftAssign:
-        // A bin-op assign "a += b" means the same thing as "a = a + b"
-        // where a is evaluated before b. For a simple assignment, GLSL
-        // says to evaluate the left before the right.  So, always, left
-        // node then right node.
-        {
-            // get the left l-value, save it away
-            builder.clearAccessChain();
-            node->getLeft()->traverse(this);
-            spv::Builder::AccessChain lValue = builder.getAccessChain();
-
-            // evaluate the right
-            builder.clearAccessChain();
-            node->getRight()->traverse(this);
-            spv::Id rValue = accessChainLoad(node->getRight()->getType());
-
-            if (node->getOp() != glslang::EOpAssign) {
-                // the left is also an r-value
-                builder.setAccessChain(lValue);
-                spv::Id leftRValue = accessChainLoad(node->getLeft()->getType());
-
-                // do the operation
-                rValue = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getOperationPrecision()),
-                                               TranslateNoContractionDecoration(node->getType().getQualifier()),
-                                               convertGlslangToSpvType(node->getType()), leftRValue, rValue,
-                                               node->getType().getBasicType());
-
-                // these all need their counterparts in createBinaryOperation()
-                assert(rValue != spv::NoResult);
-            }
-
-            // store the result
-            builder.setAccessChain(lValue);
-            multiTypeStore(node->getType(), rValue);
-
-            // assignments are expressions having an rValue after they are evaluated...
-            builder.clearAccessChain();
-            builder.setAccessChainRValue(rValue);
-        }
-        return false;
-    case glslang::EOpIndexDirect:
-    case glslang::EOpIndexDirectStruct:
-        {
-            // Get the left part of the access chain.
-            node->getLeft()->traverse(this);
-
-            // Add the next element in the chain
-
-            const int glslangIndex = node->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst();
-            if (! node->getLeft()->getType().isArray() &&
-                node->getLeft()->getType().isVector() &&
-                node->getOp() == glslang::EOpIndexDirect) {
-                // This is essentially a hard-coded vector swizzle of size 1,
-                // so short circuit the access-chain stuff with a swizzle.
-                std::vector<unsigned> swizzle;
-                swizzle.push_back(glslangIndex);
-                builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType()));
-            } else {
-                int spvIndex = glslangIndex;
-                if (node->getLeft()->getBasicType() == glslang::EbtBlock &&
-                    node->getOp() == glslang::EOpIndexDirectStruct)
-                {
-                    // This may be, e.g., an anonymous block-member selection, which generally need
-                    // index remapping due to hidden members in anonymous blocks.
-                    std::vector<int>& remapper = memberRemapper[node->getLeft()->getType().getStruct()];
-                    assert(remapper.size() > 0);
-                    spvIndex = remapper[glslangIndex];
-                }
-
-                // normal case for indexing array or structure or block
-                builder.accessChainPush(builder.makeIntConstant(spvIndex));
-
-                // Add capabilities here for accessing PointSize and clip/cull distance.
-                // We have deferred generation of associated capabilities until now.
-                if (node->getLeft()->getType().isStruct() && ! node->getLeft()->getType().isArray())
-                    declareUseOfStructMember(*(node->getLeft()->getType().getStruct()), glslangIndex);
-            }
-        }
-        return false;
-    case glslang::EOpIndexIndirect:
-        {
-            // Structure or array or vector indirection.
-            // Will use native SPIR-V access-chain for struct and array indirection;
-            // matrices are arrays of vectors, so will also work for a matrix.
-            // Will use the access chain's 'component' for variable index into a vector.
-
-            // This adapter is building access chains left to right.
-            // Set up the access chain to the left.
-            node->getLeft()->traverse(this);
-
-            // save it so that computing the right side doesn't trash it
-            spv::Builder::AccessChain partial = builder.getAccessChain();
-
-            // compute the next index in the chain
-            builder.clearAccessChain();
-            node->getRight()->traverse(this);
-            spv::Id index = accessChainLoad(node->getRight()->getType());
-
-            // restore the saved access chain
-            builder.setAccessChain(partial);
-
-            if (! node->getLeft()->getType().isArray() && node->getLeft()->getType().isVector())
-                builder.accessChainPushComponent(index, convertGlslangToSpvType(node->getLeft()->getType()));
-            else
-                builder.accessChainPush(index);
-        }
-        return false;
-    case glslang::EOpVectorSwizzle:
-        {
-            node->getLeft()->traverse(this);
-            std::vector<unsigned> swizzle;
-            convertSwizzle(*node->getRight()->getAsAggregate(), swizzle);
-            builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType()));
-        }
-        return false;
-    case glslang::EOpMatrixSwizzle:
-        logger->missingFunctionality("matrix swizzle");
-        return true;
-    case glslang::EOpLogicalOr:
-    case glslang::EOpLogicalAnd:
-        {
-
-            // These may require short circuiting, but can sometimes be done as straight
-            // binary operations.  The right operand must be short circuited if it has
-            // side effects, and should probably be if it is complex.
-            if (isTrivial(node->getRight()->getAsTyped()))
-                break; // handle below as a normal binary operation
-            // otherwise, we need to do dynamic short circuiting on the right operand
-            spv::Id result = createShortCircuit(node->getOp(), *node->getLeft()->getAsTyped(), *node->getRight()->getAsTyped());
-            builder.clearAccessChain();
-            builder.setAccessChainRValue(result);
-        }
-        return false;
-    default:
-        break;
-    }
-
-    // Assume generic binary op...
-
-    // get right operand
-    builder.clearAccessChain();
-    node->getLeft()->traverse(this);
-    spv::Id left = accessChainLoad(node->getLeft()->getType());
-
-    // get left operand
-    builder.clearAccessChain();
-    node->getRight()->traverse(this);
-    spv::Id right = accessChainLoad(node->getRight()->getType());
-
-    // get result
-    spv::Id result = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getOperationPrecision()),
-                                           TranslateNoContractionDecoration(node->getType().getQualifier()),
-                                           convertGlslangToSpvType(node->getType()), left, right,
-                                           node->getLeft()->getType().getBasicType());
-
-    builder.clearAccessChain();
-    if (! result) {
-        logger->missingFunctionality("unknown glslang binary operation");
-        return true;  // pick up a child as the place-holder result
-    } else {
-        builder.setAccessChainRValue(result);
-        return false;
-    }
-}
-
-bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TIntermUnary* node)
-{
-    builder.setLine(node->getLoc().line);
-
-    SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder);
-    if (node->getType().getQualifier().isSpecConstant())
-        spec_constant_op_mode_setter.turnOnSpecConstantOpMode();
-
-    spv::Id result = spv::NoResult;
-
-    // try texturing first
-    result = createImageTextureFunctionCall(node);
-    if (result != spv::NoResult) {
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(result);
-
-        return false; // done with this node
-    }
-
-    // Non-texturing.
-
-    if (node->getOp() == glslang::EOpArrayLength) {
-        // Quite special; won't want to evaluate the operand.
-
-        // Normal .length() would have been constant folded by the front-end.
-        // So, this has to be block.lastMember.length().
-        // SPV wants "block" and member number as the operands, go get them.
-        assert(node->getOperand()->getType().isRuntimeSizedArray());
-        glslang::TIntermTyped* block = node->getOperand()->getAsBinaryNode()->getLeft();
-        block->traverse(this);
-        unsigned int member = node->getOperand()->getAsBinaryNode()->getRight()->getAsConstantUnion()->getConstArray()[0].getUConst();
-        spv::Id length = builder.createArrayLength(builder.accessChainGetLValue(), member);
-
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(length);
-
-        return false;
-    }
-
-    // Start by evaluating the operand
-
-    // Does it need a swizzle inversion?  If so, evaluation is inverted;
-    // operate first on the swizzle base, then apply the swizzle.
-    spv::Id invertedType = spv::NoType;
-    auto resultType = [&invertedType, &node, this](){ return invertedType != spv::NoType ? invertedType : convertGlslangToSpvType(node->getType()); };
-    if (node->getOp() == glslang::EOpInterpolateAtCentroid)
-        invertedType = getInvertedSwizzleType(*node->getOperand());
-
-    builder.clearAccessChain();
-    if (invertedType != spv::NoType)
-        node->getOperand()->getAsBinaryNode()->getLeft()->traverse(this);
-    else
-        node->getOperand()->traverse(this);
-
-    spv::Id operand = spv::NoResult;
-
-    if (node->getOp() == glslang::EOpAtomicCounterIncrement ||
-        node->getOp() == glslang::EOpAtomicCounterDecrement ||
-        node->getOp() == glslang::EOpAtomicCounter          ||
-        node->getOp() == glslang::EOpInterpolateAtCentroid)
-        operand = builder.accessChainGetLValue(); // Special case l-value operands
-    else
-        operand = accessChainLoad(node->getOperand()->getType());
-
-    spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision());
-    spv::Decoration noContraction = TranslateNoContractionDecoration(node->getType().getQualifier());
-
-    // it could be a conversion
-    if (! result)
-        result = createConversion(node->getOp(), precision, noContraction, resultType(), operand, node->getOperand()->getBasicType());
-
-    // if not, then possibly an operation
-    if (! result)
-        result = createUnaryOperation(node->getOp(), precision, noContraction, resultType(), operand, node->getOperand()->getBasicType());
-
-    if (result) {
-        if (invertedType)
-            result = createInvertedSwizzle(precision, *node->getOperand(), result);
-
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(result);
-
-        return false; // done with this node
-    }
-
-    // it must be a special case, check...
-    switch (node->getOp()) {
-    case glslang::EOpPostIncrement:
-    case glslang::EOpPostDecrement:
-    case glslang::EOpPreIncrement:
-    case glslang::EOpPreDecrement:
-        {
-            // we need the integer value "1" or the floating point "1.0" to add/subtract
-            spv::Id one = 0;
-            if (node->getBasicType() == glslang::EbtFloat)
-                one = builder.makeFloatConstant(1.0F);
-            else if (node->getBasicType() == glslang::EbtDouble)
-                one = builder.makeDoubleConstant(1.0);
-#ifdef AMD_EXTENSIONS
-            else if (node->getBasicType() == glslang::EbtFloat16)
-                one = builder.makeFloat16Constant(1.0F);
-#endif
-            else if (node->getBasicType() == glslang::EbtInt64 || node->getBasicType() == glslang::EbtUint64)
-                one = builder.makeInt64Constant(1);
-#ifdef AMD_EXTENSIONS
-            else if (node->getBasicType() == glslang::EbtInt16 || node->getBasicType() == glslang::EbtUint16)
-                one = builder.makeInt16Constant(1);
-#endif
-            else
-                one = builder.makeIntConstant(1);
-            glslang::TOperator op;
-            if (node->getOp() == glslang::EOpPreIncrement ||
-                node->getOp() == glslang::EOpPostIncrement)
-                op = glslang::EOpAdd;
-            else
-                op = glslang::EOpSub;
-
-            spv::Id result = createBinaryOperation(op, precision,
-                                                   TranslateNoContractionDecoration(node->getType().getQualifier()),
-                                                   convertGlslangToSpvType(node->getType()), operand, one,
-                                                   node->getType().getBasicType());
-            assert(result != spv::NoResult);
-
-            // The result of operation is always stored, but conditionally the
-            // consumed result.  The consumed result is always an r-value.
-            builder.accessChainStore(result);
-            builder.clearAccessChain();
-            if (node->getOp() == glslang::EOpPreIncrement ||
-                node->getOp() == glslang::EOpPreDecrement)
-                builder.setAccessChainRValue(result);
-            else
-                builder.setAccessChainRValue(operand);
-        }
-
-        return false;
-
-    case glslang::EOpEmitStreamVertex:
-        builder.createNoResultOp(spv::OpEmitStreamVertex, operand);
-        return false;
-    case glslang::EOpEndStreamPrimitive:
-        builder.createNoResultOp(spv::OpEndStreamPrimitive, operand);
-        return false;
-
-    default:
-        logger->missingFunctionality("unknown glslang unary");
-        return true;  // pick up operand as placeholder result
-    }
-}
-
-bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TIntermAggregate* node)
-{
-    SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder);
-    if (node->getType().getQualifier().isSpecConstant())
-        spec_constant_op_mode_setter.turnOnSpecConstantOpMode();
-
-    spv::Id result = spv::NoResult;
-    spv::Id invertedType = spv::NoType;  // to use to override the natural type of the node
-    auto resultType = [&invertedType, &node, this](){ return invertedType != spv::NoType ? invertedType : convertGlslangToSpvType(node->getType()); };
-
-    // try texturing
-    result = createImageTextureFunctionCall(node);
-    if (result != spv::NoResult) {
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(result);
-
-        return false;
-#ifdef AMD_EXTENSIONS
-    } else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
-#else
-    } else if (node->getOp() == glslang::EOpImageStore) {
-#endif
-        // "imageStore" is a special case, which has no result
-        return false;
-    }
-
-    glslang::TOperator binOp = glslang::EOpNull;
-    bool reduceComparison = true;
-    bool isMatrix = false;
-    bool noReturnValue = false;
-    bool atomic = false;
-
-    assert(node->getOp());
-
-    spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision());
-
-    switch (node->getOp()) {
-    case glslang::EOpSequence:
-    {
-        if (preVisit)
-            ++sequenceDepth;
-        else
-            --sequenceDepth;
-
-        if (sequenceDepth == 1) {
-            // If this is the parent node of all the functions, we want to see them
-            // early, so all call points have actual SPIR-V functions to reference.
-            // In all cases, still let the traverser visit the children for us.
-            makeFunctions(node->getAsAggregate()->getSequence());
-
-            // Also, we want all globals initializers to go into the beginning of the entry point, before
-            // anything else gets there, so visit out of order, doing them all now.
-            makeGlobalInitializers(node->getAsAggregate()->getSequence());
-
-            // Initializers are done, don't want to visit again, but functions and link objects need to be processed,
-            // so do them manually.
-            visitFunctions(node->getAsAggregate()->getSequence());
-
-            return false;
-        }
-
-        return true;
-    }
-    case glslang::EOpLinkerObjects:
-    {
-        if (visit == glslang::EvPreVisit)
-            linkageOnly = true;
-        else
-            linkageOnly = false;
-
-        return true;
-    }
-    case glslang::EOpComma:
-    {
-        // processing from left to right naturally leaves the right-most
-        // lying around in the access chain
-        glslang::TIntermSequence& glslangOperands = node->getSequence();
-        for (int i = 0; i < (int)glslangOperands.size(); ++i)
-            glslangOperands[i]->traverse(this);
-
-        return false;
-    }
-    case glslang::EOpFunction:
-        if (visit == glslang::EvPreVisit) {
-            if (isShaderEntryPoint(node)) {
-                inEntryPoint = true;
-                builder.setBuildPoint(shaderEntry->getLastBlock());
-                currentFunction = shaderEntry;
-            } else {
-                handleFunctionEntry(node);
-            }
-        } else {
-            if (inEntryPoint)
-                entryPointTerminated = true;
-            builder.leaveFunction();
-            inEntryPoint = false;
-        }
-
-        return true;
-    case glslang::EOpParameters:
-        // Parameters will have been consumed by EOpFunction processing, but not
-        // the body, so we still visited the function node's children, making this
-        // child redundant.
-        return false;
-    case glslang::EOpFunctionCall:
-    {
-        builder.setLine(node->getLoc().line);
-        if (node->isUserDefined())
-            result = handleUserFunctionCall(node);
-        // assert(result);  // this can happen for bad shaders because the call graph completeness checking is not yet done
-        if (result) {
-            builder.clearAccessChain();
-            builder.setAccessChainRValue(result);
-        } else
-            logger->missingFunctionality("missing user function; linker needs to catch that");
-
-        return false;
-    }
-    case glslang::EOpConstructMat2x2:
-    case glslang::EOpConstructMat2x3:
-    case glslang::EOpConstructMat2x4:
-    case glslang::EOpConstructMat3x2:
-    case glslang::EOpConstructMat3x3:
-    case glslang::EOpConstructMat3x4:
-    case glslang::EOpConstructMat4x2:
-    case glslang::EOpConstructMat4x3:
-    case glslang::EOpConstructMat4x4:
-    case glslang::EOpConstructDMat2x2:
-    case glslang::EOpConstructDMat2x3:
-    case glslang::EOpConstructDMat2x4:
-    case glslang::EOpConstructDMat3x2:
-    case glslang::EOpConstructDMat3x3:
-    case glslang::EOpConstructDMat3x4:
-    case glslang::EOpConstructDMat4x2:
-    case glslang::EOpConstructDMat4x3:
-    case glslang::EOpConstructDMat4x4:
-    case glslang::EOpConstructIMat2x2:
-    case glslang::EOpConstructIMat2x3:
-    case glslang::EOpConstructIMat2x4:
-    case glslang::EOpConstructIMat3x2:
-    case glslang::EOpConstructIMat3x3:
-    case glslang::EOpConstructIMat3x4:
-    case glslang::EOpConstructIMat4x2:
-    case glslang::EOpConstructIMat4x3:
-    case glslang::EOpConstructIMat4x4:
-    case glslang::EOpConstructUMat2x2:
-    case glslang::EOpConstructUMat2x3:
-    case glslang::EOpConstructUMat2x4:
-    case glslang::EOpConstructUMat3x2:
-    case glslang::EOpConstructUMat3x3:
-    case glslang::EOpConstructUMat3x4:
-    case glslang::EOpConstructUMat4x2:
-    case glslang::EOpConstructUMat4x3:
-    case glslang::EOpConstructUMat4x4:
-    case glslang::EOpConstructBMat2x2:
-    case glslang::EOpConstructBMat2x3:
-    case glslang::EOpConstructBMat2x4:
-    case glslang::EOpConstructBMat3x2:
-    case glslang::EOpConstructBMat3x3:
-    case glslang::EOpConstructBMat3x4:
-    case glslang::EOpConstructBMat4x2:
-    case glslang::EOpConstructBMat4x3:
-    case glslang::EOpConstructBMat4x4:
-#ifdef AMD_EXTENSIONS
-    case glslang::EOpConstructF16Mat2x2:
-    case glslang::EOpConstructF16Mat2x3:
-    case glslang::EOpConstructF16Mat2x4:
-    case glslang::EOpConstructF16Mat3x2:
-    case glslang::EOpConstructF16Mat3x3:
-    case glslang::EOpConstructF16Mat3x4:
-    case glslang::EOpConstructF16Mat4x2:
-    case glslang::EOpConstructF16Mat4x3:
-    case glslang::EOpConstructF16Mat4x4:
-#endif
-        isMatrix = true;
-        // fall through
-    case glslang::EOpConstructFloat:
-    case glslang::EOpConstructVec2:
-    case glslang::EOpConstructVec3:
-    case glslang::EOpConstructVec4:
-    case glslang::EOpConstructDouble:
-    case glslang::EOpConstructDVec2:
-    case glslang::EOpConstructDVec3:
-    case glslang::EOpConstructDVec4:
-#ifdef AMD_EXTENSIONS
-    case glslang::EOpConstructFloat16:
-    case glslang::EOpConstructF16Vec2:
-    case glslang::EOpConstructF16Vec3:
-    case glslang::EOpConstructF16Vec4:
-#endif
-    case glslang::EOpConstructBool:
-    case glslang::EOpConstructBVec2:
-    case glslang::EOpConstructBVec3:
-    case glslang::EOpConstructBVec4:
-    case glslang::EOpConstructInt:
-    case glslang::EOpConstructIVec2:
-    case glslang::EOpConstructIVec3:
-    case glslang::EOpConstructIVec4:
-    case glslang::EOpConstructUint:
-    case glslang::EOpConstructUVec2:
-    case glslang::EOpConstructUVec3:
-    case glslang::EOpConstructUVec4:
-    case glslang::EOpConstructInt64:
-    case glslang::EOpConstructI64Vec2:
-    case glslang::EOpConstructI64Vec3:
-    case glslang::EOpConstructI64Vec4:
-    case glslang::EOpConstructUint64:
-    case glslang::EOpConstructU64Vec2:
-    case glslang::EOpConstructU64Vec3:
-    case glslang::EOpConstructU64Vec4:
-#ifdef AMD_EXTENSIONS
-    case glslang::EOpConstructInt16:
-    case glslang::EOpConstructI16Vec2:
-    case glslang::EOpConstructI16Vec3:
-    case glslang::EOpConstructI16Vec4:
-    case glslang::EOpConstructUint16:
-    case glslang::EOpConstructU16Vec2:
-    case glslang::EOpConstructU16Vec3:
-    case glslang::EOpConstructU16Vec4:
-#endif
-    case glslang::EOpConstructStruct:
-    case glslang::EOpConstructTextureSampler:
-    {
-        builder.setLine(node->getLoc().line);
-        std::vector<spv::Id> arguments;
-        translateArguments(*node, arguments);
-        spv::Id constructed;
-        if (node->getOp() == glslang::EOpConstructTextureSampler)
-            constructed = builder.createOp(spv::OpSampledImage, resultType(), arguments);
-        else if (node->getOp() == glslang::EOpConstructStruct || node->getType().isArray()) {
-            std::vector<spv::Id> constituents;
-            for (int c = 0; c < (int)arguments.size(); ++c)
-                constituents.push_back(arguments[c]);
-            constructed = builder.createCompositeConstruct(resultType(), constituents);
-        } else if (isMatrix)
-            constructed = builder.createMatrixConstructor(precision, arguments, resultType());
-        else
-            constructed = builder.createConstructor(precision, arguments, resultType());
-
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(constructed);
-
-        return false;
-    }
-
-    // These six are component-wise compares with component-wise results.
-    // Forward on to createBinaryOperation(), requesting a vector result.
-    case glslang::EOpLessThan:
-    case glslang::EOpGreaterThan:
-    case glslang::EOpLessThanEqual:
-    case glslang::EOpGreaterThanEqual:
-    case glslang::EOpVectorEqual:
-    case glslang::EOpVectorNotEqual:
-    {
-        // Map the operation to a binary
-        binOp = node->getOp();
-        reduceComparison = false;
-        switch (node->getOp()) {
-        case glslang::EOpVectorEqual:    binOp = glslang::EOpVectorEqual;    break;
-        case glslang::EOpVectorNotEqual: binOp = glslang::EOpVectorNotEqual; break;
-        default:                         binOp = node->getOp();              break;
-        }
-
-        break;
-    }
-    case glslang::EOpMul:
-        // component-wise matrix multiply
-        binOp = glslang::EOpMul;
-        break;
-    case glslang::EOpOuterProduct:
-        // two vectors multiplied to make a matrix
-        binOp = glslang::EOpOuterProduct;
-        break;
-    case glslang::EOpDot:
-    {
-        // for scalar dot product, use multiply
-        glslang::TIntermSequence& glslangOperands = node->getSequence();
-        if (glslangOperands[0]->getAsTyped()->getVectorSize() == 1)
-            binOp = glslang::EOpMul;
-        break;
-    }
-    case glslang::EOpMod:
-        // when an aggregate, this is the floating-point mod built-in function,
-        // which can be emitted by the one in createBinaryOperation()
-        binOp = glslang::EOpMod;
-        break;
-    case glslang::EOpEmitVertex:
-    case glslang::EOpEndPrimitive:
-    case glslang::EOpBarrier:
-    case glslang::EOpMemoryBarrier:
-    case glslang::EOpMemoryBarrierAtomicCounter:
-    case glslang::EOpMemoryBarrierBuffer:
-    case glslang::EOpMemoryBarrierImage:
-    case glslang::EOpMemoryBarrierShared:
-    case glslang::EOpGroupMemoryBarrier:
-    case glslang::EOpDeviceMemoryBarrier:
-    case glslang::EOpAllMemoryBarrierWithGroupSync:
-    case glslang::EOpDeviceMemoryBarrierWithGroupSync:
-    case glslang::EOpWorkgroupMemoryBarrier:
-    case glslang::EOpWorkgroupMemoryBarrierWithGroupSync:
-        noReturnValue = true;
-        // These all have 0 operands and will naturally finish up in the code below for 0 operands
-        break;
-
-    case glslang::EOpAtomicAdd:
-    case glslang::EOpAtomicMin:
-    case glslang::EOpAtomicMax:
-    case glslang::EOpAtomicAnd:
-    case glslang::EOpAtomicOr:
-    case glslang::EOpAtomicXor:
-    case glslang::EOpAtomicExchange:
-    case glslang::EOpAtomicCompSwap:
-        atomic = true;
-        break;
-
-    case glslang::EOpAtomicCounterAdd:
-    case glslang::EOpAtomicCounterSubtract:
-    case glslang::EOpAtomicCounterMin:
-    case glslang::EOpAtomicCounterMax:
-    case glslang::EOpAtomicCounterAnd:
-    case glslang::EOpAtomicCounterOr:
-    case glslang::EOpAtomicCounterXor:
-    case glslang::EOpAtomicCounterExchange:
-    case glslang::EOpAtomicCounterCompSwap:
-        builder.addExtension("SPV_KHR_shader_atomic_counter_ops");
-        builder.addCapability(spv::CapabilityAtomicStorageOps);
-        atomic = true;
-        break;
-
-    default:
-        break;
-    }
-
-    //
-    // See if it maps to a regular operation.
-    //
-    if (binOp != glslang::EOpNull) {
-        glslang::TIntermTyped* left = node->getSequence()[0]->getAsTyped();
-        glslang::TIntermTyped* right = node->getSequence()[1]->getAsTyped();
-        assert(left && right);
-
-        builder.clearAccessChain();
-        left->traverse(this);
-        spv::Id leftId = accessChainLoad(left->getType());
-
-        builder.clearAccessChain();
-        right->traverse(this);
-        spv::Id rightId = accessChainLoad(right->getType());
-
-        builder.setLine(node->getLoc().line);
-        result = createBinaryOperation(binOp, precision, TranslateNoContractionDecoration(node->getType().getQualifier()),
-                                       resultType(), leftId, rightId,
-                                       left->getType().getBasicType(), reduceComparison);
-
-        // code above should only make binOp that exists in createBinaryOperation
-        assert(result != spv::NoResult);
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(result);
-
-        return false;
-    }
-
-    //
-    // Create the list of operands.
-    //
-    glslang::TIntermSequence& glslangOperands = node->getSequence();
-    std::vector<spv::Id> operands;
-    for (int arg = 0; arg < (int)glslangOperands.size(); ++arg) {
-        // special case l-value operands; there are just a few
-        bool lvalue = false;
-        switch (node->getOp()) {
-        case glslang::EOpFrexp:
-        case glslang::EOpModf:
-            if (arg == 1)
-                lvalue = true;
-            break;
-        case glslang::EOpInterpolateAtSample:
-        case glslang::EOpInterpolateAtOffset:
-#ifdef AMD_EXTENSIONS
-        case glslang::EOpInterpolateAtVertex:
-#endif
-            if (arg == 0) {
-                lvalue = true;
-
-                // Does it need a swizzle inversion?  If so, evaluation is inverted;
-                // operate first on the swizzle base, then apply the swizzle.
-                if (glslangOperands[0]->getAsOperator() &&
-                    glslangOperands[0]->getAsOperator()->getOp() == glslang::EOpVectorSwizzle)
-                    invertedType = convertGlslangToSpvType(glslangOperands[0]->getAsBinaryNode()->getLeft()->getType());
-            }
-            break;
-        case glslang::EOpAtomicAdd:
-        case glslang::EOpAtomicMin:
-        case glslang::EOpAtomicMax:
-        case glslang::EOpAtomicAnd:
-        case glslang::EOpAtomicOr:
-        case glslang::EOpAtomicXor:
-        case glslang::EOpAtomicExchange:
-        case glslang::EOpAtomicCompSwap:
-        case glslang::EOpAtomicCounterAdd:
-        case glslang::EOpAtomicCounterSubtract:
-        case glslang::EOpAtomicCounterMin:
-        case glslang::EOpAtomicCounterMax:
-        case glslang::EOpAtomicCounterAnd:
-        case glslang::EOpAtomicCounterOr:
-        case glslang::EOpAtomicCounterXor:
-        case glslang::EOpAtomicCounterExchange:
-        case glslang::EOpAtomicCounterCompSwap:
-            if (arg == 0)
-                lvalue = true;
-            break;
-        case glslang::EOpAddCarry:
-        case glslang::EOpSubBorrow:
-            if (arg == 2)
-                lvalue = true;
-            break;
-        case glslang::EOpUMulExtended:
-        case glslang::EOpIMulExtended:
-            if (arg >= 2)
-                lvalue = true;
-            break;
-        default:
-            break;
-        }
-        builder.clearAccessChain();
-        if (invertedType != spv::NoType && arg == 0)
-            glslangOperands[0]->getAsBinaryNode()->getLeft()->traverse(this);
-        else
-            glslangOperands[arg]->traverse(this);
-        if (lvalue)
-            operands.push_back(builder.accessChainGetLValue());
-        else {
-            builder.setLine(node->getLoc().line);
-            operands.push_back(accessChainLoad(glslangOperands[arg]->getAsTyped()->getType()));
-        }
-    }
-
-    builder.setLine(node->getLoc().line);
-    if (atomic) {
-        // Handle all atomics
-        result = createAtomicOperation(node->getOp(), precision, resultType(), operands, node->getBasicType());
-    } else {
-        // Pass through to generic operations.
-        switch (glslangOperands.size()) {
-        case 0:
-            result = createNoArgOperation(node->getOp(), precision, resultType());
-            break;
-        case 1:
-            result = createUnaryOperation(
-                node->getOp(), precision,
-                TranslateNoContractionDecoration(node->getType().getQualifier()),
-                resultType(), operands.front(),
-                glslangOperands[0]->getAsTyped()->getBasicType());
-            break;
-        default:
-            result = createMiscOperation(node->getOp(), precision, resultType(), operands, node->getBasicType());
-            break;
-        }
-        if (invertedType)
-            result = createInvertedSwizzle(precision, *glslangOperands[0]->getAsBinaryNode(), result);
-    }
-
-    if (noReturnValue)
-        return false;
-
-    if (! result) {
-        logger->missingFunctionality("unknown glslang aggregate");
-        return true;  // pick up a child as a placeholder operand
-    } else {
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(result);
-        return false;
-    }
-}
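The six component-wise comparison cases above are forwarded to createBinaryOperation() with reduceComparison turned off, so each component produces its own boolean instead of the whole compare reducing to a single bool. A minimal standalone sketch of that semantic difference (illustrative code, not glslang API):

    #include <array>
    #include <cstdio>

    // lessThan(a, b): component-wise compare with a component-wise result,
    // as opposed to a reduced compare that would yield one bool overall.
    template <std::size_t N>
    std::array<bool, N> lessThan(const std::array<float, N>& a, const std::array<float, N>& b) {
        std::array<bool, N> r{};
        for (std::size_t i = 0; i < N; ++i)
            r[i] = a[i] < b[i];  // conceptually one OpFOrdLessThan lane per component
        return r;
    }

    int main() {
        std::array<float, 3> a{1.0f, 5.0f, 2.0f}, b{2.0f, 4.0f, 2.0f};
        auto r = lessThan(a, b);
        std::printf("%d %d %d\n", r[0], r[1], r[2]);  // 1 0 0
    }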
-
-// This path handles both if-then-else and ?:
-// The if-then-else has a node type of void, while
-// ?: has either a void or a non-void node type
-//
-// Leaving the result, when not void:
-// GLSL only has r-values as the result of a ?:, but
-// if we have an l-value, that can be more efficient if it will
-// become the base of a complex r-value expression, because the
-// next layer copies r-values into memory to use the access-chain mechanism
-bool TGlslangToSpvTraverser::visitSelection(glslang::TVisit /* visit */, glslang::TIntermSelection* node)
-{
-    // See if it is simple and safe to generate OpSelect instead of using control flow.
-    // Crucially, side effects must be avoided, and there are performance trade-offs.
-    // Return true if good idea (and safe) for OpSelect, false otherwise.
-    const auto selectPolicy = [&]() -> bool {
-        if ((!node->getType().isScalar() && !node->getType().isVector()) ||
-            node->getBasicType() == glslang::EbtVoid)
-            return false;
-
-        if (node->getTrueBlock()  == nullptr ||
-            node->getFalseBlock() == nullptr)
-            return false;
-
-        assert(node->getType() == node->getTrueBlock() ->getAsTyped()->getType() &&
-               node->getType() == node->getFalseBlock()->getAsTyped()->getType());
-
-        // return true if a single operand to ? : is okay for OpSelect
-        const auto operandOkay = [](glslang::TIntermTyped* node) {
-            return node->getAsSymbolNode() || node->getType().getQualifier().isConstant();
-        };
-
-        return operandOkay(node->getTrueBlock() ->getAsTyped()) &&
-               operandOkay(node->getFalseBlock()->getAsTyped());
-    };
-
-    // Emit OpSelect for this selection.
-    const auto handleAsOpSelect = [&]() {
-        node->getCondition()->traverse(this);
-        spv::Id condition = accessChainLoad(node->getCondition()->getType());
-        node->getTrueBlock()->traverse(this);
-        spv::Id trueValue = accessChainLoad(node->getTrueBlock()->getAsTyped()->getType());
-        node->getFalseBlock()->traverse(this);
-        spv::Id falseValue = accessChainLoad(node->getFalseBlock()->getAsTyped()->getType());
-
-        builder.setLine(node->getLoc().line);
-
-        // smear condition to vector, if necessary (AST is always scalar)
-        if (builder.isVector(trueValue))
-            condition = builder.smearScalar(spv::NoPrecision, condition,
-                                            builder.makeVectorType(builder.makeBoolType(),
-                                                                   builder.getNumComponents(trueValue)));
-
-        spv::Id select = builder.createTriOp(spv::OpSelect,
-                                             convertGlslangToSpvType(node->getType()), condition,
-                                             trueValue, falseValue);
-        builder.clearAccessChain();
-        builder.setAccessChainRValue(select);
-    };
-
-    // Try for OpSelect
-
-    if (selectPolicy()) {
-        SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder);
-        if (node->getType().getQualifier().isSpecConstant())
-            spec_constant_op_mode_setter.turnOnSpecConstantOpMode();
-
-        handleAsOpSelect();
-        return false;
-    }
-
-    // Instead, emit control flow...
-    // Don't handle results as temporaries, because there will be two names
-    // and better to leave SSA to later passes.
-    spv::Id result = (node->getBasicType() == glslang::EbtVoid) ? spv::NoResult
-                                                                : builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(node->getType()));
-
-    // emit the condition before doing anything with selection
-    node->getCondition()->traverse(this);
-
-    // Selection control:
-    const spv::SelectionControlMask control = TranslateSelectionControl(*node);
-
-    // make an "if" based on the value created by the condition
-    spv::Builder::If ifBuilder(accessChainLoad(node->getCondition()->getType()), control, builder);
-
-    // emit the "then" statement
-    if (node->getTrueBlock() != nullptr) {
-        node->getTrueBlock()->traverse(this);
-        if (result != spv::NoResult)
-            builder.createStore(accessChainLoad(node->getTrueBlock()->getAsTyped()->getType()), result);
-    }
-
-    if (node->getFalseBlock() != nullptr) {
-        ifBuilder.makeBeginElse();
-        // emit the "else" statement
-        node->getFalseBlock()->traverse(this);
-        if (result != spv::NoResult)
-            builder.createStore(accessChainLoad(node->getFalseBlock()->getAsTyped()->getType()), result);
-    }
-
-    // finish off the control flow
-    ifBuilder.makeEndIf();
-
-    if (result != spv::NoResult) {
-        // GLSL only has r-values as the result of a ?:, but
-        // if we have an l-value, that can be more efficient if it will
-        // become the base of a complex r-value expression, because the
-        // next layer copies r-values into memory to use the access-chain mechanism
-        builder.clearAccessChain();
-        builder.setAccessChainLValue(result);
-    }
-
-    return false;
-}
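visitSelection() only takes the OpSelect path when both arms are side-effect-free, because OpSelect evaluates both values eagerly and then picks per component, smearing a scalar condition across all lanes. A standalone sketch of those semantics (names are illustrative, not glslang API):

    #include <array>
    #include <cstdio>

    // OpSelect-style selection: both values are fully evaluated before the
    // (smeared) condition picks per component. No branching and no
    // short-circuiting, which is why operands with side effects are rejected.
    template <std::size_t N>
    std::array<float, N> opSelect(bool condition, const std::array<float, N>& trueValue,
                                  const std::array<float, N>& falseValue) {
        std::array<float, N> result{};
        for (std::size_t i = 0; i < N; ++i)  // scalar condition smeared across lanes
            result[i] = condition ? trueValue[i] : falseValue[i];
        return result;
    }

    int main() {
        std::array<float, 2> a{1.0f, 2.0f}, b{3.0f, 4.0f};
        auto r = opSelect(false, a, b);
        std::printf("%g %g\n", r[0], r[1]);  // 3 4
    }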
-
-bool TGlslangToSpvTraverser::visitSwitch(glslang::TVisit /* visit */, glslang::TIntermSwitch* node)
-{
-    // emit and get the condition before doing anything with switch
-    node->getCondition()->traverse(this);
-    spv::Id selector = accessChainLoad(node->getCondition()->getAsTyped()->getType());
-
-    // Selection control:
-    const spv::SelectionControlMask control = TranslateSwitchControl(*node);
-
-    // browse the children to sort out code segments
-    int defaultSegment = -1;
-    std::vector<TIntermNode*> codeSegments;
-    glslang::TIntermSequence& sequence = node->getBody()->getSequence();
-    std::vector<int> caseValues;
-    std::vector<int> valueIndexToSegment(sequence.size());  // note: probably not all are used, it is an overestimate
-    for (glslang::TIntermSequence::iterator c = sequence.begin(); c != sequence.end(); ++c) {
-        TIntermNode* child = *c;
-        if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpDefault)
-            defaultSegment = (int)codeSegments.size();
-        else if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpCase) {
-            valueIndexToSegment[caseValues.size()] = (int)codeSegments.size();
-            caseValues.push_back(child->getAsBranchNode()->getExpression()->getAsConstantUnion()->getConstArray()[0].getIConst());
-        } else
-            codeSegments.push_back(child);
-    }
-
-    // handle the case where the last code segment is missing, due to no code
-    // statements between the last case and the end of the switch statement
-    if ((caseValues.size() && (int)codeSegments.size() == valueIndexToSegment[caseValues.size() - 1]) ||
-        (int)codeSegments.size() == defaultSegment)
-        codeSegments.push_back(nullptr);
-
-    // make the switch statement
-    std::vector<spv::Block*> segmentBlocks;  // returned, as the blocks allocated in the call
-    builder.makeSwitch(selector, control, (int)codeSegments.size(), caseValues, valueIndexToSegment, defaultSegment, segmentBlocks);
-
-    // emit all the code in the segments
-    breakForLoop.push(false);
-    for (unsigned int s = 0; s < codeSegments.size(); ++s) {
-        builder.nextSwitchSegment(segmentBlocks, s);
-        if (codeSegments[s])
-            codeSegments[s]->traverse(this);
-        else
-            builder.addSwitchBreak();
-    }
-    breakForLoop.pop();
-
-    builder.endSwitch(segmentBlocks);
-
-    return false;
-}
-
-void TGlslangToSpvTraverser::visitConstantUnion(glslang::TIntermConstantUnion* node)
-{
-    int nextConst = 0;
-    spv::Id constant = createSpvConstantFromConstUnionArray(node->getType(), node->getConstArray(), nextConst, false);
-
-    builder.clearAccessChain();
-    builder.setAccessChainRValue(constant);
-}
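The segment bookkeeping in visitSwitch() can be hard to follow, so the sketch below reproduces just the classification loop on a toy child list, mapping each case value to the code segment it falls through into (Kind and Child stand in for the glslang branch nodes and are not real API):

    #include <cstdio>
    #include <vector>

    enum class Kind { Case, Default, Code };
    struct Child { Kind kind; int value; };

    int main() {
        // switch (x) { case 1: case 2: <code A>; default: <code B>; }
        std::vector<Child> body = {
            {Kind::Case, 1}, {Kind::Case, 2}, {Kind::Code, 0},
            {Kind::Default, 0}, {Kind::Code, 1}};
        int defaultSegment = -1;
        int segments = 0;
        std::vector<int> caseValues, valueIndexToSegment;
        for (const Child& c : body) {
            if (c.kind == Kind::Default)
                defaultSegment = segments;           // default targets the next segment
            else if (c.kind == Kind::Case) {
                valueIndexToSegment.push_back(segments);
                caseValues.push_back(c.value);       // both cases share segment 0 here
            } else
                ++segments;                          // a run of code closes a segment
        }
        for (std::size_t i = 0; i < caseValues.size(); ++i)
            std::printf("case %d -> segment %d\n", caseValues[i], valueIndexToSegment[i]);
        std::printf("default -> segment %d\n", defaultSegment);  // 1
    }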
-
-bool TGlslangToSpvTraverser::visitLoop(glslang::TVisit /* visit */, glslang::TIntermLoop* node)
-{
-    auto blocks = builder.makeNewLoop();
-    builder.createBranch(&blocks.head);
-
-    // Loop control:
-    unsigned int dependencyLength = glslang::TIntermLoop::dependencyInfinite;
-    const spv::LoopControlMask control = TranslateLoopControl(*node, dependencyLength);
-
-    // Spec requires back edges to target header blocks, and every header block
-    // must dominate its merge block.  Make a header block first to ensure these
-    // conditions are met.  By definition, it will contain OpLoopMerge, followed
-    // by a block-ending branch.  But we don't want to put any other body/test
-    // instructions in it, since the body/test may have arbitrary instructions,
-    // including merges of its own.
-    builder.setLine(node->getLoc().line);
-    builder.setBuildPoint(&blocks.head);
-    builder.createLoopMerge(&blocks.merge, &blocks.continue_target, control, dependencyLength);
-    if (node->testFirst() && node->getTest()) {
-        spv::Block& test = builder.makeNewBlock();
-        builder.createBranch(&test);
-
-        builder.setBuildPoint(&test);
-        node->getTest()->traverse(this);
-        spv::Id condition = accessChainLoad(node->getTest()->getType());
-        builder.createConditionalBranch(condition, &blocks.body, &blocks.merge);
-
-        builder.setBuildPoint(&blocks.body);
-        breakForLoop.push(true);
-        if (node->getBody())
-            node->getBody()->traverse(this);
-        builder.createBranch(&blocks.continue_target);
-        breakForLoop.pop();
-
-        builder.setBuildPoint(&blocks.continue_target);
-        if (node->getTerminal())
-            node->getTerminal()->traverse(this);
-        builder.createBranch(&blocks.head);
-    } else {
-        builder.setLine(node->getLoc().line);
-        builder.createBranch(&blocks.body);
-
-        breakForLoop.push(true);
-        builder.setBuildPoint(&blocks.body);
-        if (node->getBody())
-            node->getBody()->traverse(this);
-        builder.createBranch(&blocks.continue_target);
-        breakForLoop.pop();
-
-        builder.setBuildPoint(&blocks.continue_target);
-        if (node->getTerminal())
-            node->getTerminal()->traverse(this);
-        if (node->getTest()) {
-            node->getTest()->traverse(this);
-            spv::Id condition =
-                accessChainLoad(node->getTest()->getType());
-            builder.createConditionalBranch(condition, &blocks.head, &blocks.merge);
-        } else {
-            // TODO: unless there was a break/return/discard instruction
-            // somewhere in the body, this is an infinite loop, so we should
-            // issue a warning.
-            builder.createBranch(&blocks.head);
-        }
-    }
-    builder.setBuildPoint(&blocks.merge);
-    builder.closeLoop();
-    return false;
-}
-
-bool TGlslangToSpvTraverser::visitBranch(glslang::TVisit /* visit */, glslang::TIntermBranch* node)
-{
-    if (node->getExpression())
-        node->getExpression()->traverse(this);
-
-    builder.setLine(node->getLoc().line);
-
-    switch (node->getFlowOp()) {
-    case glslang::EOpKill:
-        builder.makeDiscard();
-        break;
-    case glslang::EOpBreak:
-        if (breakForLoop.top())
-            builder.createLoopExit();
-        else
-            builder.addSwitchBreak();
-        break;
-    case glslang::EOpContinue:
-        builder.createLoopContinue();
-        break;
-    case glslang::EOpReturn:
-        if (node->getExpression()) {
-            const glslang::TType& glslangReturnType = node->getExpression()->getType();
-            spv::Id returnId = accessChainLoad(glslangReturnType);
-            if (builder.getTypeId(returnId) != currentFunction->getReturnType()) {
-                builder.clearAccessChain();
-                spv::Id copyId = builder.createVariable(spv::StorageClassFunction, currentFunction->getReturnType());
-                builder.setAccessChainLValue(copyId);
-                multiTypeStore(glslangReturnType, returnId);
-                returnId = builder.createLoad(copyId);
-            }
-            builder.makeReturn(false, returnId);
-        } else
-            builder.makeReturn(false);
-
-        builder.clearAccessChain();
-        break;
-
-    default:
-        assert(0);
-        break;
-    }
-
-    return false;
-}
-
-spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* node)
-{
-    // First, steer off constants, which are not SPIR-V variables, but
-    // can still have a mapping to a SPIR-V Id.
-    // This includes specialization constants.
-    if (node->getQualifier().isConstant()) {
-        return createSpvConstant(*node);
-    }
-
-    // Now, handle actual variables
-    spv::StorageClass storageClass = TranslateStorageClass(node->getType());
-    spv::Id spvType = convertGlslangToSpvType(node->getType());
-
-#ifdef AMD_EXTENSIONS
-    const bool contains16BitType = node->getType().containsBasicType(glslang::EbtFloat16) ||
-                                   node->getType().containsBasicType(glslang::EbtInt16)   ||
-                                   node->getType().containsBasicType(glslang::EbtUint16);
-    if (contains16BitType) {
-        if (storageClass == spv::StorageClassInput || storageClass == spv::StorageClassOutput) {
-            builder.addExtension(spv::E_SPV_KHR_16bit_storage);
-            builder.addCapability(spv::CapabilityStorageInputOutput16);
-        } else if (storageClass == spv::StorageClassPushConstant) {
-            builder.addExtension(spv::E_SPV_KHR_16bit_storage);
-            builder.addCapability(spv::CapabilityStoragePushConstant16);
-        } else if (storageClass == spv::StorageClassUniform) {
-            builder.addExtension(spv::E_SPV_KHR_16bit_storage);
-            builder.addCapability(spv::CapabilityStorageUniform16);
-            if (node->getType().getQualifier().storage == glslang::EvqBuffer)
-                builder.addCapability(spv::CapabilityStorageUniformBufferBlock16);
-        }
-    }
-#endif
-
-    const char* name = node->getName().c_str();
-    if (glslang::IsAnonymous(name))
-        name = "";
-
-    return builder.createVariable(storageClass, spvType, name);
-}
-
-// Return type Id of the sampled type.
-spv::Id TGlslangToSpvTraverser::getSampledType(const glslang::TSampler& sampler)
-{
-    switch (sampler.type) {
-        case glslang::EbtFloat:    return builder.makeFloatType(32);
-        case glslang::EbtInt:      return builder.makeIntType(32);
-        case glslang::EbtUint:     return builder.makeUintType(32);
-        default:
-            assert(0);
-            return builder.makeFloatType(32);
-    }
-}
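The block discipline visitLoop() describes (a header that holds only the merge information and a branch, with the single back edge targeting it) can be mirrored in plain C++ with labels. A rough structural sketch of a test-first loop only, under the assumption that each label corresponds to one emitted block:

    #include <cstdio>

    int main() {
        int i = 0;
    head:                   // would contain OpLoopMerge(merge, continue_target)
        goto test;
    test:
        if (!(i < 3)) goto merge;
        goto body;
    body:
        std::printf("iteration %d\n", i);
        goto continue_target;
    continue_target:        // a for-loop's terminal expression goes here
        ++i;
        goto head;          // the only back edge, targeting the header
    merge:
        return 0;
    }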
-
-// If node is a swizzle operation, return the type that should be used if
-// the swizzle base is first consumed by another operation, before the swizzle
-// is applied.
-spv::Id TGlslangToSpvTraverser::getInvertedSwizzleType(const glslang::TIntermTyped& node)
-{
-    if (node.getAsOperator() &&
-        node.getAsOperator()->getOp() == glslang::EOpVectorSwizzle)
-        return convertGlslangToSpvType(node.getAsBinaryNode()->getLeft()->getType());
-    else
-        return spv::NoType;
-}
-
-// When inverting a swizzle with a parent op, this function
-// will apply the swizzle operation to a completed parent operation.
-spv::Id TGlslangToSpvTraverser::createInvertedSwizzle(spv::Decoration precision, const glslang::TIntermTyped& node, spv::Id parentResult)
-{
-    std::vector<unsigned> swizzle;
-    convertSwizzle(*node.getAsBinaryNode()->getRight()->getAsAggregate(), swizzle);
-    return builder.createRvalueSwizzle(precision, convertGlslangToSpvType(node.getType()), parentResult, swizzle);
-}
-
-// Convert a glslang AST swizzle node to a swizzle vector for building SPIR-V.
-void TGlslangToSpvTraverser::convertSwizzle(const glslang::TIntermAggregate& node, std::vector<unsigned>& swizzle)
-{
-    const glslang::TIntermSequence& swizzleSequence = node.getSequence();
-    for (int i = 0; i < (int)swizzleSequence.size(); ++i)
-        swizzle.push_back(swizzleSequence[i]->getAsConstantUnion()->getConstArray()[0].getIConst());
-}
-
-// Convert from a glslang type to an SPV type, by calling into a
-// recursive version of this function. This establishes the inherited
-// layout state rooted from the top-level type.
-spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type)
-{
-    return convertGlslangToSpvType(type, getExplicitLayout(type), type.getQualifier());
-}
-
-// Do full recursive conversion of an arbitrary glslang type to a SPIR-V Id.
-// explicitLayout can be kept the same throughout the hierarchical recursive walk.
-// Mutually recursive with convertGlslangStructToSpvType().
-spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking explicitLayout, const glslang::TQualifier& qualifier)
-{
-    spv::Id spvType = spv::NoResult;
-
-    switch (type.getBasicType()) {
-    case glslang::EbtVoid:
-        spvType = builder.makeVoidType();
-        assert (! type.isArray());
-        break;
-    case glslang::EbtFloat:
-        spvType = builder.makeFloatType(32);
-        break;
-    case glslang::EbtDouble:
-        spvType = builder.makeFloatType(64);
-        break;
-#ifdef AMD_EXTENSIONS
-    case glslang::EbtFloat16:
-        builder.addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
-        spvType = builder.makeFloatType(16);
-        break;
-#endif
-    case glslang::EbtBool:
-        // "transparent" bool doesn't exist in SPIR-V.  The GLSL convention is
-        // a 32-bit int where non-0 means true.
-        if (explicitLayout != glslang::ElpNone)
-            spvType = builder.makeUintType(32);
-        else
-            spvType = builder.makeBoolType();
-        break;
-    case glslang::EbtInt:
-        spvType = builder.makeIntType(32);
-        break;
-    case glslang::EbtUint:
-        spvType = builder.makeUintType(32);
-        break;
-    case glslang::EbtInt64:
-        spvType = builder.makeIntType(64);
-        break;
-    case glslang::EbtUint64:
-        spvType = builder.makeUintType(64);
-        break;
-#ifdef AMD_EXTENSIONS
-    case glslang::EbtInt16:
-        builder.addExtension(spv::E_SPV_AMD_gpu_shader_int16);
-        spvType = builder.makeIntType(16);
-        break;
-    case glslang::EbtUint16:
-        builder.addExtension(spv::E_SPV_AMD_gpu_shader_int16);
-        spvType = builder.makeUintType(16);
-        break;
-#endif
-    case glslang::EbtAtomicUint:
-        builder.addCapability(spv::CapabilityAtomicStorage);
-        spvType = builder.makeUintType(32);
-        break;
-    case glslang::EbtSampler:
-        {
-            const glslang::TSampler& sampler = type.getSampler();
-            if (sampler.sampler) {
-                // pure sampler
-                spvType = builder.makeSamplerType();
-            } else {
-                // an image is present, make its type
-                spvType = builder.makeImageType(getSampledType(sampler), TranslateDimensionality(sampler), sampler.shadow, sampler.arrayed, sampler.ms,
-                                                sampler.image ? 2 : 1, TranslateImageFormat(type));
-                if (sampler.combined) {
-                    // already has both image and sampler, make the combined type
-                    spvType = builder.makeSampledImageType(spvType);
-                }
-            }
-        }
-        break;
-    case glslang::EbtStruct:
-    case glslang::EbtBlock:
-        {
-            // If we've seen this struct type, return it
-            const glslang::TTypeList* glslangMembers = type.getStruct();
-
-            // Try to share structs for different layouts, but not yet for other
-            // kinds of qualification (primarily not yet including interpolant qualification).
-            if (! HasNonLayoutQualifiers(type, qualifier))
-                spvType = structMap[explicitLayout][qualifier.layoutMatrix][glslangMembers];
-            if (spvType != spv::NoResult)
-                break;
-
-            // else, we haven't seen it...
-            if (type.getBasicType() == glslang::EbtBlock)
-                memberRemapper[glslangMembers].resize(glslangMembers->size());
-            spvType = convertGlslangStructToSpvType(type, glslangMembers, explicitLayout, qualifier);
-        }
-        break;
-    default:
-        assert(0);
-        break;
-    }
-
-    if (type.isMatrix())
-        spvType = builder.makeMatrixType(spvType, type.getMatrixCols(), type.getMatrixRows());
-    else {
-        // If this variable has a vector element count greater than 1, create a SPIR-V vector
-        if (type.getVectorSize() > 1)
-            spvType = builder.makeVectorType(spvType, type.getVectorSize());
-    }
-
-    if (type.isArray()) {
-        int stride = 0;  // keep this 0 unless doing an explicit layout; 0 will mean no decoration, no stride
-
-        // Do all but the outer dimension
-        if (type.getArraySizes()->getNumDims() > 1) {
-            // We need to decorate array strides for types needing explicit layout, except blocks.
-            if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock) {
-                // Use a dummy glslang type for querying internal strides of
-                // arrays of arrays, but using just a one-dimensional array.
-                glslang::TType simpleArrayType(type, 0); // dereference type of the array
-                while (simpleArrayType.getArraySizes().getNumDims() > 1)
-                    simpleArrayType.getArraySizes().dereference();
-
-                // Will compute the higher-order strides here, rather than making a whole
-                // pile of types and doing repetitive recursion on their contents.
-                stride = getArrayStride(simpleArrayType, explicitLayout, qualifier.layoutMatrix);
-            }
-
-            // make the arrays
-            for (int dim = type.getArraySizes()->getNumDims() - 1; dim > 0; --dim) {
-                spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), dim), stride);
-                if (stride > 0)
-                    builder.addDecoration(spvType, spv::DecorationArrayStride, stride);
-                stride *= type.getArraySizes()->getDimSize(dim);
-            }
-        } else {
-            // single-dimensional array, and don't yet have stride
-
-            // We need to decorate array strides for types needing explicit layout, except blocks.
-            if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock)
-                stride = getArrayStride(type, explicitLayout, qualifier.layoutMatrix);
-        }
-
-        // Do the outer dimension, which might not be known for a runtime-sized array
-        if (type.isRuntimeSizedArray()) {
-            spvType = builder.makeRuntimeArray(spvType);
-        } else {
-            assert(type.getOuterArraySize() > 0);
-            spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), 0), stride);
-        }
-        if (stride > 0)
-            builder.addDecoration(spvType, spv::DecorationArrayStride, stride);
-    }
-
-    return spvType;
-}
-
-// TODO: this functionality should exist at a higher level, in creating the AST
-//
-// Identify interface members that don't have their required extension turned on.
-//
-bool TGlslangToSpvTraverser::filterMember(const glslang::TType& member)
-{
-    auto& extensions = glslangIntermediate->getRequestedExtensions();
-
-    if (member.getFieldName() == "gl_ViewportMask" &&
-        extensions.find("GL_NV_viewport_array2") == extensions.end())
-        return true;
-    if (member.getFieldName() == "gl_SecondaryViewportMaskNV" &&
-        extensions.find("GL_NV_stereo_view_rendering") == extensions.end())
-        return true;
-    if (member.getFieldName() == "gl_SecondaryPositionNV" &&
-        extensions.find("GL_NV_stereo_view_rendering") == extensions.end())
-        return true;
-    if (member.getFieldName() == "gl_PositionPerViewNV" &&
-        extensions.find("GL_NVX_multiview_per_view_attributes") == extensions.end())
-        return true;
-    if (member.getFieldName() == "gl_ViewportMaskPerViewNV" &&
-        extensions.find("GL_NVX_multiview_per_view_attributes") == extensions.end())
-        return true;
-
-    return false;
-};
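For arrays of arrays under an explicit layout, convertGlslangToSpvType() above decorates every dimension with an ArrayStride, multiplying the stride by each dimension size as it wraps outward. A standalone illustration with simplified std430-style numbers (real strides come from getArrayStride()/getBaseAlignment(); this only shows the accumulation):

    #include <cstdio>

    int main() {
        const int dims[3] = {3, 2, 4};  // float a[3][2][4];
        int stride = 4;                 // stride of the innermost element type
        // Inner dimensions first, multiplying the stride as each level is
        // wrapped, exactly like the dim > 0 loop above.
        for (int dim = 2; dim > 0; --dim) {
            std::printf("dimension %d: ArrayStride %d\n", dim, stride);
            stride *= dims[dim];
        }
        std::printf("outer dimension: ArrayStride %d\n", stride);  // 32
    }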
-
-// Do full recursive conversion of a glslang structure (or block) type to a SPIR-V Id.
-// explicitLayout can be kept the same throughout the hierarchical recursive walk.
-// Mutually recursive with convertGlslangToSpvType().
-spv::Id TGlslangToSpvTraverser::convertGlslangStructToSpvType(const glslang::TType& type,
-                                                              const glslang::TTypeList* glslangMembers,
-                                                              glslang::TLayoutPacking explicitLayout,
-                                                              const glslang::TQualifier& qualifier)
-{
-    // Create a vector of struct types for SPIR-V to consume
-    std::vector<spv::Id> spvMembers;
-    int memberDelta = 0;  // how much the member's index changes from glslang to SPIR-V, normally 0, except sometimes for blocks
-    for (int i = 0; i < (int)glslangMembers->size(); i++) {
-        glslang::TType& glslangMember = *(*glslangMembers)[i].type;
-        if (glslangMember.hiddenMember()) {
-            ++memberDelta;
-            if (type.getBasicType() == glslang::EbtBlock)
-                memberRemapper[glslangMembers][i] = -1;
-        } else {
-            if (type.getBasicType() == glslang::EbtBlock) {
-                memberRemapper[glslangMembers][i] = i - memberDelta;
-                if (filterMember(glslangMember))
-                    continue;
-            }
-            // modify just this child's view of the qualifier
-            glslang::TQualifier memberQualifier = glslangMember.getQualifier();
-            InheritQualifiers(memberQualifier, qualifier);
-
-            // manually inherit location
-            if (! memberQualifier.hasLocation() && qualifier.hasLocation())
-                memberQualifier.layoutLocation = qualifier.layoutLocation;
-
-            // recurse
-            spvMembers.push_back(convertGlslangToSpvType(glslangMember, explicitLayout, memberQualifier));
-        }
-    }
-
-    // Make the SPIR-V type
-    spv::Id spvType = builder.makeStructType(spvMembers, type.getTypeName().c_str());
-    if (! HasNonLayoutQualifiers(type, qualifier))
-        structMap[explicitLayout][qualifier.layoutMatrix][glslangMembers] = spvType;
-
-    // Decorate it
-    decorateStructType(type, glslangMembers, explicitLayout, qualifier, spvType);
-
-    return spvType;
-}
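The structMap lookup used above and in convertGlslangToSpvType() shares one SPIR-V struct type between identical uses, keyed by explicit layout, matrix layout, and the member list, while any non-layout qualification forces a separate type. A hypothetical sketch of that memoization shape (all names here are illustrative):

    #include <cstdio>
    #include <map>
    #include <tuple>

    using TypeId = int;
    using MemberList = const void*;  // stands in for the glslang TTypeList pointer

    std::map<std::tuple<int /*layout*/, int /*matrixLayout*/, MemberList>, TypeId> structMap;
    TypeId nextId = 1;

    TypeId convertStruct(int layout, int matrixLayout, MemberList members) {
        auto key = std::make_tuple(layout, matrixLayout, members);
        auto it = structMap.find(key);
        if (it != structMap.end())
            return it->second;             // seen it: share the existing type
        return structMap[key] = nextId++;  // else build (here: just allocate an id)
    }

    int main() {
        int members;  // its address is the identity of a member list
        std::printf("%d\n", convertStruct(0, 0, &members));  // 1
        std::printf("%d\n", convertStruct(0, 0, &members));  // 1, shared
        std::printf("%d\n", convertStruct(1, 0, &members));  // 2, other layout
    }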
-
-void TGlslangToSpvTraverser::decorateStructType(const glslang::TType& type,
-                                                const glslang::TTypeList* glslangMembers,
-                                                glslang::TLayoutPacking explicitLayout,
-                                                const glslang::TQualifier& qualifier,
-                                                spv::Id spvType)
-{
-    // Name and decorate the non-hidden members
-    int offset = -1;
-    int locationOffset = 0;  // for use within the members of this struct
-    for (int i = 0; i < (int)glslangMembers->size(); i++) {
-        glslang::TType& glslangMember = *(*glslangMembers)[i].type;
-        int member = i;
-        if (type.getBasicType() == glslang::EbtBlock) {
-            member = memberRemapper[glslangMembers][i];
-            if (filterMember(glslangMember))
-                continue;
-        }
-
-        // modify just this child's view of the qualifier
-        glslang::TQualifier memberQualifier = glslangMember.getQualifier();
-        InheritQualifiers(memberQualifier, qualifier);
-
-        // using -1 above to indicate a hidden member
-        if (member >= 0) {
-            builder.addMemberName(spvType, member, glslangMember.getFieldName().c_str());
-            addMemberDecoration(spvType, member, TranslateLayoutDecoration(glslangMember, memberQualifier.layoutMatrix));
-            addMemberDecoration(spvType, member, TranslatePrecisionDecoration(glslangMember));
-            // Add interpolation and auxiliary storage decorations only to top-level members of Input and Output storage classes
-            if (type.getQualifier().storage == glslang::EvqVaryingIn ||
-                type.getQualifier().storage == glslang::EvqVaryingOut) {
-                if (type.getBasicType() == glslang::EbtBlock ||
-                    glslangIntermediate->getSource() == glslang::EShSourceHlsl) {
-                    addMemberDecoration(spvType, member, TranslateInterpolationDecoration(memberQualifier));
-                    addMemberDecoration(spvType, member, TranslateAuxiliaryStorageDecoration(memberQualifier));
-                }
-            }
-            addMemberDecoration(spvType, member, TranslateInvariantDecoration(memberQualifier));
-
-            if (type.getBasicType() == glslang::EbtBlock &&
-                qualifier.storage == glslang::EvqBuffer) {
-                // Add memory decorations only to top-level members of shader storage block
-                std::vector<spv::Decoration> memory;
-                TranslateMemoryDecoration(memberQualifier, memory);
-                for (unsigned int i = 0; i < memory.size(); ++i)
-                    addMemberDecoration(spvType, member, memory[i]);
-            }
-
-            // Location assignment was already completed correctly by the front end,
-            // just track whether a member needs to be decorated.
-            // Ignore member locations if the container is an array, as that's
-            // ill-specified and decisions have been made to not allow this.
-            if (! type.isArray() && memberQualifier.hasLocation())
-                builder.addMemberDecoration(spvType, member, spv::DecorationLocation, memberQualifier.layoutLocation);
-
-            if (qualifier.hasLocation())  // track for upcoming inheritance
-                locationOffset += glslangIntermediate->computeTypeLocationSize(glslangMember);
-
-            // component, XFB, others
-            if (glslangMember.getQualifier().hasComponent())
-                builder.addMemberDecoration(spvType, member, spv::DecorationComponent, glslangMember.getQualifier().layoutComponent);
-            if (glslangMember.getQualifier().hasXfbOffset())
-                builder.addMemberDecoration(spvType, member, spv::DecorationOffset, glslangMember.getQualifier().layoutXfbOffset);
-            else if (explicitLayout != glslang::ElpNone) {
-                // figure out what to do with offset, which is accumulating
-                int nextOffset;
-                updateMemberOffset(type, glslangMember, offset, nextOffset, explicitLayout, memberQualifier.layoutMatrix);
-                if (offset >= 0)
-                    builder.addMemberDecoration(spvType, member, spv::DecorationOffset, offset);
-                offset = nextOffset;
-            }
-
-            if (glslangMember.isMatrix() && explicitLayout != glslang::ElpNone)
-                builder.addMemberDecoration(spvType, member, spv::DecorationMatrixStride, getMatrixStride(glslangMember, explicitLayout, memberQualifier.layoutMatrix));
-
-            // built-in variable decorations
-            spv::BuiltIn builtIn = TranslateBuiltInDecoration(glslangMember.getQualifier().builtIn, true);
-            if (builtIn != spv::BuiltInMax)
-                addMemberDecoration(spvType, member, spv::DecorationBuiltIn, (int)builtIn);
-
-#ifdef NV_EXTENSIONS
-            if (builtIn == spv::BuiltInLayer) {
-                // SPV_NV_viewport_array2 extension
-                if (glslangMember.getQualifier().layoutViewportRelative) {
-                    addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationViewportRelativeNV);
-                    builder.addCapability(spv::CapabilityShaderViewportMaskNV);
-                    builder.addExtension(spv::E_SPV_NV_viewport_array2);
-                }
-                if (glslangMember.getQualifier().layoutSecondaryViewportRelativeOffset != -2048) {
-                    addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationSecondaryViewportRelativeNV, glslangMember.getQualifier().layoutSecondaryViewportRelativeOffset);
-                    builder.addCapability(spv::CapabilityShaderStereoViewNV);
-                    builder.addExtension(spv::E_SPV_NV_stereo_view_rendering);
-                }
-            }
-            if (glslangMember.getQualifier().layoutPassthrough) {
-                addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationPassthroughNV);
-                builder.addCapability(spv::CapabilityGeometryShaderPassthroughNV);
-                builder.addExtension(spv::E_SPV_NV_geometry_shader_passthrough);
-            }
-#endif
-        }
-    }
-
-    // Decorate the structure
-    addDecoration(spvType, TranslateLayoutDecoration(type, qualifier.layoutMatrix));
-    addDecoration(spvType, TranslateBlockDecoration(type, glslangIntermediate->usingStorageBuffer()));
-    if (type.getQualifier().hasStream() && glslangIntermediate->isMultiStream()) {
-        builder.addCapability(spv::CapabilityGeometryStreams);
-        builder.addDecoration(spvType, spv::DecorationStream, type.getQualifier().layoutStream);
-    }
-}
-
-// Turn the expression forming the array size into an id.
-// This is not quite trivial, because of specialization constants.
-// Sometimes, a raw constant is turned into an Id, and sometimes
-// a specialization constant expression is.
-spv::Id TGlslangToSpvTraverser::makeArraySizeId(const glslang::TArraySizes& arraySizes, int dim)
-{
-    // First, see if this is sized with a node, meaning a specialization constant:
-    glslang::TIntermTyped* specNode = arraySizes.getDimNode(dim);
-    if (specNode != nullptr) {
-        builder.clearAccessChain();
-        specNode->traverse(this);
-        return accessChainLoad(specNode->getAsTyped()->getType());
-    }
-
-    // Otherwise, need a compile-time (front end) size, get it:
-    int size = arraySizes.getDimSize(dim);
-    assert(size > 0);
-    return builder.makeUintConstant(size);
-}
-
-// Wrap the builder's accessChainLoad to:
-//  - localize handling of RelaxedPrecision
-//  - use the SPIR-V inferred type instead of another conversion of the glslang type
-//    (avoids unnecessary work and possible type punning for structures)
-//  - do conversion of concrete to abstract type
-spv::Id TGlslangToSpvTraverser::accessChainLoad(const glslang::TType& type)
-{
-    spv::Id nominalTypeId = builder.accessChainGetInferredType();
-    spv::Id loadedId = builder.accessChainLoad(TranslatePrecisionDecoration(type), nominalTypeId);
-
-    // Need to convert to abstract types when necessary
-    if (type.getBasicType() == glslang::EbtBool) {
-        if (builder.isScalarType(nominalTypeId)) {
-            // Conversion for bool
-            spv::Id boolType = builder.makeBoolType();
-            if (nominalTypeId != boolType)
-                loadedId = builder.createBinOp(spv::OpINotEqual, boolType, loadedId, builder.makeUintConstant(0));
-        } else if (builder.isVectorType(nominalTypeId)) {
-            // Conversion for bvec
-            int vecSize = builder.getNumTypeComponents(nominalTypeId);
-            spv::Id bvecType = builder.makeVectorType(builder.makeBoolType(), vecSize);
-            if (nominalTypeId != bvecType)
-                loadedId = builder.createBinOp(spv::OpINotEqual, bvecType, loadedId, makeSmearedConstant(builder.makeUintConstant(0), vecSize));
-        }
-    }
-
-    return loadedId;
-}
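accessChainLoad() implements the bool convention noted earlier in convertGlslangToSpvType(): explicitly laid out storage holds a bool as a 32-bit uint where non-0 means true, so loads compare against zero (OpINotEqual) and, in accessChainStore() below, stores select 1 or 0 (OpSelect). The same convention in standalone form, as a sketch only:

    #include <cstdint>
    #include <cstdio>

    bool loadBool(uint32_t stored) {
        return stored != 0;         // OpINotEqual %bool %stored %uint_0
    }

    uint32_t storeBool(bool value) {
        return value ? 1u : 0u;     // OpSelect %uint %value %uint_1 %uint_0
    }

    int main() {
        std::printf("%d %d\n", loadBool(7), loadBool(0));            // 1 0
        std::printf("%u %u\n", storeBool(true), storeBool(false));   // 1 0
    }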
-
-// Wrap the builder's accessChainStore to:
-//  - do conversion of concrete to abstract type
-//
-// Implicitly uses the existing builder.accessChain as the storage target.
-void TGlslangToSpvTraverser::accessChainStore(const glslang::TType& type, spv::Id rvalue)
-{
-    // Need to convert to abstract types when necessary
-    if (type.getBasicType() == glslang::EbtBool) {
-        spv::Id nominalTypeId = builder.accessChainGetInferredType();
-
-        if (builder.isScalarType(nominalTypeId)) {
-            // Conversion for bool
-            spv::Id boolType = builder.makeBoolType();
-            if (nominalTypeId != boolType) {
-                // keep these outside arguments, for deterministic order-of-evaluation
-                spv::Id one  = builder.makeUintConstant(1);
-                spv::Id zero = builder.makeUintConstant(0);
-                rvalue = builder.createTriOp(spv::OpSelect, nominalTypeId, rvalue, one, zero);
-            } else if (builder.getTypeId(rvalue) != boolType)
-                rvalue = builder.createBinOp(spv::OpINotEqual, boolType, rvalue, builder.makeUintConstant(0));
-        } else if (builder.isVectorType(nominalTypeId)) {
-            // Conversion for bvec
-            int vecSize = builder.getNumTypeComponents(nominalTypeId);
-            spv::Id bvecType = builder.makeVectorType(builder.makeBoolType(), vecSize);
-            if (nominalTypeId != bvecType) {
-                // keep these outside arguments, for deterministic order-of-evaluation
-                spv::Id one  = makeSmearedConstant(builder.makeUintConstant(1), vecSize);
-                spv::Id zero = makeSmearedConstant(builder.makeUintConstant(0), vecSize);
-                rvalue = builder.createTriOp(spv::OpSelect, nominalTypeId, rvalue, one, zero);
-            } else if (builder.getTypeId(rvalue) != bvecType)
-                rvalue = builder.createBinOp(spv::OpINotEqual, bvecType, rvalue,
-                                             makeSmearedConstant(builder.makeUintConstant(0), vecSize));
-        }
-    }
-
-    builder.accessChainStore(rvalue);
-}
-
-// For storing when types match at the glslang level, but might not match at the
-// SPIR-V level.
-//
-// This especially happens when a single glslang type expands to multiple
-// SPIR-V types, like a struct that is used in a member-undecorated way as well
-// as in a member-decorated way.
-//
-// NOTE: This function can handle any store request; if it's not special it
-// simplifies to a simple OpStore.
-//
-// Implicitly uses the existing builder.accessChain as the storage target.
-void TGlslangToSpvTraverser::multiTypeStore(const glslang::TType& type, spv::Id rValue)
-{
-    // we only do the complex path here if it's an aggregate
-    if (! type.isStruct() && ! type.isArray()) {
-        accessChainStore(type, rValue);
-        return;
-    }
-
-    // and, it has to be a case of type aliasing
-    spv::Id rType = builder.getTypeId(rValue);
-    spv::Id lValue = builder.accessChainGetLValue();
-    spv::Id lType = builder.getContainedTypeId(builder.getTypeId(lValue));
-    if (lType == rType) {
-        accessChainStore(type, rValue);
-        return;
-    }
-
-    // Recursively (as needed) copy an aggregate type to a different aggregate type,
-    // where the two types were the same type in GLSL. This requires member
-    // by member copy, recursively.
-
-    // If an array, copy element by element.
-    if (type.isArray()) {
-        glslang::TType glslangElementType(type, 0);
-        spv::Id elementRType = builder.getContainedTypeId(rType);
-        for (int index = 0; index < type.getOuterArraySize(); ++index) {
-            // get the source member
-            spv::Id elementRValue = builder.createCompositeExtract(rValue, elementRType, index);
-
-            // set up the target storage
-            builder.clearAccessChain();
-            builder.setAccessChainLValue(lValue);
-            builder.accessChainPush(builder.makeIntConstant(index));
-
-            // store the member
-            multiTypeStore(glslangElementType, elementRValue);
-        }
-    } else {
-        assert(type.isStruct());
-
-        // loop over structure members
-        const glslang::TTypeList& members = *type.getStruct();
-        for (int m = 0; m < (int)members.size(); ++m) {
-            const glslang::TType& glslangMemberType = *members[m].type;
-
-            // get the source member
-            spv::Id memberRType = builder.getContainedTypeId(rType, m);
-            spv::Id memberRValue = builder.createCompositeExtract(rValue, memberRType, m);
-
-            // set up the target storage
-            builder.clearAccessChain();
-            builder.setAccessChainLValue(lValue);
-            builder.accessChainPush(builder.makeIntConstant(m));
-
-            // store the member
-            multiTypeStore(glslangMemberType, memberRValue);
-        }
-    }
-}
-
-// Decide whether or not this type should be
-// decorated with offsets and strides, and if so
-// whether std140 or std430 rules should be applied.
-glslang::TLayoutPacking TGlslangToSpvTraverser::getExplicitLayout(const glslang::TType& type) const
-{
-    // has to be a block
-    if (type.getBasicType() != glslang::EbtBlock)
-        return glslang::ElpNone;
-
-    // has to be a uniform or buffer block
-    if (type.getQualifier().storage != glslang::EvqUniform &&
-        type.getQualifier().storage != glslang::EvqBuffer)
-        return glslang::ElpNone;
-
-    // return the layout to use
-    switch (type.getQualifier().layoutPacking) {
-    case glslang::ElpStd140:
-    case glslang::ElpStd430:
-        return type.getQualifier().layoutPacking;
-    default:
-        return glslang::ElpNone;
-    }
-}
-
-// Given an array type, returns the integer stride required for that array
-int TGlslangToSpvTraverser::getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout)
-{
-    int size;
-    int stride;
-    glslangIntermediate->getBaseAlignment(arrayType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor);
-
-    return stride;
-}
-
-// Given a matrix type, or array (of array) of matrices type, returns the integer stride required for that matrix
-// when used as a member of an interface block
-int TGlslangToSpvTraverser::getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout)
-{
-    glslang::TType elementType;
-    elementType.shallowCopy(matrixType);
-    elementType.clearArraySizes();
-
-    int size;
-    int stride;
-    glslangIntermediate->getBaseAlignment(elementType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor);
-
-    return stride;
-}
-
-// Given a member type of a struct, realign the current offset for it, and compute
-// the next (not yet aligned) offset for the next member, which will get aligned
-// on the next call.
-// 'currentOffset' should be passed in already initialized, ready to modify, and reflecting
-// the migration of data from nextOffset -> currentOffset.  It should be -1 on the first call.
-// -1 means a non-forced member offset (no decoration needed).
-void TGlslangToSpvTraverser::updateMemberOffset(const glslang::TType& structType, const glslang::TType& memberType, int& currentOffset, int& nextOffset,
-                                                glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout)
-{
-    // this will get a positive value when deemed necessary
-    nextOffset = -1;
-
-    // override anything in currentOffset with user-set offset
-    if (memberType.getQualifier().hasOffset())
-        currentOffset = memberType.getQualifier().layoutOffset;
-
-    // It could be that current linker usage in glslang updated all the layoutOffset,
-    // in which case the following code does not matter.  But, that's not quite right
-    // once cross-compilation unit GLSL validation is done, as the original user
-    // settings are needed in layoutOffset, and then the following will come into play.
-
-    if (explicitLayout == glslang::ElpNone) {
-        if (! memberType.getQualifier().hasOffset())
-            currentOffset = -1;
-
-        return;
-    }
-
-    // Getting this far means we need explicit offsets
-    if (currentOffset < 0)
-        currentOffset = 0;
-
-    // Now, currentOffset is valid (either 0, or from a previous nextOffset),
-    // but possibly not yet correctly aligned.
-
-    int memberSize;
-    int dummyStride;
-    int memberAlignment = glslangIntermediate->getBaseAlignment(memberType, memberSize, dummyStride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor);
-
-    // Adjust alignment for HLSL rules
-    // TODO: make this consistent in early phases of code:
-    //       adjusting this late means inconsistencies with earlier code, which for reflection is an issue
-    // Until reflection is brought in sync with these adjustments, don't apply to $Global,
-    // which is the most likely to rely on reflection, and least likely to rely on implicit layouts
-    if (glslangIntermediate->usingHlslOFfsets() &&
-        ! memberType.isArray() && memberType.isVector() && structType.getTypeName().compare("$Global") != 0) {
-        int dummySize;
-        int componentAlignment = glslangIntermediate->getBaseAlignmentScalar(memberType, dummySize);
-        if (componentAlignment <= 4)
-            memberAlignment = componentAlignment;
-    }
-
-    // Bump up to member alignment
-    glslang::RoundToPow2(currentOffset, memberAlignment);
-
-    // Bump up to vec4 if there is a bad straddle
-    if (glslangIntermediate->improperStraddle(memberType, memberSize, currentOffset))
-        glslang::RoundToPow2(currentOffset, 16);
-
-    nextOffset = currentOffset + memberSize;
-}
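updateMemberOffset() is the core of std140/std430 offset assignment: carry a running unaligned offset, round it up to the member's base alignment, then advance by the member's size. A reduced, self-contained illustration with hard-coded std140 sizes for a { float; vec3; vec2; } block (real values come from getBaseAlignment(), and the vec4 straddle bump is omitted):

    #include <cstdio>

    int roundToPow2(int value, int alignment) {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    int main() {
        struct Member { const char* name; int size; int alignment; };
        const Member members[3] = {
            {"float f", 4, 4}, {"vec3 v", 12, 16}, {"vec2 w", 8, 8}};
        int currentOffset = 0, nextOffset = 0;
        for (const Member& m : members) {
            currentOffset = roundToPow2(nextOffset, m.alignment);  // align member
            std::printf("%s: offset %d\n", m.name, currentOffset); // 0, 16, 32
            nextOffset = currentOffset + m.size;
        }
    }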
-
-void TGlslangToSpvTraverser::declareUseOfStructMember(const glslang::TTypeList& members, int glslangMember)
-{
-    const glslang::TBuiltInVariable glslangBuiltIn = members[glslangMember].type->getQualifier().builtIn;
-    switch (glslangBuiltIn)
-    {
-    case glslang::EbvClipDistance:
-    case glslang::EbvCullDistance:
-    case glslang::EbvPointSize:
-#ifdef NV_EXTENSIONS
-    case glslang::EbvViewportMaskNV:
-    case glslang::EbvSecondaryPositionNV:
-    case glslang::EbvSecondaryViewportMaskNV:
-    case glslang::EbvPositionPerViewNV:
-    case glslang::EbvViewportMaskPerViewNV:
-#endif
-        // Generate the associated capability.  Delegate to TranslateBuiltInDecoration.
-        // Alternately, we could just call this for any glslang built-in, since the
-        // capability already guards against duplicates.
-        TranslateBuiltInDecoration(glslangBuiltIn, false);
-        break;
-    default:
-        // Capabilities were already generated when the struct was declared.
-        break;
-    }
-}
-
-bool TGlslangToSpvTraverser::isShaderEntryPoint(const glslang::TIntermAggregate* node)
-{
-    return node->getName().compare(glslangIntermediate->getEntryPointMangledName().c_str()) == 0;
-}
-
-// Does parameter need a place to keep writes, separate from the original?
-// Assumes called after originalParam(), which filters out block/buffer/opaque-based
-// qualifiers such that we should have only in/out/inout/constreadonly here.
-bool TGlslangToSpvTraverser::writableParam(glslang::TStorageQualifier qualifier)
-{
-    assert(qualifier == glslang::EvqIn ||
-           qualifier == glslang::EvqOut ||
-           qualifier == glslang::EvqInOut ||
-           qualifier == glslang::EvqConstReadOnly);
-    return qualifier != glslang::EvqConstReadOnly;
-}
-
-// Is parameter pass-by-original?
-bool TGlslangToSpvTraverser::originalParam(glslang::TStorageQualifier qualifier, const glslang::TType& paramType,
-                                           bool implicitThisParam)
-{
-    if (implicitThisParam)  // implicit this
-        return true;
-    if (glslangIntermediate->getSource() == glslang::EShSourceHlsl)
-        return paramType.getBasicType() == glslang::EbtBlock;
-    return paramType.containsOpaque() ||  // sampler, etc.
-           (paramType.getBasicType() == glslang::EbtBlock && qualifier == glslang::EvqBuffer);  // SSBO
-}
-
-// Make all the functions, skeletally, without actually visiting their bodies.
-void TGlslangToSpvTraverser::makeFunctions(const glslang::TIntermSequence& glslFunctions)
-{
-    const auto getParamDecorations = [](std::vector<spv::Decoration>& decorations, const glslang::TType& type) {
-        spv::Decoration paramPrecision = TranslatePrecisionDecoration(type);
-        if (paramPrecision != spv::NoPrecision)
-            decorations.push_back(paramPrecision);
-        TranslateMemoryDecoration(type.getQualifier(), decorations);
-    };
-
-    for (int f = 0; f < (int)glslFunctions.size(); ++f) {
-        glslang::TIntermAggregate* glslFunction = glslFunctions[f]->getAsAggregate();
-        if (! glslFunction || glslFunction->getOp() != glslang::EOpFunction || isShaderEntryPoint(glslFunction))
-            continue;
-
-        // We're on a user function.  Set up the basic interface for the function now,
-        // so that it's available to call.  Translating the body will happen later.
-        //
-        // Typically (except for a "const in" parameter), an address will be passed to the
-        // function.  What it is an address of varies:
-        //
-        // - "in" parameters not marked as "const" can be written to without modifying the calling
-        //   argument so that write needs to be to a copy, hence the address of a copy works.
-        //
-        // - "const in" parameters can just be the r-value, as no writes need occur.
-        //
-        // - "out" and "inout" arguments can't be done as pointers to the calling argument, because
-        //   GLSL has copy-in/copy-out semantics.  They can be handled though with a pointer to a copy.
-
-        std::vector<spv::Id> paramTypes;
-        std::vector<std::vector<spv::Decoration>> paramDecorations;  // list of decorations per parameter
-        glslang::TIntermSequence& parameters = glslFunction->getSequence()[0]->getAsAggregate()->getSequence();
-
-        bool implicitThis = (int)parameters.size() > 0 && parameters[0]->getAsSymbolNode()->getName() ==
-                                                          glslangIntermediate->implicitThisName;
-
-        paramDecorations.resize(parameters.size());
-        for (int p = 0; p < (int)parameters.size(); ++p) {
-            const glslang::TType& paramType = parameters[p]->getAsTyped()->getType();
-            spv::Id typeId = convertGlslangToSpvType(paramType);
-            if (originalParam(paramType.getQualifier().storage, paramType, implicitThis && p == 0))
-                typeId = builder.makePointer(TranslateStorageClass(paramType), typeId);
-            else if (writableParam(paramType.getQualifier().storage))
-                typeId = builder.makePointer(spv::StorageClassFunction, typeId);
-            else
-                rValueParameters.insert(parameters[p]->getAsSymbolNode()->getId());
-            getParamDecorations(paramDecorations[p], paramType);
-            paramTypes.push_back(typeId);
-        }
-
-        spv::Block* functionBlock;
-        spv::Function *function = builder.makeFunctionEntry(TranslatePrecisionDecoration(glslFunction->getType()),
-                                                            convertGlslangToSpvType(glslFunction->getType()),
-                                                            glslFunction->getName().c_str(), paramTypes,
-                                                            paramDecorations, &functionBlock);
-        if (implicitThis)
-            function->setImplicitThis();
-
-        // Track function to emit/call later
-        functionMap[glslFunction->getName().c_str()] = function;
-
-        // Set the parameter id's
-        for (int p = 0; p < (int)parameters.size(); ++p) {
-            symbolValues[parameters[p]->getAsSymbolNode()->getId()] = function->getParamId(p);
-            // give a name too
-            builder.addName(function->getParamId(p), parameters[p]->getAsSymbolNode()->getName().c_str());
-        }
-    }
-}
-
-// Process all the initializers, while skipping the functions and link objects
-void TGlslangToSpvTraverser::makeGlobalInitializers(const glslang::TIntermSequence& initializers)
-{
-    builder.setBuildPoint(shaderEntry->getLastBlock());
-    for (int i = 0; i < (int)initializers.size(); ++i) {
-        glslang::TIntermAggregate* initializer = initializers[i]->getAsAggregate();
-        if (initializer && initializer->getOp() != glslang::EOpFunction && initializer->getOp() != glslang::EOpLinkerObjects) {
-
-            // We're on a top-level node that's not a function.  Treat as an initializer, whose
-            // code goes into the beginning of the entry point.
-            initializer->traverse(this);
-        }
-    }
-}
-
-// Process all the functions, while skipping initializers.
-void TGlslangToSpvTraverser::visitFunctions(const glslang::TIntermSequence& glslFunctions)
-{
-    for (int f = 0; f < (int)glslFunctions.size(); ++f) {
-        glslang::TIntermAggregate* node = glslFunctions[f]->getAsAggregate();
-        if (node && (node->getOp() == glslang::EOpFunction || node->getOp() == glslang::EOpLinkerObjects))
-            node->traverse(this);
-    }
-}
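The comment in makeFunctions() about "out"/"inout" arguments is worth pinning down: GLSL has copy-in/copy-out semantics, so the callee must behave as if it works on private copies even when the caller passes aliasing arguments, which C++-style references would violate. A standalone demonstration of the difference:

    #include <cstdio>

    void addTwiceByReference(int& out, const int& in) {
        out = 0;
        out += in;   // if out and in alias, this reads the 0 just stored
        out += in;
    }

    void addTwiceCopyInOut(int& outArg, const int& inArg) {
        int in = inArg, out = 0;  // copy in
        out += in;
        out += in;
        outArg = out;             // copy out on return
    }

    int main() {
        int a = 5;
        addTwiceByReference(a, a);
        std::printf("%d\n", a);   // 0: aliasing corrupted the input
        a = 5;
        addTwiceCopyInOut(a, a);
        std::printf("%d\n", a);   // 10: the result GLSL semantics require
    }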
-
-void TGlslangToSpvTraverser::handleFunctionEntry(const glslang::TIntermAggregate* node)
-{
-    // SPIR-V functions should already be in the functionMap from the prepass
-    // that called makeFunctions().
-    currentFunction = functionMap[node->getName().c_str()];
-    spv::Block* functionBlock = currentFunction->getEntryBlock();
-    builder.setBuildPoint(functionBlock);
-}
-
-void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& node, std::vector<spv::Id>& arguments)
-{
-    const glslang::TIntermSequence& glslangArguments = node.getSequence();
-
-    glslang::TSampler sampler = {};
-    bool cubeCompare = false;
-    if (node.isTexture() || node.isImage()) {
-        sampler = glslangArguments[0]->getAsTyped()->getType().getSampler();
-        cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow;
-    }
-
-    for (int i = 0; i < (int)glslangArguments.size(); ++i) {
-        builder.clearAccessChain();
-        glslangArguments[i]->traverse(this);
-
-        // Special case l-value operands
-        bool lvalue = false;
-        switch (node.getOp()) {
-        case glslang::EOpImageAtomicAdd:
-        case glslang::EOpImageAtomicMin:
-        case glslang::EOpImageAtomicMax:
-        case glslang::EOpImageAtomicAnd:
-        case glslang::EOpImageAtomicOr:
-        case glslang::EOpImageAtomicXor:
-        case glslang::EOpImageAtomicExchange:
-        case glslang::EOpImageAtomicCompSwap:
-            if (i == 0)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseImageLoad:
-            if ((sampler.ms && i == 3) || (! sampler.ms && i == 2))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTexture:
-            if ((cubeCompare && i == 3) || (! cubeCompare && i == 2))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureClamp:
-            if ((cubeCompare && i == 4) || (! cubeCompare && i == 3))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureLod:
-        case glslang::EOpSparseTextureOffset:
-            if (i == 3)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureFetch:
-            if ((sampler.dim != glslang::EsdRect && i == 3) || (sampler.dim == glslang::EsdRect && i == 2))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureFetchOffset:
-            if ((sampler.dim != glslang::EsdRect && i == 4) || (sampler.dim == glslang::EsdRect && i == 3))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureLodOffset:
-        case glslang::EOpSparseTextureGrad:
-        case glslang::EOpSparseTextureOffsetClamp:
-            if (i == 4)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureGradOffset:
-        case glslang::EOpSparseTextureGradClamp:
-            if (i == 5)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureGradOffsetClamp:
-            if (i == 6)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureGather:
-            if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2))
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureGatherOffset:
-        case glslang::EOpSparseTextureGatherOffsets:
-            if ((sampler.shadow && i == 4) || (! sampler.shadow && i == 3))
-                lvalue = true;
-            break;
-#ifdef AMD_EXTENSIONS
-        case glslang::EOpSparseTextureGatherLod:
-            if (i == 3)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseTextureGatherLodOffset:
-        case glslang::EOpSparseTextureGatherLodOffsets:
-            if (i == 4)
-                lvalue = true;
-            break;
-        case glslang::EOpSparseImageLoadLod:
-            if (i == 3)
-                lvalue = true;
-            break;
-#endif
-        default:
-            break;
-        }
-
-        if (lvalue)
-            arguments.push_back(builder.accessChainGetLValue());
-        else
-            arguments.push_back(accessChainLoad(glslangArguments[i]->getAsTyped()->getType()));
-    }
-}
-
-void TGlslangToSpvTraverser::translateArguments(glslang::TIntermUnary& node, std::vector<spv::Id>& arguments)
-{
-    builder.clearAccessChain();
-    node.getOperand()->traverse(this);
-    arguments.push_back(accessChainLoad(node.getOperand()->getType()));
-}
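Several of the sparse-texture cases above need an l-value for one specific argument because GLSL's sparseTexture* builtins return a residency code and write the texel through an out parameter; translateArguments() therefore pushes an access-chain l-value for exactly that index. The same calling shape in plain C++ (hypothetical names, illustration only):

    #include <cstdio>

    struct Vec4 { float x, y, z, w; };

    int sparseFetch(Vec4& texelOut /* out parameter: needs an l-value */) {
        texelOut = {0.0f, 0.5f, 1.0f, 1.0f};  // the sampled texel
        return 1;                             // residency code
    }

    int main() {
        Vec4 texel;                 // caller provides storage to write through
        int code = sparseFetch(texel);
        std::printf("resident=%d texel=(%g, %g, %g, %g)\n",
                    code, texel.x, texel.y, texel.z, texel.w);
    }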
-
-spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermOperator* node)
-{
-    if (! node->isImage() && ! node->isTexture())
-        return spv::NoResult;
-
-    builder.setLine(node->getLoc().line);
-
-    auto resultType = [&node,this]{ return convertGlslangToSpvType(node->getType()); };
-
-    // Process a GLSL texturing op (will be SPV image)
-    const glslang::TSampler sampler = node->getAsAggregate() ? node->getAsAggregate()->getSequence()[0]->getAsTyped()->getType().getSampler()
-                                                             : node->getAsUnaryNode()->getOperand()->getAsTyped()->getType().getSampler();
-    std::vector<spv::Id> arguments;
-    if (node->getAsAggregate())
-        translateArguments(*node->getAsAggregate(), arguments);
-    else
-        translateArguments(*node->getAsUnaryNode(), arguments);
-    spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision());
-
-    spv::Builder::TextureParameters params = { };
-    params.sampler = arguments[0];
-
-    glslang::TCrackedTextureOp cracked;
-    node->crackTexture(sampler, cracked);
-
-    const bool isUnsignedResult = node->getType().getBasicType() == glslang::EbtUint;
-
-    // Check for queries
-    if (cracked.query) {
-        // OpImageQueryLod works on a sampled image, for other queries the image has to be extracted first
-        if (node->getOp() != glslang::EOpTextureQueryLod && builder.isSampledImage(params.sampler))
-            params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler);
-
-        switch (node->getOp()) {
-        case glslang::EOpImageQuerySize:
-        case glslang::EOpTextureQuerySize:
-            if (arguments.size() > 1) {
-                params.lod = arguments[1];
-                return builder.createTextureQueryCall(spv::OpImageQuerySizeLod, params, isUnsignedResult);
-            } else
-                return builder.createTextureQueryCall(spv::OpImageQuerySize, params, isUnsignedResult);
-        case glslang::EOpImageQuerySamples:
-        case glslang::EOpTextureQuerySamples:
-            return builder.createTextureQueryCall(spv::OpImageQuerySamples, params, isUnsignedResult);
-        case glslang::EOpTextureQueryLod:
-            params.coords = arguments[1];
-            return builder.createTextureQueryCall(spv::OpImageQueryLod, params, isUnsignedResult);
-        case glslang::EOpTextureQueryLevels:
-            return builder.createTextureQueryCall(spv::OpImageQueryLevels, params, isUnsignedResult);
-        case glslang::EOpSparseTexelsResident:
-            return builder.createUnaryOp(spv::OpImageSparseTexelsResident, builder.makeBoolType(), arguments[0]);
-        default:
-            assert(0);
-            break;
-        }
-    }
-
-    // Check for image functions other than queries
-    if (node->isImage()) {
-        std::vector<spv::Id> operands;
-        auto opIt = arguments.begin();
-        operands.push_back(*(opIt++));
-
-        // Handle subpass operations
-        // TODO: GLSL should change to have the "MS" only on the type rather than the
-        // built-in function.
-        if (cracked.subpass) {
-            // add on the (0,0) coordinate
-            spv::Id zero = builder.makeIntConstant(0);
-            std::vector<spv::Id> comps;
-            comps.push_back(zero);
-            comps.push_back(zero);
-            operands.push_back(builder.makeCompositeConstant(builder.makeVectorType(builder.makeIntType(32), 2), comps));
-            if (sampler.ms) {
-                operands.push_back(spv::ImageOperandsSampleMask);
-                operands.push_back(*(opIt++));
-            }
-            spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands);
-            builder.setPrecision(result, precision);
-            return result;
-        }
-
-        operands.push_back(*(opIt++));
-#ifdef AMD_EXTENSIONS
-        if (node->getOp() == glslang::EOpImageLoad || node->getOp() == glslang::EOpImageLoadLod) {
-#else
-        if (node->getOp() == glslang::EOpImageLoad) {
-#endif
-            if (sampler.ms) {
-                operands.push_back(spv::ImageOperandsSampleMask);
-                operands.push_back(*opIt);
-#ifdef AMD_EXTENSIONS
-            } else if (cracked.lod) {
-                builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
-                builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
-
-                operands.push_back(spv::ImageOperandsLodMask);
-                operands.push_back(*opIt);
-#endif
-            }
-            if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
-                builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
-
-            spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands);
-            builder.setPrecision(result, precision);
-            return result;
-#ifdef AMD_EXTENSIONS
-        } else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) {
-#else
-        } else if (node->getOp() == glslang::EOpImageStore) {
-#endif
-            if (sampler.ms) {
-                operands.push_back(*(opIt + 1));
-                operands.push_back(spv::ImageOperandsSampleMask);
-                operands.push_back(*opIt);
-#ifdef AMD_EXTENSIONS
-            } else if (cracked.lod) {
-                builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
-                builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
-
-                operands.push_back(*(opIt + 1));
-                operands.push_back(spv::ImageOperandsLodMask);
-                operands.push_back(*opIt);
-#endif
-            } else
-                operands.push_back(*opIt);
-            builder.createNoResultOp(spv::OpImageWrite, operands);
-            if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
-                builder.addCapability(spv::CapabilityStorageImageWriteWithoutFormat);
-            return spv::NoResult;
-#ifdef AMD_EXTENSIONS
-        } else if (node->getOp() == glslang::EOpSparseImageLoad || node->getOp() == glslang::EOpSparseImageLoadLod) {
-#else
-        } else if (node->getOp() == glslang::EOpSparseImageLoad) {
-#endif
-            builder.addCapability(spv::CapabilitySparseResidency);
-            if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown)
-                builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat);
-
-            if (sampler.ms) {
-                operands.push_back(spv::ImageOperandsSampleMask);
-                operands.push_back(*opIt++);
-#ifdef AMD_EXTENSIONS
-            } else if (cracked.lod) {
-                builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod);
-                builder.addCapability(spv::CapabilityImageReadWriteLodAMD);
-
-                operands.push_back(spv::ImageOperandsLodMask);
-                operands.push_back(*opIt++);
-#endif
-            }
-
-            // Create the return type that was a special structure
-            spv::Id texelOut = *opIt;
-            spv::Id typeId0 = resultType();
-            spv::Id typeId1 = builder.getDerefTypeId(texelOut);
-            spv::Id resultTypeId = builder.makeStructResultType(typeId0, typeId1);
-
-            spv::Id resultId = builder.createOp(spv::OpImageSparseRead, resultTypeId, operands);
-
Decode the return type - builder.createStore(builder.createCompositeExtract(resultId, typeId1, 1), texelOut); - return builder.createCompositeExtract(resultId, typeId0, 0); - } else { - // Process image atomic operations - - // GLSL "IMAGE_PARAMS" will involve in constructing an image texel pointer and this pointer, - // as the first source operand, is required by SPIR-V atomic operations. - operands.push_back(sampler.ms ? *(opIt++) : builder.makeUintConstant(0)); // For non-MS, the value should be 0 - - spv::Id resultTypeId = builder.makePointer(spv::StorageClassImage, resultType()); - spv::Id pointer = builder.createOp(spv::OpImageTexelPointer, resultTypeId, operands); - - std::vector operands; - operands.push_back(pointer); - for (; opIt != arguments.end(); ++opIt) - operands.push_back(*opIt); - - return createAtomicOperation(node->getOp(), precision, resultType(), operands, node->getBasicType()); - } - } - -#ifdef AMD_EXTENSIONS - // Check for fragment mask functions other than queries - if (cracked.fragMask) { - assert(sampler.ms); - - auto opIt = arguments.begin(); - std::vector operands; - - // Extract the image if necessary - if (builder.isSampledImage(params.sampler)) - params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); - - operands.push_back(params.sampler); - ++opIt; - - if (sampler.isSubpass()) { - // add on the (0,0) coordinate - spv::Id zero = builder.makeIntConstant(0); - std::vector comps; - comps.push_back(zero); - comps.push_back(zero); - operands.push_back(builder.makeCompositeConstant(builder.makeVectorType(builder.makeIntType(32), 2), comps)); - } - - for (; opIt != arguments.end(); ++opIt) - operands.push_back(*opIt); - - spv::Op fragMaskOp = spv::OpNop; - if (node->getOp() == glslang::EOpFragmentMaskFetch) - fragMaskOp = spv::OpFragmentMaskFetchAMD; - else if (node->getOp() == glslang::EOpFragmentFetch) - fragMaskOp = spv::OpFragmentFetchAMD; - - builder.addExtension(spv::E_SPV_AMD_shader_fragment_mask); - builder.addCapability(spv::CapabilityFragmentMaskAMD); - return builder.createOp(fragMaskOp, resultType(), operands); - } -#endif - - // Check for texture functions other than queries - bool sparse = node->isSparseTexture(); - bool cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow; - - // check for bias argument - bool bias = false; -#ifdef AMD_EXTENSIONS - if (! cracked.lod && ! cracked.grad && ! cracked.fetch && ! cubeCompare) { -#else - if (! cracked.lod && ! cracked.gather && ! cracked.grad && ! cracked.fetch && ! 
cubeCompare) { -#endif - int nonBiasArgCount = 2; -#ifdef AMD_EXTENSIONS - if (cracked.gather) - ++nonBiasArgCount; // comp argument should be present when bias argument is present -#endif - if (cracked.offset) - ++nonBiasArgCount; -#ifdef AMD_EXTENSIONS - else if (cracked.offsets) - ++nonBiasArgCount; -#endif - if (cracked.grad) - nonBiasArgCount += 2; - if (cracked.lodClamp) - ++nonBiasArgCount; - if (sparse) - ++nonBiasArgCount; - - if ((int)arguments.size() > nonBiasArgCount) - bias = true; - } - - // See if the sampler param should really be just the SPV image part - if (cracked.fetch) { - // a fetch needs to have the image extracted first - if (builder.isSampledImage(params.sampler)) - params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); - } - -#ifdef AMD_EXTENSIONS - if (cracked.gather) { - const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions(); - if (bias || cracked.lod || - sourceExtensions.find(glslang::E_GL_AMD_texture_gather_bias_lod) != sourceExtensions.end()) { - builder.addExtension(spv::E_SPV_AMD_texture_gather_bias_lod); - builder.addCapability(spv::CapabilityImageGatherBiasLodAMD); - } - } -#endif - - // set the rest of the arguments - - params.coords = arguments[1]; - int extraArgs = 0; - bool noImplicitLod = false; - - // sort out where Dref is coming from - if (cubeCompare) { - params.Dref = arguments[2]; - ++extraArgs; - } else if (sampler.shadow && cracked.gather) { - params.Dref = arguments[2]; - ++extraArgs; - } else if (sampler.shadow) { - std::vector indexes; - int dRefComp; - if (cracked.proj) - dRefComp = 2; // "The resulting 3rd component of P in the shadow forms is used as Dref" - else - dRefComp = builder.getNumComponents(params.coords) - 1; - indexes.push_back(dRefComp); - params.Dref = builder.createCompositeExtract(params.coords, builder.getScalarTypeId(builder.getTypeId(params.coords)), indexes); - } - - // lod - if (cracked.lod) { - params.lod = arguments[2 + extraArgs]; - ++extraArgs; - } else if (glslangIntermediate->getStage() != EShLangFragment) { - // we need to invent the default lod for an explicit lod instruction for a non-fragment stage - noImplicitLod = true; - } - - // multisample - if (sampler.ms) { - params.sample = arguments[2 + extraArgs]; // For MS, "sample" should be specified - ++extraArgs; - } - - // gradient - if (cracked.grad) { - params.gradX = arguments[2 + extraArgs]; - params.gradY = arguments[3 + extraArgs]; - extraArgs += 2; - } - - // offset and offsets - if (cracked.offset) { - params.offset = arguments[2 + extraArgs]; - ++extraArgs; - } else if (cracked.offsets) { - params.offsets = arguments[2 + extraArgs]; - ++extraArgs; - } - - // lod clamp - if (cracked.lodClamp) { - params.lodClamp = arguments[2 + extraArgs]; - ++extraArgs; - } - - // sparse - if (sparse) { - params.texelOut = arguments[2 + extraArgs]; - ++extraArgs; - } - - // gather component - if (cracked.gather && ! sampler.shadow) { - // default component is 0, if missing, otherwise an argument - if (2 + extraArgs < (int)arguments.size()) { - params.component = arguments[2 + extraArgs]; - ++extraArgs; - } else - params.component = builder.makeIntConstant(0); - } - - // bias - if (bias) { - params.bias = arguments[2 + extraArgs]; - ++extraArgs; - } - - // projective component (might not to move) - // GLSL: "The texture coordinates consumed from P, not including the last component of P, - // are divided by the last component of P." - // SPIR-V: "... (u [, v] [, w], q)... 
It may be a vector larger than needed, but all - // unused components will appear after all used components." - if (cracked.proj) { - int projSourceComp = builder.getNumComponents(params.coords) - 1; - int projTargetComp; - switch (sampler.dim) { - case glslang::Esd1D: projTargetComp = 1; break; - case glslang::Esd2D: projTargetComp = 2; break; - case glslang::EsdRect: projTargetComp = 2; break; - default: projTargetComp = projSourceComp; break; - } - // copy the projective coordinate if we have to - if (projTargetComp != projSourceComp) { - spv::Id projComp = builder.createCompositeExtract(params.coords, - builder.getScalarTypeId(builder.getTypeId(params.coords)), - projSourceComp); - params.coords = builder.createCompositeInsert(projComp, params.coords, - builder.getTypeId(params.coords), projTargetComp); - } - } - - return builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params); -} - -spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node) -{ - // Grab the function's pointer from the previously created function - spv::Function* function = functionMap[node->getName().c_str()]; - if (! function) - return 0; - - const glslang::TIntermSequence& glslangArgs = node->getSequence(); - const glslang::TQualifierList& qualifiers = node->getQualifierList(); - - // See comments in makeFunctions() for details about the semantics for parameter passing. - // - // These imply we need a four step process: - // 1. Evaluate the arguments - // 2. Allocate and make copies of in, out, and inout arguments - // 3. Make the call - // 4. Copy back the results - - // 1. Evaluate the arguments - std::vector<spv::Builder::AccessChain> lValues; - std::vector<spv::Id> rValues; - std::vector<const glslang::TType*> argTypes; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - // build l-value - builder.clearAccessChain(); - glslangArgs[a]->traverse(this); - argTypes.push_back(&paramType); - // keep outputs and pass-by-originals as l-values, evaluate others as r-values - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0) || - writableParam(qualifiers[a])) { - // save l-value - lValues.push_back(builder.getAccessChain()); - } else { - // process r-value - rValues.push_back(accessChainLoad(*argTypes.back())); - } - } - - // 2. Allocate space for anything needing a copy, and if it's "in" or "inout" - // copy the original into that space.
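// Aside: a minimal C++ analogue of the copy-in/copy-out (value-result) lowering
// that steps 2-4 here implement; names are illustrative, not from this file.
static void valueResultCallee(int& param) { param += 1; }  // stands in for the called SPIR-V function
static int valueResultCaller() {
    int argument = 41;
    int param = argument;      // step 2: copy the "in"/"inout" value into fresh space
    valueResultCallee(param);  // step 3: make the call on the copy, never the original
    argument = param;          // step 4: copy the "out"/"inout" result back
    return argument;           // yields 42
}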
- // - // Also, build up the list of actual arguments to pass in for the call - int lValueCount = 0; - int rValueCount = 0; - std::vector<spv::Id> spvArgs; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - spv::Id arg; - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0)) { - builder.setAccessChain(lValues[lValueCount]); - arg = builder.accessChainGetLValue(); - ++lValueCount; - } else if (writableParam(qualifiers[a])) { - // need space to hold the copy - arg = builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(paramType), "param"); - if (qualifiers[a] == glslang::EvqIn || qualifiers[a] == glslang::EvqInOut) { - // need to copy the input into output space - builder.setAccessChain(lValues[lValueCount]); - spv::Id copy = accessChainLoad(*argTypes[a]); - builder.clearAccessChain(); - builder.setAccessChainLValue(arg); - multiTypeStore(paramType, copy); - } - ++lValueCount; - } else { - arg = rValues[rValueCount]; - ++rValueCount; - } - spvArgs.push_back(arg); - } - - // 3. Make the call. - spv::Id result = builder.createFunctionCall(function, spvArgs); - builder.setPrecision(result, TranslatePrecisionDecoration(node->getType())); - - // 4. Copy back the "out" arguments. - lValueCount = 0; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0)) - ++lValueCount; - else if (writableParam(qualifiers[a])) { - if (qualifiers[a] == glslang::EvqOut || qualifiers[a] == glslang::EvqInOut) { - spv::Id copy = builder.createLoad(spvArgs[a]); - builder.setAccessChain(lValues[lValueCount]); - multiTypeStore(paramType, copy); - } - ++lValueCount; - } - } - - return result; -} - -// Translate AST operation to SPV operation, already having SPV-based operands/types. -spv::Id TGlslangToSpvTraverser::createBinaryOperation(glslang::TOperator op, spv::Decoration precision, - spv::Decoration noContraction, - spv::Id typeId, spv::Id left, spv::Id right, - glslang::TBasicType typeProxy, bool reduceComparison) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; -#endif - bool isBool = typeProxy == glslang::EbtBool; - - spv::Op binOp = spv::OpNop; - bool needMatchingVectors = true; // for non-matrix ops, would a scalar need to smear to match a vector?
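// Aside: "matching vectors" means a scalar operand gets smeared to the vector
// width first - GLSL allows vec4 + float, but OpFAdd requires both operands to
// have the same type. A C++ sketch of what promoteScalar() effectively does
// (illustrative types, not part of this file):
struct Float4 { float v[4]; };
static Float4 addVecScalar(const Float4& a, float b) {
    Float4 smeared = { { b, b, b, b } };  // replicate the scalar to match the vector
    Float4 r;
    for (int i = 0; i < 4; ++i) r.v[i] = a.v[i] + smeared.v[i];  // then a plain component-wise add
    return r;
}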
- bool comparison = false; - - switch (op) { - case glslang::EOpAdd: - case glslang::EOpAddAssign: - if (isFloat) - binOp = spv::OpFAdd; - else - binOp = spv::OpIAdd; - break; - case glslang::EOpSub: - case glslang::EOpSubAssign: - if (isFloat) - binOp = spv::OpFSub; - else - binOp = spv::OpISub; - break; - case glslang::EOpMul: - case glslang::EOpMulAssign: - if (isFloat) - binOp = spv::OpFMul; - else - binOp = spv::OpIMul; - break; - case glslang::EOpVectorTimesScalar: - case glslang::EOpVectorTimesScalarAssign: - if (isFloat && (builder.isVector(left) || builder.isVector(right))) { - if (builder.isVector(right)) - std::swap(left, right); - assert(builder.isScalar(right)); - needMatchingVectors = false; - binOp = spv::OpVectorTimesScalar; - } else - binOp = spv::OpIMul; - break; - case glslang::EOpVectorTimesMatrix: - case glslang::EOpVectorTimesMatrixAssign: - binOp = spv::OpVectorTimesMatrix; - break; - case glslang::EOpMatrixTimesVector: - binOp = spv::OpMatrixTimesVector; - break; - case glslang::EOpMatrixTimesScalar: - case glslang::EOpMatrixTimesScalarAssign: - binOp = spv::OpMatrixTimesScalar; - break; - case glslang::EOpMatrixTimesMatrix: - case glslang::EOpMatrixTimesMatrixAssign: - binOp = spv::OpMatrixTimesMatrix; - break; - case glslang::EOpOuterProduct: - binOp = spv::OpOuterProduct; - needMatchingVectors = false; - break; - - case glslang::EOpDiv: - case glslang::EOpDivAssign: - if (isFloat) - binOp = spv::OpFDiv; - else if (isUnsigned) - binOp = spv::OpUDiv; - else - binOp = spv::OpSDiv; - break; - case glslang::EOpMod: - case glslang::EOpModAssign: - if (isFloat) - binOp = spv::OpFMod; - else if (isUnsigned) - binOp = spv::OpUMod; - else - binOp = spv::OpSMod; - break; - case glslang::EOpRightShift: - case glslang::EOpRightShiftAssign: - if (isUnsigned) - binOp = spv::OpShiftRightLogical; - else - binOp = spv::OpShiftRightArithmetic; - break; - case glslang::EOpLeftShift: - case glslang::EOpLeftShiftAssign: - binOp = spv::OpShiftLeftLogical; - break; - case glslang::EOpAnd: - case glslang::EOpAndAssign: - binOp = spv::OpBitwiseAnd; - break; - case glslang::EOpLogicalAnd: - needMatchingVectors = false; - binOp = spv::OpLogicalAnd; - break; - case glslang::EOpInclusiveOr: - case glslang::EOpInclusiveOrAssign: - binOp = spv::OpBitwiseOr; - break; - case glslang::EOpLogicalOr: - needMatchingVectors = false; - binOp = spv::OpLogicalOr; - break; - case glslang::EOpExclusiveOr: - case glslang::EOpExclusiveOrAssign: - binOp = spv::OpBitwiseXor; - break; - case glslang::EOpLogicalXor: - needMatchingVectors = false; - binOp = spv::OpLogicalNotEqual; - break; - - case glslang::EOpLessThan: - case glslang::EOpGreaterThan: - case glslang::EOpLessThanEqual: - case glslang::EOpGreaterThanEqual: - case glslang::EOpEqual: - case glslang::EOpNotEqual: - case glslang::EOpVectorEqual: - case glslang::EOpVectorNotEqual: - comparison = true; - break; - default: - break; - } - - // handle mapped binary operations (should be non-comparison) - if (binOp != spv::OpNop) { - assert(comparison == false); - if (builder.isMatrix(left) || builder.isMatrix(right)) - return createBinaryMatrixOperation(binOp, precision, noContraction, typeId, left, right); - - // No matrix involved; make both operands be the same number of components, if needed - if (needMatchingVectors) - builder.promoteScalar(precision, left, right); - - spv::Id result = builder.createBinOp(binOp, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - if (! 
comparison) - return 0; - - // Handle comparison instructions - - if (reduceComparison && (op == glslang::EOpEqual || op == glslang::EOpNotEqual) - && (builder.isVector(left) || builder.isMatrix(left) || builder.isAggregate(left))) - return builder.createCompositeCompare(precision, left, right, op == glslang::EOpEqual); - - switch (op) { - case glslang::EOpLessThan: - if (isFloat) - binOp = spv::OpFOrdLessThan; - else if (isUnsigned) - binOp = spv::OpULessThan; - else - binOp = spv::OpSLessThan; - break; - case glslang::EOpGreaterThan: - if (isFloat) - binOp = spv::OpFOrdGreaterThan; - else if (isUnsigned) - binOp = spv::OpUGreaterThan; - else - binOp = spv::OpSGreaterThan; - break; - case glslang::EOpLessThanEqual: - if (isFloat) - binOp = spv::OpFOrdLessThanEqual; - else if (isUnsigned) - binOp = spv::OpULessThanEqual; - else - binOp = spv::OpSLessThanEqual; - break; - case glslang::EOpGreaterThanEqual: - if (isFloat) - binOp = spv::OpFOrdGreaterThanEqual; - else if (isUnsigned) - binOp = spv::OpUGreaterThanEqual; - else - binOp = spv::OpSGreaterThanEqual; - break; - case glslang::EOpEqual: - case glslang::EOpVectorEqual: - if (isFloat) - binOp = spv::OpFOrdEqual; - else if (isBool) - binOp = spv::OpLogicalEqual; - else - binOp = spv::OpIEqual; - break; - case glslang::EOpNotEqual: - case glslang::EOpVectorNotEqual: - if (isFloat) - binOp = spv::OpFOrdNotEqual; - else if (isBool) - binOp = spv::OpLogicalNotEqual; - else - binOp = spv::OpINotEqual; - break; - default: - break; - } - - if (binOp != spv::OpNop) { - spv::Id result = builder.createBinOp(binOp, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - return 0; -} - -// -// Translate AST matrix operation to SPV operation, already having SPV-based operands/types. -// These can be any of: -// -// matrix * scalar -// scalar * matrix -// matrix * matrix linear algebraic -// matrix * vector -// vector * matrix -// matrix * matrix componentwise -// matrix op matrix op in {+, -, /} -// matrix op scalar op in {+, -, /} -// scalar op matrix op in {+, -, /} -// -spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right) -{ - bool firstClass = true; - - // First, handle first-class matrix operations (* and matrix/scalar) - switch (op) { - case spv::OpFDiv: - if (builder.isMatrix(left) && builder.isScalar(right)) { - // turn matrix / scalar into a multiply... - right = builder.createBinOp(spv::OpFDiv, builder.getTypeId(right), builder.makeFloatConstant(1.0F), right); - op = spv::OpMatrixTimesScalar; - } else - firstClass = false; - break; - case spv::OpMatrixTimesScalar: - if (builder.isMatrix(right)) - std::swap(left, right); - assert(builder.isScalar(right)); - break; - case spv::OpVectorTimesMatrix: - assert(builder.isVector(left)); - assert(builder.isMatrix(right)); - break; - case spv::OpMatrixTimesVector: - assert(builder.isMatrix(left)); - assert(builder.isVector(right)); - break; - case spv::OpMatrixTimesMatrix: - assert(builder.isMatrix(left)); - assert(builder.isMatrix(right)); - break; - default: - firstClass = false; - break; - } - - if (firstClass) { - spv::Id result = builder.createBinOp(op, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - // Handle component-wise +, -, *, %, and / for all combinations of type. 
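// Aside, re the matrix/scalar division case above: SPIR-V has
// OpMatrixTimesScalar but no matrix-by-scalar divide, so M / s is rewritten as
// M * (1.0 / s). The same identity in scalar C++ (illustrative only):
static float divViaReciprocal(float m, float s) {
    float inv = 1.0f / s;  // right = 1.0 / right, as in the OpFDiv case above
    return m * inv;        // then an ordinary (matrix-times-)scalar multiply
}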
- // The result type of all of them is the same type as the (a) matrix operand. - // The algorithm is to: - // - break the matrix(es) into vectors - // - smear any scalar to a vector - // - do vector operations - // - make a matrix out the vector results - switch (op) { - case spv::OpFAdd: - case spv::OpFSub: - case spv::OpFDiv: - case spv::OpFMod: - case spv::OpFMul: - { - // one time set up... - bool leftMat = builder.isMatrix(left); - bool rightMat = builder.isMatrix(right); - unsigned int numCols = leftMat ? builder.getNumColumns(left) : builder.getNumColumns(right); - int numRows = leftMat ? builder.getNumRows(left) : builder.getNumRows(right); - spv::Id scalarType = builder.getScalarTypeId(typeId); - spv::Id vecType = builder.makeVectorType(scalarType, numRows); - std::vector results; - spv::Id smearVec = spv::NoResult; - if (builder.isScalar(left)) - smearVec = builder.smearScalar(precision, left, vecType); - else if (builder.isScalar(right)) - smearVec = builder.smearScalar(precision, right, vecType); - - // do each vector op - for (unsigned int c = 0; c < numCols; ++c) { - std::vector indexes; - indexes.push_back(c); - spv::Id leftVec = leftMat ? builder.createCompositeExtract( left, vecType, indexes) : smearVec; - spv::Id rightVec = rightMat ? builder.createCompositeExtract(right, vecType, indexes) : smearVec; - spv::Id result = builder.createBinOp(op, vecType, leftVec, rightVec); - addDecoration(result, noContraction); - results.push_back(builder.setPrecision(result, precision)); - } - - // put the pieces together - return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); - } - default: - assert(0); - return spv::NoResult; - } -} - -spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy) -{ - spv::Op unaryOp = spv::OpNop; - int extBuiltins = -1; - int libCall = -1; -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; -#endif - - switch (op) { - case glslang::EOpNegative: - if (isFloat) { - unaryOp = spv::OpFNegate; - if (builder.isMatrixType(typeId)) - return createUnaryMatrixOperation(unaryOp, precision, noContraction, typeId, operand, typeProxy); - } else - unaryOp = spv::OpSNegate; - break; - - case glslang::EOpLogicalNot: - case glslang::EOpVectorLogicalNot: - unaryOp = spv::OpLogicalNot; - break; - case glslang::EOpBitwiseNot: - unaryOp = spv::OpNot; - break; - - case glslang::EOpDeterminant: - libCall = spv::GLSLstd450Determinant; - break; - case glslang::EOpMatrixInverse: - libCall = spv::GLSLstd450MatrixInverse; - break; - case glslang::EOpTranspose: - unaryOp = spv::OpTranspose; - break; - - case glslang::EOpRadians: - libCall = spv::GLSLstd450Radians; - break; - case glslang::EOpDegrees: - libCall = spv::GLSLstd450Degrees; - break; - case glslang::EOpSin: - libCall = spv::GLSLstd450Sin; - break; - case glslang::EOpCos: - libCall = spv::GLSLstd450Cos; - break; - case glslang::EOpTan: - libCall = spv::GLSLstd450Tan; - break; - case glslang::EOpAcos: - libCall = spv::GLSLstd450Acos; - break; - case glslang::EOpAsin: - 
libCall = spv::GLSLstd450Asin; - break; - case glslang::EOpAtan: - libCall = spv::GLSLstd450Atan; - break; - - case glslang::EOpAcosh: - libCall = spv::GLSLstd450Acosh; - break; - case glslang::EOpAsinh: - libCall = spv::GLSLstd450Asinh; - break; - case glslang::EOpAtanh: - libCall = spv::GLSLstd450Atanh; - break; - case glslang::EOpTanh: - libCall = spv::GLSLstd450Tanh; - break; - case glslang::EOpCosh: - libCall = spv::GLSLstd450Cosh; - break; - case glslang::EOpSinh: - libCall = spv::GLSLstd450Sinh; - break; - - case glslang::EOpLength: - libCall = spv::GLSLstd450Length; - break; - case glslang::EOpNormalize: - libCall = spv::GLSLstd450Normalize; - break; - - case glslang::EOpExp: - libCall = spv::GLSLstd450Exp; - break; - case glslang::EOpLog: - libCall = spv::GLSLstd450Log; - break; - case glslang::EOpExp2: - libCall = spv::GLSLstd450Exp2; - break; - case glslang::EOpLog2: - libCall = spv::GLSLstd450Log2; - break; - case glslang::EOpSqrt: - libCall = spv::GLSLstd450Sqrt; - break; - case glslang::EOpInverseSqrt: - libCall = spv::GLSLstd450InverseSqrt; - break; - - case glslang::EOpFloor: - libCall = spv::GLSLstd450Floor; - break; - case glslang::EOpTrunc: - libCall = spv::GLSLstd450Trunc; - break; - case glslang::EOpRound: - libCall = spv::GLSLstd450Round; - break; - case glslang::EOpRoundEven: - libCall = spv::GLSLstd450RoundEven; - break; - case glslang::EOpCeil: - libCall = spv::GLSLstd450Ceil; - break; - case glslang::EOpFract: - libCall = spv::GLSLstd450Fract; - break; - - case glslang::EOpIsNan: - unaryOp = spv::OpIsNan; - break; - case glslang::EOpIsInf: - unaryOp = spv::OpIsInf; - break; - case glslang::EOpIsFinite: - unaryOp = spv::OpIsFinite; - break; - - case glslang::EOpFloatBitsToInt: - case glslang::EOpFloatBitsToUint: - case glslang::EOpIntBitsToFloat: - case glslang::EOpUintBitsToFloat: - case glslang::EOpDoubleBitsToInt64: - case glslang::EOpDoubleBitsToUint64: - case glslang::EOpInt64BitsToDouble: - case glslang::EOpUint64BitsToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpFloat16BitsToInt16: - case glslang::EOpFloat16BitsToUint16: - case glslang::EOpInt16BitsToFloat16: - case glslang::EOpUint16BitsToFloat16: -#endif - unaryOp = spv::OpBitcast; - break; - - case glslang::EOpPackSnorm2x16: - libCall = spv::GLSLstd450PackSnorm2x16; - break; - case glslang::EOpUnpackSnorm2x16: - libCall = spv::GLSLstd450UnpackSnorm2x16; - break; - case glslang::EOpPackUnorm2x16: - libCall = spv::GLSLstd450PackUnorm2x16; - break; - case glslang::EOpUnpackUnorm2x16: - libCall = spv::GLSLstd450UnpackUnorm2x16; - break; - case glslang::EOpPackHalf2x16: - libCall = spv::GLSLstd450PackHalf2x16; - break; - case glslang::EOpUnpackHalf2x16: - libCall = spv::GLSLstd450UnpackHalf2x16; - break; - case glslang::EOpPackSnorm4x8: - libCall = spv::GLSLstd450PackSnorm4x8; - break; - case glslang::EOpUnpackSnorm4x8: - libCall = spv::GLSLstd450UnpackSnorm4x8; - break; - case glslang::EOpPackUnorm4x8: - libCall = spv::GLSLstd450PackUnorm4x8; - break; - case glslang::EOpUnpackUnorm4x8: - libCall = spv::GLSLstd450UnpackUnorm4x8; - break; - case glslang::EOpPackDouble2x32: - libCall = spv::GLSLstd450PackDouble2x32; - break; - case glslang::EOpUnpackDouble2x32: - libCall = spv::GLSLstd450UnpackDouble2x32; - break; - - case glslang::EOpPackInt2x32: - case glslang::EOpUnpackInt2x32: - case glslang::EOpPackUint2x32: - case glslang::EOpUnpackUint2x32: - unaryOp = spv::OpBitcast; - break; - -#ifdef AMD_EXTENSIONS - case glslang::EOpPackInt2x16: - case glslang::EOpUnpackInt2x16: - case glslang::EOpPackUint2x16: - 
case glslang::EOpUnpackUint2x16: - case glslang::EOpPackInt4x16: - case glslang::EOpUnpackInt4x16: - case glslang::EOpPackUint4x16: - case glslang::EOpUnpackUint4x16: - case glslang::EOpPackFloat2x16: - case glslang::EOpUnpackFloat2x16: - unaryOp = spv::OpBitcast; - break; -#endif - - case glslang::EOpDPdx: - unaryOp = spv::OpDPdx; - break; - case glslang::EOpDPdy: - unaryOp = spv::OpDPdy; - break; - case glslang::EOpFwidth: - unaryOp = spv::OpFwidth; - break; - case glslang::EOpDPdxFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdxFine; - break; - case glslang::EOpDPdyFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdyFine; - break; - case glslang::EOpFwidthFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpFwidthFine; - break; - case glslang::EOpDPdxCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdxCoarse; - break; - case glslang::EOpDPdyCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdyCoarse; - break; - case glslang::EOpFwidthCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpFwidthCoarse; - break; - case glslang::EOpInterpolateAtCentroid: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtCentroid; - break; - case glslang::EOpAny: - unaryOp = spv::OpAny; - break; - case glslang::EOpAll: - unaryOp = spv::OpAll; - break; - - case glslang::EOpAbs: - if (isFloat) - libCall = spv::GLSLstd450FAbs; - else - libCall = spv::GLSLstd450SAbs; - break; - case glslang::EOpSign: - if (isFloat) - libCall = spv::GLSLstd450FSign; - else - libCall = spv::GLSLstd450SSign; - break; - - case glslang::EOpAtomicCounterIncrement: - case glslang::EOpAtomicCounterDecrement: - case glslang::EOpAtomicCounter: - { - // Handle all of the atomics in one place, in createAtomicOperation() - std::vector operands; - operands.push_back(operand); - return createAtomicOperation(op, precision, typeId, operands, typeProxy); - } - - case glslang::EOpBitFieldReverse: - unaryOp = spv::OpBitReverse; - break; - case glslang::EOpBitCount: - unaryOp = spv::OpBitCount; - break; - case glslang::EOpFindLSB: - libCall = spv::GLSLstd450FindILsb; - break; - case glslang::EOpFindMSB: - if (isUnsigned) - libCall = spv::GLSLstd450FindUMsb; - else - libCall = spv::GLSLstd450FindSMsb; - break; - - case glslang::EOpBallot: - case glslang::EOpReadFirstInvocation: - case glslang::EOpAnyInvocation: - case glslang::EOpAllInvocations: - case glslang::EOpAllInvocationsEqual: -#ifdef AMD_EXTENSIONS - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - case glslang::EOpAddInvocationsNonUniform: - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: -#endif - { - 
std::vector operands; - operands.push_back(operand); - return createInvocationsOperation(op, typeId, operands, typeProxy); - } - -#ifdef AMD_EXTENSIONS - case glslang::EOpMbcnt: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::MbcntAMD; - break; - - case glslang::EOpCubeFaceIndex: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_gcn_shader); - libCall = spv::CubeFaceIndexAMD; - break; - - case glslang::EOpCubeFaceCoord: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_gcn_shader); - libCall = spv::CubeFaceCoordAMD; - break; -#endif - - default: - return 0; - } - - spv::Id id; - if (libCall >= 0) { - std::vector args; - args.push_back(operand); - id = builder.createBuiltinCall(typeId, extBuiltins >= 0 ? extBuiltins : stdBuiltins, libCall, args); - } else { - id = builder.createUnaryOp(unaryOp, typeId, operand); - } - - addDecoration(id, noContraction); - return builder.setPrecision(id, precision); -} - -// Create a unary operation on a matrix -spv::Id TGlslangToSpvTraverser::createUnaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType /* typeProxy */) -{ - // Handle unary operations vector by vector. - // The result type is the same type as the original type. - // The algorithm is to: - // - break the matrix into vectors - // - apply the operation to each vector - // - make a matrix out the vector results - - // get the types sorted out - int numCols = builder.getNumColumns(operand); - int numRows = builder.getNumRows(operand); - spv::Id srcVecType = builder.makeVectorType(builder.getScalarTypeId(builder.getTypeId(operand)), numRows); - spv::Id destVecType = builder.makeVectorType(builder.getScalarTypeId(typeId), numRows); - std::vector results; - - // do each vector op - for (int c = 0; c < numCols; ++c) { - std::vector indexes; - indexes.push_back(c); - spv::Id srcVec = builder.createCompositeExtract(operand, srcVecType, indexes); - spv::Id destVec = builder.createUnaryOp(op, destVecType, srcVec); - addDecoration(destVec, noContraction); - results.push_back(builder.setPrecision(destVec, precision)); - } - - // put the pieces together - return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); -} - -spv::Id TGlslangToSpvTraverser::createConversion(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id destType, spv::Id operand, glslang::TBasicType typeProxy) -{ - spv::Op convOp = spv::OpNop; - spv::Id zero = 0; - spv::Id one = 0; - spv::Id type = 0; - - int vectorSize = builder.isVectorType(destType) ? 
builder.getNumTypeComponents(destType) : 0; - - switch (op) { - case glslang::EOpConvIntToBool: - case glslang::EOpConvUintToBool: - case glslang::EOpConvInt64ToBool: - case glslang::EOpConvUint64ToBool: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToBool: - case glslang::EOpConvUint16ToBool: -#endif - if (op == glslang::EOpConvInt64ToBool || op == glslang::EOpConvUint64ToBool) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvInt16ToBool || op == glslang::EOpConvUint16ToBool) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpINotEqual, destType, operand, zero); - - case glslang::EOpConvFloatToBool: - zero = builder.makeFloatConstant(0.0F); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); - - case glslang::EOpConvDoubleToBool: - zero = builder.makeDoubleConstant(0.0); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); - -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloat16ToBool: - zero = builder.makeFloat16Constant(0.0F); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); -#endif - - case glslang::EOpConvBoolToFloat: - convOp = spv::OpSelect; - zero = builder.makeFloatConstant(0.0F); - one = builder.makeFloatConstant(1.0F); - break; - - case glslang::EOpConvBoolToDouble: - convOp = spv::OpSelect; - zero = builder.makeDoubleConstant(0.0); - one = builder.makeDoubleConstant(1.0); - break; - -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToFloat16: - convOp = spv::OpSelect; - zero = builder.makeFloat16Constant(0.0F); - one = builder.makeFloat16Constant(1.0F); - break; -#endif - - case glslang::EOpConvBoolToInt: - case glslang::EOpConvBoolToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToInt16: -#endif - if (op == glslang::EOpConvBoolToInt64) - zero = builder.makeInt64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToInt16) - zero = builder.makeInt16Constant(0); -#endif - else - zero = builder.makeIntConstant(0); - - if (op == glslang::EOpConvBoolToInt64) - one = builder.makeInt64Constant(1); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToInt16) - one = builder.makeInt16Constant(1); -#endif - else - one = builder.makeIntConstant(1); - - convOp = spv::OpSelect; - break; - - case glslang::EOpConvBoolToUint: - case glslang::EOpConvBoolToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToUint16: -#endif - if (op == glslang::EOpConvBoolToUint64) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToUint16) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - - if (op == glslang::EOpConvBoolToUint64) - one = builder.makeUint64Constant(1); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToUint16) - one = builder.makeUint16Constant(1); -#endif - else - one = builder.makeUintConstant(1); - - convOp = spv::OpSelect; - break; - - case glslang::EOpConvIntToFloat: - case glslang::EOpConvIntToDouble: - case glslang::EOpConvInt64ToFloat: - case glslang::EOpConvInt64ToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToFloat: - case glslang::EOpConvInt16ToDouble: - case glslang::EOpConvInt16ToFloat16: - case glslang::EOpConvIntToFloat16: - case 
glslang::EOpConvInt64ToFloat16: -#endif - convOp = spv::OpConvertSToF; - break; - - case glslang::EOpConvUintToFloat: - case glslang::EOpConvUintToDouble: - case glslang::EOpConvUint64ToFloat: - case glslang::EOpConvUint64ToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToFloat: - case glslang::EOpConvUint16ToDouble: - case glslang::EOpConvUint16ToFloat16: - case glslang::EOpConvUintToFloat16: - case glslang::EOpConvUint64ToFloat16: -#endif - convOp = spv::OpConvertUToF; - break; - - case glslang::EOpConvDoubleToFloat: - case glslang::EOpConvFloatToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvDoubleToFloat16: - case glslang::EOpConvFloat16ToDouble: - case glslang::EOpConvFloatToFloat16: - case glslang::EOpConvFloat16ToFloat: -#endif - convOp = spv::OpFConvert; - if (builder.isMatrixType(destType)) - return createUnaryMatrixOperation(convOp, precision, noContraction, destType, operand, typeProxy); - break; - - case glslang::EOpConvFloatToInt: - case glslang::EOpConvDoubleToInt: - case glslang::EOpConvFloatToInt64: - case glslang::EOpConvDoubleToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloatToInt16: - case glslang::EOpConvDoubleToInt16: - case glslang::EOpConvFloat16ToInt16: - case glslang::EOpConvFloat16ToInt: - case glslang::EOpConvFloat16ToInt64: -#endif - convOp = spv::OpConvertFToS; - break; - - case glslang::EOpConvUintToInt: - case glslang::EOpConvIntToUint: - case glslang::EOpConvUint64ToInt64: - case glslang::EOpConvInt64ToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToInt16: - case glslang::EOpConvInt16ToUint16: -#endif - if (builder.isInSpecConstCodeGenMode()) { - // Build zero scalar or vector for OpIAdd. - if (op == glslang::EOpConvUint64ToInt64 || op == glslang::EOpConvInt64ToUint64) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvUint16ToInt16 || op == glslang::EOpConvInt16ToUint16) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - - zero = makeSmearedConstant(zero, vectorSize); - // Use OpIAdd, instead of OpBitcast to do the conversion when - // generating for OpSpecConstantOp instruction. - return builder.createBinOp(spv::OpIAdd, destType, operand, zero); - } - // For normal run-time conversion instruction, use OpBitcast. 
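// Aside: why OpIAdd with zero can substitute for OpBitcast here - the opcode
// set permitted inside OpSpecConstantOp for shaders does not include OpBitcast,
// and a same-width signed<->unsigned conversion never changes the bit pattern,
// so adding a zero of the destination type is a bit-exact stand-in. A C++
// sketch of the same reinterpretation (illustrative, assumes <cstdint>):
#include <cstdint>
static uint32_t intToUintBits(int32_t x) {
    return static_cast<uint32_t>(x) + 0u;  // same bits; the "+ 0u" mirrors OpIAdd(x, 0)
}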
- convOp = spv::OpBitcast; - break; - - case glslang::EOpConvFloatToUint: - case glslang::EOpConvDoubleToUint: - case glslang::EOpConvFloatToUint64: - case glslang::EOpConvDoubleToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloatToUint16: - case glslang::EOpConvDoubleToUint16: - case glslang::EOpConvFloat16ToUint16: - case glslang::EOpConvFloat16ToUint: - case glslang::EOpConvFloat16ToUint64: -#endif - convOp = spv::OpConvertFToU; - break; - - case glslang::EOpConvIntToInt64: - case glslang::EOpConvInt64ToInt: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvIntToInt16: - case glslang::EOpConvInt16ToInt: - case glslang::EOpConvInt64ToInt16: - case glslang::EOpConvInt16ToInt64: -#endif - convOp = spv::OpSConvert; - break; - - case glslang::EOpConvUintToUint64: - case glslang::EOpConvUint64ToUint: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUintToUint16: - case glslang::EOpConvUint16ToUint: - case glslang::EOpConvUint64ToUint16: - case glslang::EOpConvUint16ToUint64: -#endif - convOp = spv::OpUConvert; - break; - - case glslang::EOpConvIntToUint64: - case glslang::EOpConvInt64ToUint: - case glslang::EOpConvUint64ToInt: - case glslang::EOpConvUintToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint: - case glslang::EOpConvUintToInt16: - case glslang::EOpConvInt16ToUint64: - case glslang::EOpConvUint64ToInt16: - case glslang::EOpConvUint16ToInt: - case glslang::EOpConvIntToUint16: - case glslang::EOpConvUint16ToInt64: - case glslang::EOpConvInt64ToUint16: -#endif - // OpSConvert/OpUConvert + OpBitCast - switch (op) { - case glslang::EOpConvIntToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint64: -#endif - convOp = spv::OpSConvert; - type = builder.makeIntType(64); - break; - case glslang::EOpConvInt64ToUint: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint: -#endif - convOp = spv::OpSConvert; - type = builder.makeIntType(32); - break; - case glslang::EOpConvUint64ToInt: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToInt: -#endif - convOp = spv::OpUConvert; - type = builder.makeUintType(32); - break; - case glslang::EOpConvUintToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToInt64: -#endif - convOp = spv::OpUConvert; - type = builder.makeUintType(64); - break; -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUintToInt16: - case glslang::EOpConvUint64ToInt16: - convOp = spv::OpUConvert; - type = builder.makeUintType(16); - break; - case glslang::EOpConvIntToUint16: - case glslang::EOpConvInt64ToUint16: - convOp = spv::OpSConvert; - type = builder.makeIntType(16); - break; -#endif - default: - assert(0); - break; - } - - if (vectorSize > 0) - type = builder.makeVectorType(type, vectorSize); - - operand = builder.createUnaryOp(convOp, type, operand); - - if (builder.isInSpecConstCodeGenMode()) { - // Build zero scalar or vector for OpIAdd. 
-#ifdef AMD_EXTENSIONS - if (op == glslang::EOpConvIntToUint64 || op == glslang::EOpConvUintToInt64 || - op == glslang::EOpConvInt16ToUint64 || op == glslang::EOpConvUint16ToInt64) - zero = builder.makeUint64Constant(0); - else if (op == glslang::EOpConvIntToUint16 || op == glslang::EOpConvUintToInt16 || - op == glslang::EOpConvInt64ToUint16 || op == glslang::EOpConvUint64ToInt16) - zero = builder.makeUint16Constant(0); - else - zero = builder.makeUintConstant(0); -#else - if (op == glslang::EOpConvIntToUint64 || op == glslang::EOpConvUintToInt64) - zero = builder.makeUint64Constant(0); - else - zero = builder.makeUintConstant(0); -#endif - - zero = makeSmearedConstant(zero, vectorSize); - // Use OpIAdd, instead of OpBitcast to do the conversion when - // generating for OpSpecConstantOp instruction. - return builder.createBinOp(spv::OpIAdd, destType, operand, zero); - } - // For normal run-time conversion instruction, use OpBitcast. - convOp = spv::OpBitcast; - break; - default: - break; - } - - spv::Id result = 0; - if (convOp == spv::OpNop) - return result; - - if (convOp == spv::OpSelect) { - zero = makeSmearedConstant(zero, vectorSize); - one = makeSmearedConstant(one, vectorSize); - result = builder.createTriOp(convOp, destType, operand, one, zero); - } else - result = builder.createUnaryOp(convOp, destType, operand); - - return builder.setPrecision(result, precision); -} - -spv::Id TGlslangToSpvTraverser::makeSmearedConstant(spv::Id constant, int vectorSize) -{ - if (vectorSize == 0) - return constant; - - spv::Id vectorTypeId = builder.makeVectorType(builder.getTypeId(constant), vectorSize); - std::vector components; - for (int c = 0; c < vectorSize; ++c) - components.push_back(constant); - return builder.makeCompositeConstant(vectorTypeId, components); -} - -// For glslang ops that map to SPV atomic opCodes -spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv::Decoration /*precision*/, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ - spv::Op opCode = spv::OpNop; - - switch (op) { - case glslang::EOpAtomicAdd: - case glslang::EOpImageAtomicAdd: - case glslang::EOpAtomicCounterAdd: - opCode = spv::OpAtomicIAdd; - break; - case glslang::EOpAtomicCounterSubtract: - opCode = spv::OpAtomicISub; - break; - case glslang::EOpAtomicMin: - case glslang::EOpImageAtomicMin: - case glslang::EOpAtomicCounterMin: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMin : spv::OpAtomicSMin; - break; - case glslang::EOpAtomicMax: - case glslang::EOpImageAtomicMax: - case glslang::EOpAtomicCounterMax: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? 
spv::OpAtomicUMax : spv::OpAtomicSMax; - break; - case glslang::EOpAtomicAnd: - case glslang::EOpImageAtomicAnd: - case glslang::EOpAtomicCounterAnd: - opCode = spv::OpAtomicAnd; - break; - case glslang::EOpAtomicOr: - case glslang::EOpImageAtomicOr: - case glslang::EOpAtomicCounterOr: - opCode = spv::OpAtomicOr; - break; - case glslang::EOpAtomicXor: - case glslang::EOpImageAtomicXor: - case glslang::EOpAtomicCounterXor: - opCode = spv::OpAtomicXor; - break; - case glslang::EOpAtomicExchange: - case glslang::EOpImageAtomicExchange: - case glslang::EOpAtomicCounterExchange: - opCode = spv::OpAtomicExchange; - break; - case glslang::EOpAtomicCompSwap: - case glslang::EOpImageAtomicCompSwap: - case glslang::EOpAtomicCounterCompSwap: - opCode = spv::OpAtomicCompareExchange; - break; - case glslang::EOpAtomicCounterIncrement: - opCode = spv::OpAtomicIIncrement; - break; - case glslang::EOpAtomicCounterDecrement: - opCode = spv::OpAtomicIDecrement; - break; - case glslang::EOpAtomicCounter: - opCode = spv::OpAtomicLoad; - break; - default: - assert(0); - break; - } - - if (typeProxy == glslang::EbtInt64 || typeProxy == glslang::EbtUint64) - builder.addCapability(spv::CapabilityInt64Atomics); - - // Sort out the operands - // - mapping from glslang -> SPV - // - there are extra SPV operands with no glslang source - // - compare-exchange swaps the value and comparator - // - compare-exchange has an extra memory semantics - // - EOpAtomicCounterDecrement needs a post decrement - std::vector spvAtomicOperands; // hold the spv operands - auto opIt = operands.begin(); // walk the glslang operands - spvAtomicOperands.push_back(*(opIt++)); - spvAtomicOperands.push_back(builder.makeUintConstant(spv::ScopeDevice)); // TBD: what is the correct scope? - spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); // TBD: what are the correct memory semantics? - if (opCode == spv::OpAtomicCompareExchange) { - // There are 2 memory semantics for compare-exchange. And the operand order of "comparator" and "new value" in GLSL - // differs from that in SPIR-V. Hence, special processing is required. - spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); - spvAtomicOperands.push_back(*(opIt + 1)); - spvAtomicOperands.push_back(*opIt); - opIt += 2; - } - - // Add the rest of the operands, skipping any that were dealt with above. - for (; opIt != operands.end(); ++opIt) - spvAtomicOperands.push_back(*opIt); - - spv::Id resultId = builder.createOp(opCode, typeId, spvAtomicOperands); - - // GLSL and HLSL atomic-counter decrement return post-decrement value, - // while SPIR-V returns pre-decrement value. Translate between these semantics. - if (op == glslang::EOpAtomicCounterDecrement) - resultId = builder.createBinOp(spv::OpISub, typeId, resultId, builder.makeIntConstant(1)); - - return resultId; -} - -// Create group invocation operations. 
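// Aside, re the atomic-counter decrement fix-up just above: GLSL's
// atomicCounterDecrement() returns the post-decrement value, while SPIR-V's
// OpAtomicIDecrement returns the pre-decrement value, hence the trailing
// OpISub by one. The same adjustment with std::atomic (illustrative):
#include <atomic>
static int glslStyleDecrement(std::atomic<int>& counter) {
    int preValue = counter.fetch_sub(1);  // like OpAtomicIDecrement: returns the old value
    return preValue - 1;                  // like the added OpISub: report the new value
}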
-spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#endif - - spv::Op opCode = spv::OpNop; - std::vector spvGroupOperands; - spv::GroupOperation groupOperation = spv::GroupOperationMax; - - if (op == glslang::EOpBallot || op == glslang::EOpReadFirstInvocation || - op == glslang::EOpReadInvocation) { - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - } else if (op == glslang::EOpAnyInvocation || - op == glslang::EOpAllInvocations || - op == glslang::EOpAllInvocationsEqual) { - builder.addExtension(spv::E_SPV_KHR_subgroup_vote); - builder.addCapability(spv::CapabilitySubgroupVoteKHR); - } else { - builder.addCapability(spv::CapabilityGroups); -#ifdef AMD_EXTENSIONS - if (op == glslang::EOpMinInvocationsNonUniform || - op == glslang::EOpMaxInvocationsNonUniform || - op == glslang::EOpAddInvocationsNonUniform || - op == glslang::EOpMinInvocationsInclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsInclusiveScanNonUniform || - op == glslang::EOpAddInvocationsInclusiveScanNonUniform || - op == glslang::EOpMinInvocationsExclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsExclusiveScanNonUniform || - op == glslang::EOpAddInvocationsExclusiveScanNonUniform) - builder.addExtension(spv::E_SPV_AMD_shader_ballot); -#endif - - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); -#ifdef AMD_EXTENSIONS - switch (op) { - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - case glslang::EOpAddInvocationsNonUniform: - groupOperation = spv::GroupOperationReduce; - spvGroupOperands.push_back(groupOperation); - break; - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - groupOperation = spv::GroupOperationInclusiveScan; - spvGroupOperands.push_back(groupOperation); - break; - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: - groupOperation = spv::GroupOperationExclusiveScan; - spvGroupOperands.push_back(groupOperation); - break; - default: - break; - } -#endif - } - - for (auto opIt = operands.begin(); opIt != operands.end(); ++opIt) - spvGroupOperands.push_back(*opIt); - - switch (op) { - case glslang::EOpAnyInvocation: - opCode = spv::OpSubgroupAnyKHR; - break; - case glslang::EOpAllInvocations: - opCode = spv::OpSubgroupAllKHR; - break; - case glslang::EOpAllInvocationsEqual: - opCode = spv::OpSubgroupAllEqualKHR; - break; - case glslang::EOpReadInvocation: - opCode = spv::OpSubgroupReadInvocationKHR; - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, 
operands); - break; - case glslang::EOpReadFirstInvocation: - opCode = spv::OpSubgroupFirstInvocationKHR; - break; - case glslang::EOpBallot: - { - // NOTE: According to the spec, the result type of "OpSubgroupBallotKHR" must be a 4 component vector of 32 - // bit integer types. The GLSL built-in function "ballotARB()" assumes the maximum number of invocations in - // a subgroup is 64. Thus, we have to convert uvec4.xy to uint64_t as follow: - // - // result = Bitcast(SubgroupBallotKHR(Predicate).xy) - // - spv::Id uintType = builder.makeUintType(32); - spv::Id uvec4Type = builder.makeVectorType(uintType, 4); - spv::Id result = builder.createOp(spv::OpSubgroupBallotKHR, uvec4Type, spvGroupOperands); - - std::vector components; - components.push_back(builder.createCompositeExtract(result, uintType, 0)); - components.push_back(builder.createCompositeExtract(result, uintType, 1)); - - spv::Id uvec2Type = builder.makeVectorType(uintType, 2); - return builder.createUnaryOp(spv::OpBitcast, typeId, - builder.createCompositeConstruct(uvec2Type, components)); - } - -#ifdef AMD_EXTENSIONS - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - if (op == glslang::EOpMinInvocations || - op == glslang::EOpMinInvocationsInclusiveScan || - op == glslang::EOpMinInvocationsExclusiveScan) { - if (isFloat) - opCode = spv::OpGroupFMin; - else { - if (isUnsigned) - opCode = spv::OpGroupUMin; - else - opCode = spv::OpGroupSMin; - } - } else if (op == glslang::EOpMaxInvocations || - op == glslang::EOpMaxInvocationsInclusiveScan || - op == glslang::EOpMaxInvocationsExclusiveScan) { - if (isFloat) - opCode = spv::OpGroupFMax; - else { - if (isUnsigned) - opCode = spv::OpGroupUMax; - else - opCode = spv::OpGroupSMax; - } - } else { - if (isFloat) - opCode = spv::OpGroupFAdd; - else - opCode = spv::OpGroupIAdd; - } - - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands); - - break; - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - case glslang::EOpAddInvocationsNonUniform: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: - if (op == glslang::EOpMinInvocationsNonUniform || - op == glslang::EOpMinInvocationsInclusiveScanNonUniform || - op == glslang::EOpMinInvocationsExclusiveScanNonUniform) { - if (isFloat) - opCode = spv::OpGroupFMinNonUniformAMD; - else { - if (isUnsigned) - opCode = spv::OpGroupUMinNonUniformAMD; - else - opCode = spv::OpGroupSMinNonUniformAMD; - } - } - else if (op == glslang::EOpMaxInvocationsNonUniform || - op == glslang::EOpMaxInvocationsInclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsExclusiveScanNonUniform) { - if (isFloat) - opCode = spv::OpGroupFMaxNonUniformAMD; - else { - if (isUnsigned) - opCode = spv::OpGroupUMaxNonUniformAMD; - else - opCode = spv::OpGroupSMaxNonUniformAMD; - } - } - else { - if (isFloat) - opCode = 
spv::OpGroupFAddNonUniformAMD; - else - opCode = spv::OpGroupIAddNonUniformAMD; - } - - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands); - - break; -#endif - default: - logger->missingFunctionality("invocation operation"); - return spv::NoResult; - } - - assert(opCode != spv::OpNop); - return builder.createOp(opCode, typeId, spvGroupOperands); -} - -// Create group invocation operations on a vector -spv::Id TGlslangToSpvTraverser::CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector& operands) -{ -#ifdef AMD_EXTENSIONS - assert(op == spv::OpGroupFMin || op == spv::OpGroupUMin || op == spv::OpGroupSMin || - op == spv::OpGroupFMax || op == spv::OpGroupUMax || op == spv::OpGroupSMax || - op == spv::OpGroupFAdd || op == spv::OpGroupIAdd || op == spv::OpGroupBroadcast || - op == spv::OpSubgroupReadInvocationKHR || - op == spv::OpGroupFMinNonUniformAMD || op == spv::OpGroupUMinNonUniformAMD || op == spv::OpGroupSMinNonUniformAMD || - op == spv::OpGroupFMaxNonUniformAMD || op == spv::OpGroupUMaxNonUniformAMD || op == spv::OpGroupSMaxNonUniformAMD || - op == spv::OpGroupFAddNonUniformAMD || op == spv::OpGroupIAddNonUniformAMD); -#else - assert(op == spv::OpGroupFMin || op == spv::OpGroupUMin || op == spv::OpGroupSMin || - op == spv::OpGroupFMax || op == spv::OpGroupUMax || op == spv::OpGroupSMax || - op == spv::OpGroupFAdd || op == spv::OpGroupIAdd || op == spv::OpGroupBroadcast || - op == spv::OpSubgroupReadInvocationKHR); -#endif - - // Handle group invocation operations scalar by scalar. - // The result type is the same type as the original type. - // The algorithm is to: - // - break the vector into scalars - // - apply the operation to each scalar - // - make a vector out the scalar results - - // get the types sorted out - int numComponents = builder.getNumComponents(operands[0]); - spv::Id scalarType = builder.getScalarTypeId(builder.getTypeId(operands[0])); - std::vector results; - - // do each scalar op - for (int comp = 0; comp < numComponents; ++comp) { - std::vector indexes; - indexes.push_back(comp); - spv::Id scalar = builder.createCompositeExtract(operands[0], scalarType, indexes); - std::vector spvGroupOperands; - if (op == spv::OpSubgroupReadInvocationKHR) { - spvGroupOperands.push_back(scalar); - spvGroupOperands.push_back(operands[1]); - } else if (op == spv::OpGroupBroadcast) { - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); - spvGroupOperands.push_back(scalar); - spvGroupOperands.push_back(operands[1]); - } else { - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); - spvGroupOperands.push_back(groupOperation); - spvGroupOperands.push_back(scalar); - } - - results.push_back(builder.createOp(op, scalarType, spvGroupOperands)); - } - - // put the pieces together - return builder.createCompositeConstruct(typeId, results); -} - -spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || 
typeProxy == glslang::EbtDouble; -#endif - - spv::Op opCode = spv::OpNop; - int extBuiltins = -1; - int libCall = -1; - size_t consumedOperands = operands.size(); - spv::Id typeId0 = 0; - if (consumedOperands > 0) - typeId0 = builder.getTypeId(operands[0]); - spv::Id typeId1 = 0; - if (consumedOperands > 1) - typeId1 = builder.getTypeId(operands[1]); - spv::Id frexpIntType = 0; - - switch (op) { - case glslang::EOpMin: - if (isFloat) - libCall = spv::GLSLstd450FMin; - else if (isUnsigned) - libCall = spv::GLSLstd450UMin; - else - libCall = spv::GLSLstd450SMin; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpModf: - libCall = spv::GLSLstd450Modf; - break; - case glslang::EOpMax: - if (isFloat) - libCall = spv::GLSLstd450FMax; - else if (isUnsigned) - libCall = spv::GLSLstd450UMax; - else - libCall = spv::GLSLstd450SMax; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpPow: - libCall = spv::GLSLstd450Pow; - break; - case glslang::EOpDot: - opCode = spv::OpDot; - break; - case glslang::EOpAtan: - libCall = spv::GLSLstd450Atan2; - break; - - case glslang::EOpClamp: - if (isFloat) - libCall = spv::GLSLstd450FClamp; - else if (isUnsigned) - libCall = spv::GLSLstd450UClamp; - else - libCall = spv::GLSLstd450SClamp; - builder.promoteScalar(precision, operands.front(), operands[1]); - builder.promoteScalar(precision, operands.front(), operands[2]); - break; - case glslang::EOpMix: - if (! builder.isBoolType(builder.getScalarTypeId(builder.getTypeId(operands.back())))) { - assert(isFloat); - libCall = spv::GLSLstd450FMix; - } else { - opCode = spv::OpSelect; - std::swap(operands.front(), operands.back()); - } - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpStep: - libCall = spv::GLSLstd450Step; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpSmoothStep: - libCall = spv::GLSLstd450SmoothStep; - builder.promoteScalar(precision, operands[0], operands[2]); - builder.promoteScalar(precision, operands[1], operands[2]); - break; - - case glslang::EOpDistance: - libCall = spv::GLSLstd450Distance; - break; - case glslang::EOpCross: - libCall = spv::GLSLstd450Cross; - break; - case glslang::EOpFaceForward: - libCall = spv::GLSLstd450FaceForward; - break; - case glslang::EOpReflect: - libCall = spv::GLSLstd450Reflect; - break; - case glslang::EOpRefract: - libCall = spv::GLSLstd450Refract; - break; - case glslang::EOpInterpolateAtSample: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtSample; - break; - case glslang::EOpInterpolateAtOffset: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtOffset; - break; - case glslang::EOpAddCarry: - opCode = spv::OpIAddCarry; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpSubBorrow: - opCode = spv::OpISubBorrow; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpUMulExtended: - opCode = spv::OpUMulExtended; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpIMulExtended: - opCode = spv::OpSMulExtended; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpBitfieldExtract: - if (isUnsigned) - opCode = spv::OpBitFieldUExtract; - else 
- opCode = spv::OpBitFieldSExtract; - break; - case glslang::EOpBitfieldInsert: - opCode = spv::OpBitFieldInsert; - break; - - case glslang::EOpFma: - libCall = spv::GLSLstd450Fma; - break; - case glslang::EOpFrexp: - { - libCall = spv::GLSLstd450FrexpStruct; - assert(builder.isPointerType(typeId1)); - typeId1 = builder.getContainedTypeId(typeId1); -#ifdef AMD_EXTENSIONS - int width = builder.getScalarTypeWidth(typeId1); -#else - int width = 32; -#endif - if (builder.getNumComponents(operands[0]) == 1) - frexpIntType = builder.makeIntegerType(width, true); - else - frexpIntType = builder.makeVectorType(builder.makeIntegerType(width, true), builder.getNumComponents(operands[0])); - typeId = builder.makeStructResultType(typeId0, frexpIntType); - consumedOperands = 1; - } - break; - case glslang::EOpLdexp: - libCall = spv::GLSLstd450Ldexp; - break; - - case glslang::EOpReadInvocation: - return createInvocationsOperation(op, typeId, operands, typeProxy); - -#ifdef AMD_EXTENSIONS - case glslang::EOpSwizzleInvocations: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::SwizzleInvocationsAMD; - break; - case glslang::EOpSwizzleInvocationsMasked: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::SwizzleInvocationsMaskedAMD; - break; - case glslang::EOpWriteInvocation: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::WriteInvocationAMD; - break; - - case glslang::EOpMin3: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMin3AMD; - else { - if (isUnsigned) - libCall = spv::UMin3AMD; - else - libCall = spv::SMin3AMD; - } - break; - case glslang::EOpMax3: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMax3AMD; - else { - if (isUnsigned) - libCall = spv::UMax3AMD; - else - libCall = spv::SMax3AMD; - } - break; - case glslang::EOpMid3: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMid3AMD; - else { - if (isUnsigned) - libCall = spv::UMid3AMD; - else - libCall = spv::SMid3AMD; - } - break; - - case glslang::EOpInterpolateAtVertex: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - libCall = spv::InterpolateAtVertexAMD; - break; -#endif - - default: - return 0; - } - - spv::Id id = 0; - if (libCall >= 0) { - // Use an extended instruction from the standard library. - // Construct the call arguments, without modifying the original operands vector. - // We might need the remaining arguments, e.g. in the EOpFrexp case. - std::vector callArguments(operands.begin(), operands.begin() + consumedOperands); - id = builder.createBuiltinCall(typeId, extBuiltins >= 0 ? 
extBuiltins : stdBuiltins, libCall, callArguments); - } else { - switch (consumedOperands) { - case 0: - // should all be handled by visitAggregate and createNoArgOperation - assert(0); - return 0; - case 1: - // should all be handled by createUnaryOperation - assert(0); - return 0; - case 2: - id = builder.createBinOp(opCode, typeId, operands[0], operands[1]); - break; - default: - // anything 3 or over doesn't have l-value operands, so all should be consumed - assert(consumedOperands == operands.size()); - id = builder.createOp(opCode, typeId, operands); - break; - } - } - - // Decode the return types that were structures - switch (op) { - case glslang::EOpAddCarry: - case glslang::EOpSubBorrow: - builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); - id = builder.createCompositeExtract(id, typeId0, 0); - break; - case glslang::EOpUMulExtended: - case glslang::EOpIMulExtended: - builder.createStore(builder.createCompositeExtract(id, typeId0, 0), operands[3]); - builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); - break; - case glslang::EOpFrexp: - { - assert(operands.size() == 2); - if (builder.isFloatType(builder.getScalarTypeId(typeId1))) { - // "exp" is floating-point type (from HLSL intrinsic) - spv::Id member1 = builder.createCompositeExtract(id, frexpIntType, 1); - member1 = builder.createUnaryOp(spv::OpConvertSToF, typeId1, member1); - builder.createStore(member1, operands[1]); - } else - // "exp" is integer type (from GLSL built-in function) - builder.createStore(builder.createCompositeExtract(id, frexpIntType, 1), operands[1]); - id = builder.createCompositeExtract(id, typeId0, 0); - } - break; - default: - break; - } - - return builder.setPrecision(id, precision); -} - -// Intrinsics with no arguments (or no return value, and no precision). 
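
The "decode the return types that were structures" step above is where the struct results of OpIAddCarry, OpISubBorrow, and OpUMulExtended/OpSMulExtended are split back into a returned value plus stores through the extra l-value operands. As a reference for what those opcodes actually compute, here is a minimal standalone C++ sketch of the arithmetic only (AddCarry32, addCarry, MulExtended32, umulExtended are illustrative names, not glslang or SPIR-V API; the SPIR-V lowering itself is the deleted code above):

    #include <cstdint>

    // OpIAddCarry: 32-bit wrapping sum plus a carry-out word (0 or 1).
    // Mirrors the { result, carry } struct the deleted code extracts from.
    struct AddCarry32 { uint32_t result; uint32_t carry; };

    AddCarry32 addCarry(uint32_t a, uint32_t b) {
        uint32_t sum = a + b;               // wraps modulo 2^32
        return { sum, sum < a ? 1u : 0u };  // carry-out detected by wraparound
    }

    // OpUMulExtended: the full 64-bit product split into low/high words, which
    // the traverser stores through the two trailing pointer operands.
    struct MulExtended32 { uint32_t lo; uint32_t hi; };

    MulExtended32 umulExtended(uint32_t a, uint32_t b) {
        uint64_t p = uint64_t(a) * uint64_t(b);
        return { uint32_t(p), uint32_t(p >> 32) };
    }
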
-spv::Id TGlslangToSpvTraverser::createNoArgOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId) -{ - // TODO: get the barrier operands correct - - switch (op) { - case glslang::EOpEmitVertex: - builder.createNoResultOp(spv::OpEmitVertex); - return 0; - case glslang::EOpEndPrimitive: - builder.createNoResultOp(spv::OpEndPrimitive); - return 0; - case glslang::EOpBarrier: - if (glslangIntermediate->getStage() == EShLangTessControl) { - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeInvocation, spv::MemorySemanticsMaskNone); - // TODO: prefer the following, when available: - // builder.createControlBarrier(spv::ScopePatch, spv::ScopePatch, - // spv::MemorySemanticsPatchMask | - // spv::MemorySemanticsAcquireReleaseMask); - } else { - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, - spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - } - return 0; - case glslang::EOpMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierAtomicCounter: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAtomicCounterMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierBuffer: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierImage: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierShared: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpGroupMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsAllMemory | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpAllMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, - spv::MemorySemanticsAllMemory | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpDeviceMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpDeviceMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpWorkgroupMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpWorkgroupMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, - spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; -#ifdef AMD_EXTENSIONS - case glslang::EOpTime: - { - std::vector args; // Dummy arguments - spv::Id id = builder.createBuiltinCall(typeId, getExtBuiltins(spv::E_SPV_AMD_gcn_shader), spv::TimeAMD, args); - return builder.setPrecision(id, precision); - } -#endif - default: - logger->missingFunctionality("unknown operation with no arguments"); - return 0; - } -} - -spv::Id 
TGlslangToSpvTraverser::getSymbolId(const glslang::TIntermSymbol* symbol) -{ - auto iter = symbolValues.find(symbol->getId()); - spv::Id id; - if (symbolValues.end() != iter) { - id = iter->second; - return id; - } - - // it was not found, create it - id = createSpvVariable(symbol); - symbolValues[symbol->getId()] = id; - - if (symbol->getBasicType() != glslang::EbtBlock) { - addDecoration(id, TranslatePrecisionDecoration(symbol->getType())); - addDecoration(id, TranslateInterpolationDecoration(symbol->getType().getQualifier())); - addDecoration(id, TranslateAuxiliaryStorageDecoration(symbol->getType().getQualifier())); - if (symbol->getType().getQualifier().hasSpecConstantId()) - addDecoration(id, spv::DecorationSpecId, symbol->getType().getQualifier().layoutSpecConstantId); - if (symbol->getQualifier().hasIndex()) - builder.addDecoration(id, spv::DecorationIndex, symbol->getQualifier().layoutIndex); - if (symbol->getQualifier().hasComponent()) - builder.addDecoration(id, spv::DecorationComponent, symbol->getQualifier().layoutComponent); - // atomic counters use this: - if (symbol->getQualifier().hasOffset()) - builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutOffset); - } - - if (symbol->getQualifier().hasLocation()) - builder.addDecoration(id, spv::DecorationLocation, symbol->getQualifier().layoutLocation); - addDecoration(id, TranslateInvariantDecoration(symbol->getType().getQualifier())); - if (symbol->getQualifier().hasStream() && glslangIntermediate->isMultiStream()) { - builder.addCapability(spv::CapabilityGeometryStreams); - builder.addDecoration(id, spv::DecorationStream, symbol->getQualifier().layoutStream); - } - if (symbol->getQualifier().hasSet()) - builder.addDecoration(id, spv::DecorationDescriptorSet, symbol->getQualifier().layoutSet); - else if (IsDescriptorResource(symbol->getType())) { - // default to 0 - builder.addDecoration(id, spv::DecorationDescriptorSet, 0); - } - if (symbol->getQualifier().hasBinding()) - builder.addDecoration(id, spv::DecorationBinding, symbol->getQualifier().layoutBinding); - if (symbol->getQualifier().hasAttachment()) - builder.addDecoration(id, spv::DecorationInputAttachmentIndex, symbol->getQualifier().layoutAttachment); - if (glslangIntermediate->getXfbMode()) { - builder.addCapability(spv::CapabilityTransformFeedback); - if (symbol->getQualifier().hasXfbStride()) - builder.addDecoration(id, spv::DecorationXfbStride, symbol->getQualifier().layoutXfbStride); - if (symbol->getQualifier().hasXfbBuffer()) { - builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer); - unsigned stride = glslangIntermediate->getXfbStride(symbol->getQualifier().layoutXfbBuffer); - if (stride != glslang::TQualifier::layoutXfbStrideEnd) - builder.addDecoration(id, spv::DecorationXfbStride, stride); - } - if (symbol->getQualifier().hasXfbOffset()) - builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutXfbOffset); - } - - if (symbol->getType().isImage()) { - std::vector memory; - TranslateMemoryDecoration(symbol->getType().getQualifier(), memory); - for (unsigned int i = 0; i < memory.size(); ++i) - addDecoration(id, memory[i]); - } - - // built-in variable decorations - spv::BuiltIn builtIn = TranslateBuiltInDecoration(symbol->getQualifier().builtIn, false); - if (builtIn != spv::BuiltInMax) - addDecoration(id, spv::DecorationBuiltIn, (int)builtIn); - -#ifdef NV_EXTENSIONS - if (builtIn == spv::BuiltInSampleMask) { - spv::Decoration decoration; - // 
GL_NV_sample_mask_override_coverage extension - if (glslangIntermediate->getLayoutOverrideCoverage()) - decoration = (spv::Decoration)spv::DecorationOverrideCoverageNV; - else - decoration = (spv::Decoration)spv::DecorationMax; - addDecoration(id, decoration); - if (decoration != spv::DecorationMax) { - builder.addExtension(spv::E_SPV_NV_sample_mask_override_coverage); - } - } - else if (builtIn == spv::BuiltInLayer) { - // SPV_NV_viewport_array2 extension - if (symbol->getQualifier().layoutViewportRelative) { - addDecoration(id, (spv::Decoration)spv::DecorationViewportRelativeNV); - builder.addCapability(spv::CapabilityShaderViewportMaskNV); - builder.addExtension(spv::E_SPV_NV_viewport_array2); - } - if (symbol->getQualifier().layoutSecondaryViewportRelativeOffset != -2048) { - addDecoration(id, (spv::Decoration)spv::DecorationSecondaryViewportRelativeNV, symbol->getQualifier().layoutSecondaryViewportRelativeOffset); - builder.addCapability(spv::CapabilityShaderStereoViewNV); - builder.addExtension(spv::E_SPV_NV_stereo_view_rendering); - } - } - - if (symbol->getQualifier().layoutPassthrough) { - addDecoration(id, spv::DecorationPassthroughNV); - builder.addCapability(spv::CapabilityGeometryShaderPassthroughNV); - builder.addExtension(spv::E_SPV_NV_geometry_shader_passthrough); - } -#endif - - return id; -} - -// If 'dec' is valid, add no-operand decoration to an object -void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec) -{ - if (dec != spv::DecorationMax) - builder.addDecoration(id, dec); -} - -// If 'dec' is valid, add a one-operand decoration to an object -void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec, unsigned value) -{ - if (dec != spv::DecorationMax) - builder.addDecoration(id, dec, value); -} - -// If 'dec' is valid, add a no-operand decoration to a struct member -void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration dec) -{ - if (dec != spv::DecorationMax) - builder.addMemberDecoration(id, (unsigned)member, dec); -} - -// If 'dec' is valid, add a one-operand decoration to a struct member -void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration dec, unsigned value) -{ - if (dec != spv::DecorationMax) - builder.addMemberDecoration(id, (unsigned)member, dec, value); -} - -// Make a full tree of instructions to build a SPIR-V specialization constant, -// or regular constant if possible. -// -// TBD: this is not yet done, nor verified to be the best design, it does do the leaf symbols though -// -// Recursively walk the nodes. The nodes form a tree whose leaves are -// regular constants, which themselves are trees that createSpvConstant() -// recursively walks. So, this function walks the "top" of the tree: -// - emit specialization constant-building instructions for specConstant -// - when running into a non-spec-constant, switch to createSpvConstant() -spv::Id TGlslangToSpvTraverser::createSpvConstant(const glslang::TIntermTyped& node) -{ - assert(node.getQualifier().isConstant()); - - // Handle front-end constants first (non-specialization constants). - if (! node.getQualifier().specConstant) { - // hand off to the non-spec-constant path - assert(node.getAsConstantUnion() != nullptr || node.getAsSymbolNode() != nullptr); - int nextConst = 0; - return createSpvConstantFromConstUnionArray(node.getType(), node.getAsConstantUnion() ? 
node.getAsConstantUnion()->getConstArray() : node.getAsSymbolNode()->getConstArray(), - nextConst, false); - } - - // We now know we have a specialization constant to build - - // gl_WorkGroupSize is a special case until the front-end handles hierarchical specialization constants, - // even then, it's specialization ids are handled by special case syntax in GLSL: layout(local_size_x = ... - if (node.getType().getQualifier().builtIn == glslang::EbvWorkGroupSize) { - std::vector dimConstId; - for (int dim = 0; dim < 3; ++dim) { - bool specConst = (glslangIntermediate->getLocalSizeSpecId(dim) != glslang::TQualifier::layoutNotSet); - dimConstId.push_back(builder.makeUintConstant(glslangIntermediate->getLocalSize(dim), specConst)); - if (specConst) - addDecoration(dimConstId.back(), spv::DecorationSpecId, glslangIntermediate->getLocalSizeSpecId(dim)); - } - return builder.makeCompositeConstant(builder.makeVectorType(builder.makeUintType(32), 3), dimConstId, true); - } - - // An AST node labelled as specialization constant should be a symbol node. - // Its initializer should either be a sub tree with constant nodes, or a constant union array. - if (auto* sn = node.getAsSymbolNode()) { - if (auto* sub_tree = sn->getConstSubtree()) { - // Traverse the constant constructor sub tree like generating normal run-time instructions. - // During the AST traversal, if the node is marked as 'specConstant', SpecConstantOpModeGuard - // will set the builder into spec constant op instruction generating mode. - sub_tree->traverse(this); - return accessChainLoad(sub_tree->getType()); - } else if (auto* const_union_array = &sn->getConstArray()){ - int nextConst = 0; - spv::Id id = createSpvConstantFromConstUnionArray(sn->getType(), *const_union_array, nextConst, true); - builder.addName(id, sn->getName().c_str()); - return id; - } - } - - // Neither a front-end constant node, nor a specialization constant node with constant union array or - // constant sub tree as initializer. - logger->missingFunctionality("Neither a front-end constant nor a spec constant."); - exit(1); - return spv::NoResult; -} - -// Use 'consts' as the flattened glslang source of scalar constants to recursively -// build the aggregate SPIR-V constant. -// -// If there are not enough elements present in 'consts', 0 will be substituted; -// an empty 'consts' can be used to create a fully zeroed SPIR-V constant. 
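
The comment above states the padding contract for createSpvConstantFromConstUnionArray: once the flattened 'consts' array runs out, zeros are substituted, so an empty array yields a fully zeroed aggregate. A small sketch of just that consumption rule, with the SPIR-V builder calls elided (buildIntVectorConstant and its parameters are illustrative names mirroring the deleted function's signature):

    #include <cstddef>
    #include <vector>

    // Consume scalars from a flattened constant array, substituting 0 once the
    // source runs out; an empty 'consts' therefore zero-fills the whole vector.
    std::vector<int> buildIntVectorConstant(const std::vector<int>& consts,
                                            size_t& nextConst, size_t vectorSize) {
        std::vector<int> result;
        for (size_t i = 0; i < vectorSize; ++i) {
            bool zero = nextConst >= consts.size();
            result.push_back(zero ? 0 : consts[nextConst]);
            ++nextConst;  // advances even past the end, as in the original
        }
        return result;
    }
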
-// -spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glslang::TType& glslangType, const glslang::TConstUnionArray& consts, int& nextConst, bool specConstant) -{ - // vector of constants for SPIR-V - std::vector spvConsts; - - // Type is used for struct and array constants - spv::Id typeId = convertGlslangToSpvType(glslangType); - - if (glslangType.isArray()) { - glslang::TType elementType(glslangType, 0); - for (int i = 0; i < glslangType.getOuterArraySize(); ++i) - spvConsts.push_back(createSpvConstantFromConstUnionArray(elementType, consts, nextConst, false)); - } else if (glslangType.isMatrix()) { - glslang::TType vectorType(glslangType, 0); - for (int col = 0; col < glslangType.getMatrixCols(); ++col) - spvConsts.push_back(createSpvConstantFromConstUnionArray(vectorType, consts, nextConst, false)); - } else if (glslangType.getStruct()) { - glslang::TVector::const_iterator iter; - for (iter = glslangType.getStruct()->begin(); iter != glslangType.getStruct()->end(); ++iter) - spvConsts.push_back(createSpvConstantFromConstUnionArray(*iter->type, consts, nextConst, false)); - } else if (glslangType.getVectorSize() > 1) { - for (unsigned int i = 0; i < (unsigned int)glslangType.getVectorSize(); ++i) { - bool zero = nextConst >= consts.size(); - switch (glslangType.getBasicType()) { - case glslang::EbtInt: - spvConsts.push_back(builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst())); - break; - case glslang::EbtUint: - spvConsts.push_back(builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst())); - break; - case glslang::EbtInt64: - spvConsts.push_back(builder.makeInt64Constant(zero ? 0 : consts[nextConst].getI64Const())); - break; - case glslang::EbtUint64: - spvConsts.push_back(builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const())); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtInt16: - spvConsts.push_back(builder.makeInt16Constant(zero ? 0 : (short)consts[nextConst].getIConst())); - break; - case glslang::EbtUint16: - spvConsts.push_back(builder.makeUint16Constant(zero ? 0 : (unsigned short)consts[nextConst].getUConst())); - break; -#endif - case glslang::EbtFloat: - spvConsts.push_back(builder.makeFloatConstant(zero ? 0.0F : (float)consts[nextConst].getDConst())); - break; - case glslang::EbtDouble: - spvConsts.push_back(builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst())); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtFloat16: - spvConsts.push_back(builder.makeFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst())); - break; -#endif - case glslang::EbtBool: - spvConsts.push_back(builder.makeBoolConstant(zero ? false : consts[nextConst].getBConst())); - break; - default: - assert(0); - break; - } - ++nextConst; - } - } else { - // we have a non-aggregate (scalar) constant - bool zero = nextConst >= consts.size(); - spv::Id scalar = 0; - switch (glslangType.getBasicType()) { - case glslang::EbtInt: - scalar = builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst(), specConstant); - break; - case glslang::EbtUint: - scalar = builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst(), specConstant); - break; - case glslang::EbtInt64: - scalar = builder.makeInt64Constant(zero ? 0 : consts[nextConst].getI64Const(), specConstant); - break; - case glslang::EbtUint64: - scalar = builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const(), specConstant); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtInt16: - scalar = builder.makeInt16Constant(zero ? 
0 : (short)consts[nextConst].getIConst(), specConstant); - break; - case glslang::EbtUint16: - scalar = builder.makeUint16Constant(zero ? 0 : (unsigned short)consts[nextConst].getUConst(), specConstant); - break; -#endif - case glslang::EbtFloat: - scalar = builder.makeFloatConstant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant); - break; - case glslang::EbtDouble: - scalar = builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst(), specConstant); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtFloat16: - scalar = builder.makeFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant); - break; -#endif - case glslang::EbtBool: - scalar = builder.makeBoolConstant(zero ? false : consts[nextConst].getBConst(), specConstant); - break; - default: - assert(0); - break; - } - ++nextConst; - return scalar; - } - - return builder.makeCompositeConstant(typeId, spvConsts); -} - -// Return true if the node is a constant or symbol whose reading has no -// non-trivial observable cost or effect. -bool TGlslangToSpvTraverser::isTrivialLeaf(const glslang::TIntermTyped* node) -{ - // don't know what this is - if (node == nullptr) - return false; - - // a constant is safe - if (node->getAsConstantUnion() != nullptr) - return true; - - // not a symbol means non-trivial - if (node->getAsSymbolNode() == nullptr) - return false; - - // a symbol, depends on what's being read - switch (node->getType().getQualifier().storage) { - case glslang::EvqTemporary: - case glslang::EvqGlobal: - case glslang::EvqIn: - case glslang::EvqInOut: - case glslang::EvqConst: - case glslang::EvqConstReadOnly: - case glslang::EvqUniform: - return true; - default: - return false; - } -} - -// A node is trivial if it is a single operation with no side effects. -// HLSL (and/or vectors) are always trivial, as it does not short circuit. -// Otherwise, error on the side of saying non-trivial. -// Return true if trivial. -bool TGlslangToSpvTraverser::isTrivial(const glslang::TIntermTyped* node) -{ - if (node == nullptr) - return false; - - // count non scalars as trivial, as well as anything coming from HLSL - if (! node->getType().isScalarOrVec1() || glslangIntermediate->getSource() == glslang::EShSourceHlsl) - return true; - - // symbols and constants are trivial - if (isTrivialLeaf(node)) - return true; - - // otherwise, it needs to be a simple operation or one or two leaf nodes - - // not a simple operation - const glslang::TIntermBinary* binaryNode = node->getAsBinaryNode(); - const glslang::TIntermUnary* unaryNode = node->getAsUnaryNode(); - if (binaryNode == nullptr && unaryNode == nullptr) - return false; - - // not on leaf nodes - if (binaryNode && (! isTrivialLeaf(binaryNode->getLeft()) || ! isTrivialLeaf(binaryNode->getRight()))) - return false; - - if (unaryNode && ! 
isTrivialLeaf(unaryNode->getOperand())) { - return false; - } - - switch (node->getAsOperator()->getOp()) { - case glslang::EOpLogicalNot: - case glslang::EOpConvIntToBool: - case glslang::EOpConvUintToBool: - case glslang::EOpConvFloatToBool: - case glslang::EOpConvDoubleToBool: - case glslang::EOpEqual: - case glslang::EOpNotEqual: - case glslang::EOpLessThan: - case glslang::EOpGreaterThan: - case glslang::EOpLessThanEqual: - case glslang::EOpGreaterThanEqual: - case glslang::EOpIndexDirect: - case glslang::EOpIndexDirectStruct: - case glslang::EOpLogicalXor: - case glslang::EOpAny: - case glslang::EOpAll: - return true; - default: - return false; - } -} - -// Emit short-circuiting code, where 'right' is never evaluated unless -// the left side is true (for &&) or false (for ||). -spv::Id TGlslangToSpvTraverser::createShortCircuit(glslang::TOperator op, glslang::TIntermTyped& left, glslang::TIntermTyped& right) -{ - spv::Id boolTypeId = builder.makeBoolType(); - - // emit left operand - builder.clearAccessChain(); - left.traverse(this); - spv::Id leftId = accessChainLoad(left.getType()); - - // Operands to accumulate OpPhi operands - std::vector phiOperands; - // accumulate left operand's phi information - phiOperands.push_back(leftId); - phiOperands.push_back(builder.getBuildPoint()->getId()); - - // Make the two kinds of operation symmetric with a "!" - // || => emit "if (! left) result = right" - // && => emit "if ( left) result = right" - // - // TODO: this runtime "not" for || could be avoided by adding functionality - // to 'builder' to have an "else" without an "then" - if (op == glslang::EOpLogicalOr) - leftId = builder.createUnaryOp(spv::OpLogicalNot, boolTypeId, leftId); - - // make an "if" based on the left value - spv::Builder::If ifBuilder(leftId, spv::SelectionControlMaskNone, builder); - - // emit right operand as the "then" part of the "if" - builder.clearAccessChain(); - right.traverse(this); - spv::Id rightId = accessChainLoad(right.getType()); - - // accumulate left operand's phi information - phiOperands.push_back(rightId); - phiOperands.push_back(builder.getBuildPoint()->getId()); - - // finish the "if" - ifBuilder.makeEndIf(); - - // phi together the two results - return builder.createOp(spv::OpPhi, boolTypeId, phiOperands); -} - -#ifdef AMD_EXTENSIONS -// Return type Id of the imported set of extended instructions corresponds to the name. -// Import this set if it has not been imported yet. -spv::Id TGlslangToSpvTraverser::getExtBuiltins(const char* name) -{ - if (extBuiltinMap.find(name) != extBuiltinMap.end()) - return extBuiltinMap[name]; - else { - builder.addExtension(name); - spv::Id extBuiltins = builder.import(name); - extBuiltinMap[name] = extBuiltins; - return extBuiltins; - } -} -#endif - -}; // end anonymous namespace - -namespace glslang { - -void GetSpirvVersion(std::string& version) -{ - const int bufSize = 100; - char buf[bufSize]; - snprintf(buf, bufSize, "0x%08x, Revision %d", spv::Version, spv::Revision); - version = buf; -} - -// For low-order part of the generator's magic number. Bump up -// when there is a change in the style (e.g., if SSA form changes, -// or a different instruction sequence to do something gets used). 
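
createShortCircuit above is the SPIR-V rendering of lazy '&&'/'||': the left operand is evaluated unconditionally, a '!' makes the two operators symmetric, the right operand is emitted only inside the conditional, and OpPhi merges the two candidate results. The same control flow in plain C++, as a semantic reference only (shortCircuit is an illustrative name):

    #include <functional>

    // '||' negates the left operand so both operators reduce to the same shape:
    // evaluate the right side only when the branch is taken, then "phi" the two
    // values together. 'result' plays the role of the OpPhi result.
    bool shortCircuit(bool left, bool isOr, const std::function<bool()>& right) {
        bool result = left;                  // phi operand from the pre-branch block
        bool takeBranch = isOr ? !left : left;
        if (takeBranch)
            result = right();                // phi operand from the "then" block
        return result;                       // merged value, like OpPhi
    }
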
-int GetSpirvGeneratorVersion() -{ - // return 1; // start - // return 2; // EOpAtomicCounterDecrement gets a post decrement, to map between GLSL -> SPIR-V - // return 3; // change/correct barrier-instruction operands, to match memory model group decisions - return 4; // some deeper access chains: for dynamic vector component, and local Boolean component -} - -// Write SPIR-V out to a binary file -void OutputSpvBin(const std::vector& spirv, const char* baseName) -{ - std::ofstream out; - out.open(baseName, std::ios::binary | std::ios::out); - if (out.fail()) - printf("ERROR: Failed to open file: %s\n", baseName); - for (int i = 0; i < (int)spirv.size(); ++i) { - unsigned int word = spirv[i]; - out.write((const char*)&word, 4); - } - out.close(); -} - -// Write SPIR-V out to a text file with 32-bit hexadecimal words -void OutputSpvHex(const std::vector& spirv, const char* baseName, const char* varName) -{ - std::ofstream out; - out.open(baseName, std::ios::binary | std::ios::out); - if (out.fail()) - printf("ERROR: Failed to open file: %s\n", baseName); - out << "\t// " GLSLANG_REVISION " " GLSLANG_DATE << std::endl; - if (varName != nullptr) { - out << "\t #pragma once" << std::endl; - out << "const uint32_t " << varName << "[] = {" << std::endl; - } - const int WORDS_PER_LINE = 8; - for (int i = 0; i < (int)spirv.size(); i += WORDS_PER_LINE) { - out << "\t"; - for (int j = 0; j < WORDS_PER_LINE && i + j < (int)spirv.size(); ++j) { - const unsigned int word = spirv[i + j]; - out << "0x" << std::hex << std::setw(8) << std::setfill('0') << word; - if (i + j + 1 < (int)spirv.size()) { - out << ","; - } - } - out << std::endl; - } - if (varName != nullptr) { - out << "};"; - } - out.close(); -} - -#ifdef ENABLE_OPT -void errHandler(const std::string& str) { - std::cerr << str << std::endl; -} -#endif - -// -// Set up the glslang traversal -// -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, SpvOptions* options) -{ - spv::SpvBuildLogger logger; - GlslangToSpv(intermediate, spirv, &logger, options); -} - -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - spv::SpvBuildLogger* logger, SpvOptions* options) -{ - TIntermNode* root = intermediate.getTreeRoot(); - - if (root == 0) - return; - - glslang::SpvOptions defaultOptions; - if (options == nullptr) - options = &defaultOptions; - - glslang::GetThreadPoolAllocator().push(); - - TGlslangToSpvTraverser it(intermediate.getSpv().spv, &intermediate, logger, *options); - root->traverse(&it); - it.finishSpv(); - it.dumpSpv(spirv); - -#ifdef ENABLE_OPT - // If from HLSL, run spirv-opt to "legalize" the SPIR-V for Vulkan - // eg. forward and remove memory writes of opaque types. 
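
OutputSpvHex above dumps the module as a C array of 32-bit hexadecimal words, eight per line. A self-contained sketch of the same emission, minus the GLSLANG_REVISION banner and the #pragma once line (writeSpvAsCArray and its parameters are illustrative names, not the deleted API):

    #include <cstdint>
    #include <fstream>
    #include <iomanip>
    #include <vector>

    // Emit SPIR-V words as "0x%08x" entries, eight per line, optionally wrapped
    // in a named const uint32_t[] definition for direct #include-ing.
    void writeSpvAsCArray(const std::vector<uint32_t>& spirv,
                          const char* path, const char* varName) {
        std::ofstream out(path);
        if (varName) out << "const uint32_t " << varName << "[] = {\n";
        const size_t wordsPerLine = 8;
        for (size_t i = 0; i < spirv.size(); i += wordsPerLine) {
            out << "\t";
            for (size_t j = 0; j < wordsPerLine && i + j < spirv.size(); ++j) {
                out << "0x" << std::hex << std::setw(8) << std::setfill('0')
                    << spirv[i + j];
                if (i + j + 1 < spirv.size()) out << ",";
            }
            out << "\n";
        }
        if (varName) out << "};\n";
    }
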
- if ((intermediate.getSource() == EShSourceHlsl || - options->optimizeSize) && - !options->disableOptimizer) { - spv_target_env target_env = SPV_ENV_UNIVERSAL_1_2; - - spvtools::Optimizer optimizer(target_env); - optimizer.SetMessageConsumer([](spv_message_level_t level, - const char* source, - const spv_position_t& position, - const char* message) { - std::cerr << StringifyMessage(level, source, position, message) - << std::endl; - }); - - optimizer.RegisterPass(CreateInlineExhaustivePass()); - optimizer.RegisterPass(CreateEliminateDeadFunctionsPass()); - optimizer.RegisterPass(CreateScalarReplacementPass()); - optimizer.RegisterPass(CreateLocalAccessChainConvertPass()); - optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()); - optimizer.RegisterPass(CreateLocalSingleStoreElimPass()); - optimizer.RegisterPass(CreateInsertExtractElimPass()); - optimizer.RegisterPass(CreateDeadInsertElimPass()); - optimizer.RegisterPass(CreateAggressiveDCEPass()); - optimizer.RegisterPass(CreateDeadBranchElimPass()); - optimizer.RegisterPass(CreateCFGCleanupPass()); - optimizer.RegisterPass(CreateBlockMergePass()); - optimizer.RegisterPass(CreateLocalMultiStoreElimPass()); - optimizer.RegisterPass(CreateInsertExtractElimPass()); - optimizer.RegisterPass(CreateDeadInsertElimPass()); - if (options->optimizeSize) { - optimizer.RegisterPass(CreateRedundancyEliminationPass()); - // TODO(greg-lunarg): Add this when AMD driver issues are resolved - // optimizer.RegisterPass(CreateCommonUniformElimPass()); - } - optimizer.RegisterPass(CreateAggressiveDCEPass()); - - if (!optimizer.Run(spirv.data(), spirv.size(), &spirv)) - return; - - // Remove dead module-level objects: functions, types, vars - // TODO(greg-lunarg): Switch to spirv-opt versions when available - spv::spirvbin_t Remapper(0); - Remapper.registerErrorHandler(errHandler); - Remapper.remap(spirv, spv::spirvbin_t::DCE_ALL); - } -#endif - - glslang::GetThreadPoolAllocator().pop(); -} - -}; // end namespace glslang diff --git a/third_party/glslang-spirv/GlslangToSpv.h b/third_party/glslang-spirv/GlslangToSpv.h deleted file mode 100644 index 3a4371375..000000000 --- a/third_party/glslang-spirv/GlslangToSpv.h +++ /dev/null @@ -1,67 +0,0 @@ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#if _MSC_VER >= 1900 - #pragma warning(disable : 4464) // relative include path contains '..' -#endif - -#include "../glslang/Include/intermediate.h" - -#include -#include - -#include "Logger.h" - -namespace glslang { - -struct SpvOptions { - SpvOptions() : generateDebugInfo(false), disableOptimizer(true), - optimizeSize(false) { } - bool generateDebugInfo; - bool disableOptimizer; - bool optimizeSize; -}; - -void GetSpirvVersion(std::string&); -int GetSpirvGeneratorVersion(); -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - SpvOptions* options = nullptr); -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - spv::SpvBuildLogger* logger, SpvOptions* options = nullptr); -void OutputSpvBin(const std::vector& spirv, const char* baseName); -void OutputSpvHex(const std::vector& spirv, const char* baseName, const char* varName); - -} diff --git a/third_party/glslang-spirv/InReadableOrder.cpp b/third_party/glslang-spirv/InReadableOrder.cpp deleted file mode 100644 index 52b29613a..000000000 --- a/third_party/glslang-spirv/InReadableOrder.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// The SPIR-V spec requires code blocks to appear in an order satisfying the -// dominator-tree direction (ie, dominator before the dominated). This is, -// actually, easy to achieve: any pre-order CFG traversal algorithm will do it. 
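
The header comment here (it continues below) motivates InReadableOrder.cpp: any pre-order CFG walk satisfies the dominance ordering rule, but a readable layout additionally delays merge and continue blocks until every branch into them has been emitted. A toy model of that delayed-merge depth-first search, with Block standing in for spv::Block; the deleted code handles continue blocks the same way, while this sketch shows only the merge case:

    #include <functional>
    #include <unordered_set>
    #include <vector>

    struct Block {
        std::vector<Block*> successors;
        Block* mergeBlock = nullptr;  // set on blocks carrying a merge instruction
    };

    // Plain DFS, except a block registered as a merge target is skipped while
    // delayed, then visited once all branches into it have been emitted.
    void visitReadable(Block* b, std::unordered_set<Block*>& visited,
                       std::unordered_set<Block*>& delayed,
                       const std::function<void(Block*)>& emit) {
        if (!b || visited.count(b) || delayed.count(b)) return;
        visited.insert(b);
        emit(b);
        if (b->mergeBlock) delayed.insert(b->mergeBlock);
        for (Block* s : b->successors)
            visitReadable(s, visited, delayed, emit);
        if (b->mergeBlock) {          // all branches done: release the merge block
            delayed.erase(b->mergeBlock);
            visitReadable(b->mergeBlock, visited, delayed, emit);
        }
    }
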
-// Because such algorithms visit a block only after traversing some path to it -// from the root, they necessarily visit the block's idom first. -// -// But not every graph-traversal algorithm outputs blocks in an order that -// appears logical to human readers. The problem is that unrelated branches may -// be interspersed with each other, and merge blocks may come before some of the -// branches being merged. -// -// A good, human-readable order of blocks may be achieved by performing -// depth-first search but delaying merge nodes until after all their branches -// have been visited. This is implemented below by the inReadableOrder() -// function. - -#include "spvIR.h" - -#include -#include - -using spv::Block; -using spv::Id; - -namespace { -// Traverses CFG in a readable order, invoking a pre-set callback on each block. -// Use by calling visit() on the root block. -class ReadableOrderTraverser { -public: - explicit ReadableOrderTraverser(std::function callback) : callback_(callback) {} - // Visits the block if it hasn't been visited already and isn't currently - // being delayed. Invokes callback(block), then descends into its - // successors. Delays merge-block and continue-block processing until all - // the branches have been completed. - void visit(Block* block) - { - assert(block); - if (visited_.count(block) || delayed_.count(block)) - return; - callback_(block); - visited_.insert(block); - Block* mergeBlock = nullptr; - Block* continueBlock = nullptr; - auto mergeInst = block->getMergeInstruction(); - if (mergeInst) { - Id mergeId = mergeInst->getIdOperand(0); - mergeBlock = block->getParent().getParent().getInstruction(mergeId)->getBlock(); - delayed_.insert(mergeBlock); - if (mergeInst->getOpCode() == spv::OpLoopMerge) { - Id continueId = mergeInst->getIdOperand(1); - continueBlock = - block->getParent().getParent().getInstruction(continueId)->getBlock(); - delayed_.insert(continueBlock); - } - } - const auto successors = block->getSuccessors(); - for (auto it = successors.cbegin(); it != successors.cend(); ++it) - visit(*it); - if (continueBlock) { - delayed_.erase(continueBlock); - visit(continueBlock); - } - if (mergeBlock) { - delayed_.erase(mergeBlock); - visit(mergeBlock); - } - } - -private: - std::function callback_; - // Whether a block has already been visited or is being delayed. - std::unordered_set visited_, delayed_; -}; -} - -void spv::inReadableOrder(Block* root, std::function callback) -{ - ReadableOrderTraverser(callback).visit(root); -} diff --git a/third_party/glslang-spirv/Include/BaseTypes.h b/third_party/glslang-spirv/Include/BaseTypes.h deleted file mode 100644 index 050c2c4b9..000000000 --- a/third_party/glslang-spirv/Include/BaseTypes.h +++ /dev/null @@ -1,387 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _BASICTYPES_INCLUDED_ -#define _BASICTYPES_INCLUDED_ - -namespace glslang { - -// -// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. -// -enum TBasicType { - EbtVoid, - EbtFloat, - EbtDouble, -#ifdef AMD_EXTENSIONS - EbtFloat16, -#endif - EbtInt, - EbtUint, - EbtInt64, - EbtUint64, -#ifdef AMD_EXTENSIONS - EbtInt16, - EbtUint16, -#endif - EbtBool, - EbtAtomicUint, - EbtSampler, - EbtStruct, - EbtBlock, - - // HLSL types that live only temporarily. - EbtString, - - EbtNumTypes -}; - -// -// Storage qualifiers. Should align with different kinds of storage or -// resource or GLSL storage qualifier. Expansion is deprecated. -// -// N.B.: You probably DON'T want to add anything here, but rather just add it -// to the built-in variables. See the comment above TBuiltInVariable. -// -// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. -// DO NOT follow the design pattern of, say EvqInstanceId, etc. -// -enum TStorageQualifier { - EvqTemporary, // For temporaries (within a function), read/write - EvqGlobal, // For globals read/write - EvqConst, // User-defined constant values, will be semantically constant and constant folded - EvqVaryingIn, // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) - EvqVaryingOut, // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) - EvqUniform, // read only, shared with app - EvqBuffer, // read/write, shared with app - EvqShared, // compute shader's read/write 'shared' qualifier - - // parameters - EvqIn, // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter - EvqOut, // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter - EvqInOut, - EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics - - // built-ins read by vertex shader - EvqVertexId, - EvqInstanceId, - - // built-ins written by vertex shader - EvqPosition, - EvqPointSize, - EvqClipVertex, - - // built-ins read by fragment shader - EvqFace, - EvqFragCoord, - EvqPointCoord, - - // built-ins written by fragment shader - EvqFragColor, - EvqFragDepth, - - // end of list - EvqLast -}; - -// -// Subcategories of the TStorageQualifier, simply to give a direct mapping -// between built-in variable names and an numerical value (the enum). 
-// -// For backward compatibility, there is some redundancy between the -// TStorageQualifier and these. Existing members should both be maintained accurately. -// However, any new built-in variable (and any existing non-redundant one) -// must follow the pattern that the specific built-in is here, and only its -// general qualifier is in TStorageQualifier. -// -// Something like gl_Position, which is sometimes 'in' and sometimes 'out' -// shows up as two different built-in variables in a single stage, but -// only has a single enum in TBuiltInVariable, so both the -// TStorageQualifier and the TBuitinVariable are needed to distinguish -// between them. -// -enum TBuiltInVariable { - EbvNone, - EbvNumWorkGroups, - EbvWorkGroupSize, - EbvWorkGroupId, - EbvLocalInvocationId, - EbvGlobalInvocationId, - EbvLocalInvocationIndex, - EbvSubGroupSize, - EbvSubGroupInvocation, - EbvSubGroupEqMask, - EbvSubGroupGeMask, - EbvSubGroupGtMask, - EbvSubGroupLeMask, - EbvSubGroupLtMask, - EbvVertexId, - EbvInstanceId, - EbvVertexIndex, - EbvInstanceIndex, - EbvBaseVertex, - EbvBaseInstance, - EbvDrawId, - EbvPosition, - EbvPointSize, - EbvClipVertex, - EbvClipDistance, - EbvCullDistance, - EbvNormal, - EbvVertex, - EbvMultiTexCoord0, - EbvMultiTexCoord1, - EbvMultiTexCoord2, - EbvMultiTexCoord3, - EbvMultiTexCoord4, - EbvMultiTexCoord5, - EbvMultiTexCoord6, - EbvMultiTexCoord7, - EbvFrontColor, - EbvBackColor, - EbvFrontSecondaryColor, - EbvBackSecondaryColor, - EbvTexCoord, - EbvFogFragCoord, - EbvInvocationId, - EbvPrimitiveId, - EbvLayer, - EbvViewportIndex, - EbvPatchVertices, - EbvTessLevelOuter, - EbvTessLevelInner, - EbvBoundingBox, - EbvTessCoord, - EbvColor, - EbvSecondaryColor, - EbvFace, - EbvFragCoord, - EbvPointCoord, - EbvFragColor, - EbvFragData, - EbvFragDepth, - EbvFragStencilRef, - EbvSampleId, - EbvSamplePosition, - EbvSampleMask, - EbvHelperInvocation, - -#ifdef AMD_EXTENSIONS - EbvBaryCoordNoPersp, - EbvBaryCoordNoPerspCentroid, - EbvBaryCoordNoPerspSample, - EbvBaryCoordSmooth, - EbvBaryCoordSmoothCentroid, - EbvBaryCoordSmoothSample, - EbvBaryCoordPullModel, -#endif - - EbvViewIndex, - EbvDeviceIndex, - -#ifdef NV_EXTENSIONS - EbvViewportMaskNV, - EbvSecondaryPositionNV, - EbvSecondaryViewportMaskNV, - EbvPositionPerViewNV, - EbvViewportMaskPerViewNV, - EbvFragFullyCoveredNV, -#endif - - // HLSL built-ins that live only temporarily, until they get remapped - // to one of the above. 
- EbvFragDepthGreater, - EbvFragDepthLesser, - EbvGsOutputStream, - EbvOutputPatch, - EbvInputPatch, - - // structbuffer types - EbvAppendConsume, // no need to differentiate append and consume - EbvRWStructuredBuffer, - EbvStructuredBuffer, - EbvByteAddressBuffer, - EbvRWByteAddressBuffer, - - EbvLast -}; - -// These will show up in error messages -__inline const char* GetStorageQualifierString(TStorageQualifier q) -{ - switch (q) { - case EvqTemporary: return "temp"; break; - case EvqGlobal: return "global"; break; - case EvqConst: return "const"; break; - case EvqConstReadOnly: return "const (read only)"; break; - case EvqVaryingIn: return "in"; break; - case EvqVaryingOut: return "out"; break; - case EvqUniform: return "uniform"; break; - case EvqBuffer: return "buffer"; break; - case EvqShared: return "shared"; break; - case EvqIn: return "in"; break; - case EvqOut: return "out"; break; - case EvqInOut: return "inout"; break; - case EvqVertexId: return "gl_VertexId"; break; - case EvqInstanceId: return "gl_InstanceId"; break; - case EvqPosition: return "gl_Position"; break; - case EvqPointSize: return "gl_PointSize"; break; - case EvqClipVertex: return "gl_ClipVertex"; break; - case EvqFace: return "gl_FrontFacing"; break; - case EvqFragCoord: return "gl_FragCoord"; break; - case EvqPointCoord: return "gl_PointCoord"; break; - case EvqFragColor: return "fragColor"; break; - case EvqFragDepth: return "gl_FragDepth"; break; - default: return "unknown qualifier"; - } -} - -__inline const char* GetBuiltInVariableString(TBuiltInVariable v) -{ - switch (v) { - case EbvNone: return ""; - case EbvNumWorkGroups: return "NumWorkGroups"; - case EbvWorkGroupSize: return "WorkGroupSize"; - case EbvWorkGroupId: return "WorkGroupID"; - case EbvLocalInvocationId: return "LocalInvocationID"; - case EbvGlobalInvocationId: return "GlobalInvocationID"; - case EbvLocalInvocationIndex: return "LocalInvocationIndex"; - case EbvSubGroupSize: return "SubGroupSize"; - case EbvSubGroupInvocation: return "SubGroupInvocation"; - case EbvSubGroupEqMask: return "SubGroupEqMask"; - case EbvSubGroupGeMask: return "SubGroupGeMask"; - case EbvSubGroupGtMask: return "SubGroupGtMask"; - case EbvSubGroupLeMask: return "SubGroupLeMask"; - case EbvSubGroupLtMask: return "SubGroupLtMask"; - case EbvVertexId: return "VertexId"; - case EbvInstanceId: return "InstanceId"; - case EbvVertexIndex: return "VertexIndex"; - case EbvInstanceIndex: return "InstanceIndex"; - case EbvBaseVertex: return "BaseVertex"; - case EbvBaseInstance: return "BaseInstance"; - case EbvDrawId: return "DrawId"; - case EbvPosition: return "Position"; - case EbvPointSize: return "PointSize"; - case EbvClipVertex: return "ClipVertex"; - case EbvClipDistance: return "ClipDistance"; - case EbvCullDistance: return "CullDistance"; - case EbvNormal: return "Normal"; - case EbvVertex: return "Vertex"; - case EbvMultiTexCoord0: return "MultiTexCoord0"; - case EbvMultiTexCoord1: return "MultiTexCoord1"; - case EbvMultiTexCoord2: return "MultiTexCoord2"; - case EbvMultiTexCoord3: return "MultiTexCoord3"; - case EbvMultiTexCoord4: return "MultiTexCoord4"; - case EbvMultiTexCoord5: return "MultiTexCoord5"; - case EbvMultiTexCoord6: return "MultiTexCoord6"; - case EbvMultiTexCoord7: return "MultiTexCoord7"; - case EbvFrontColor: return "FrontColor"; - case EbvBackColor: return "BackColor"; - case EbvFrontSecondaryColor: return "FrontSecondaryColor"; - case EbvBackSecondaryColor: return "BackSecondaryColor"; - case EbvTexCoord: return "TexCoord"; - case 
EbvFogFragCoord: return "FogFragCoord"; - case EbvInvocationId: return "InvocationID"; - case EbvPrimitiveId: return "PrimitiveID"; - case EbvLayer: return "Layer"; - case EbvViewportIndex: return "ViewportIndex"; - case EbvPatchVertices: return "PatchVertices"; - case EbvTessLevelOuter: return "TessLevelOuter"; - case EbvTessLevelInner: return "TessLevelInner"; - case EbvBoundingBox: return "BoundingBox"; - case EbvTessCoord: return "TessCoord"; - case EbvColor: return "Color"; - case EbvSecondaryColor: return "SecondaryColor"; - case EbvFace: return "Face"; - case EbvFragCoord: return "FragCoord"; - case EbvPointCoord: return "PointCoord"; - case EbvFragColor: return "FragColor"; - case EbvFragData: return "FragData"; - case EbvFragDepth: return "FragDepth"; - case EbvFragStencilRef: return "FragStencilRef"; - case EbvSampleId: return "SampleId"; - case EbvSamplePosition: return "SamplePosition"; - case EbvSampleMask: return "SampleMaskIn"; - case EbvHelperInvocation: return "HelperInvocation"; - -#ifdef AMD_EXTENSIONS - case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; - case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; - case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; - case EbvBaryCoordSmooth: return "BaryCoordSmooth"; - case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; - case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; - case EbvBaryCoordPullModel: return "BaryCoordPullModel"; -#endif - - case EbvViewIndex: return "ViewIndex"; - case EbvDeviceIndex: return "DeviceIndex"; - -#ifdef NV_EXTENSIONS - case EbvViewportMaskNV: return "ViewportMaskNV"; - case EbvSecondaryPositionNV: return "SecondaryPositionNV"; - case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; - case EbvPositionPerViewNV: return "PositionPerViewNV"; - case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; - case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; -#endif - default: return "unknown built-in variable"; - } -} - -// In this enum, order matters; users can assume higher precision is a bigger value -// and EpqNone is 0. -enum TPrecisionQualifier { - EpqNone = 0, - EpqLow, - EpqMedium, - EpqHigh -}; - -__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) -{ - switch(p) { - case EpqNone: return ""; break; - case EpqLow: return "lowp"; break; - case EpqMedium: return "mediump"; break; - case EpqHigh: return "highp"; break; - default: return "unknown precision qualifier"; - } -} - -} // end namespace glslang - -#endif // _BASICTYPES_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/Common.h b/third_party/glslang-spirv/Include/Common.h deleted file mode 100644 index 041701774..000000000 --- a/third_party/glslang-spirv/Include/Common.h +++ /dev/null @@ -1,274 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _COMMON_INCLUDED_ -#define _COMMON_INCLUDED_ - -#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || defined MINGW_HAS_SECURE_API - #include - #define snprintf sprintf_s - #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args)) -#elif defined (solaris) - #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) - #include - #define UINT_PTR uintptr_t -#else - #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) - #include - #define UINT_PTR uintptr_t -#endif - -#if defined(__ANDROID__) || _MSC_VER < 1700 -#include -namespace std { -template -std::string to_string(const T& val) { - std::ostringstream os; - os << val; - return os.str(); -} -} -#endif - -#if defined(_MSC_VER) && _MSC_VER < 1800 -inline long long int strtoll (const char* str, char** endptr, int base) -{ - return _strtoi64(str, endptr, base); -} -inline unsigned long long int strtoull (const char* str, char** endptr, int base) -{ - return _strtoui64(str, endptr, base); -} -inline long long int atoll (const char* str) -{ - return strtoll(str, NULL, 10); -} -#endif - -#if defined(_MSC_VER) -#define strdup _strdup -#endif - -/* windows only pragma */ -#ifdef _MSC_VER - #pragma warning(disable : 4786) // Don't warn about too long identifiers - #pragma warning(disable : 4514) // unused inline method - #pragma warning(disable : 4201) // nameless union -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "PoolAlloc.h" - -// -// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme. -// -#define POOL_ALLOCATOR_NEW_DELETE(A) \ - void* operator new(size_t s) { return (A).allocate(s); } \ - void* operator new(size_t, void *_Where) { return (_Where); } \ - void operator delete(void*) { } \ - void operator delete(void *, void *) { } \ - void* operator new[](size_t s) { return (A).allocate(s); } \ - void* operator new[](size_t, void *_Where) { return (_Where); } \ - void operator delete[](void*) { } \ - void operator delete[](void *, void *) { } - -namespace glslang { - - // - // Pool version of string. - // - typedef pool_allocator TStringAllocator; - typedef std::basic_string , TStringAllocator> TString; - -} // end namespace glslang - -// Repackage the std::hash for use by unordered map/set with a TString key. 
-namespace std { - - template<> struct hash { - std::size_t operator()(const glslang::TString& s) const - { - const unsigned _FNV_offset_basis = 2166136261U; - const unsigned _FNV_prime = 16777619U; - unsigned _Val = _FNV_offset_basis; - size_t _Count = s.size(); - const char* _First = s.c_str(); - for (size_t _Next = 0; _Next < _Count; ++_Next) - { - _Val ^= (unsigned)_First[_Next]; - _Val *= _FNV_prime; - } - - return _Val; - } - }; -} - -namespace glslang { - -inline TString* NewPoolTString(const char* s) -{ - void* memory = GetThreadPoolAllocator().allocate(sizeof(TString)); - return new(memory) TString(s); -} - -template inline T* NewPoolObject(T*) -{ - return new(GetThreadPoolAllocator().allocate(sizeof(T))) T; -} - -template inline T* NewPoolObject(T, int instances) -{ - return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances]; -} - -// -// Pool allocator versions of vectors, lists, and maps -// -template class TVector : public std::vector > { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - typedef typename std::vector >::size_type size_type; - TVector() : std::vector >() {} - TVector(const pool_allocator& a) : std::vector >(a) {} - TVector(size_type i) : std::vector >(i) {} - TVector(size_type i, const T& val) : std::vector >(i, val) {} -}; - -template class TList : public std::list > { -}; - -template > -class TMap : public std::map > > { -}; - -template , class PRED = std::equal_to > -class TUnorderedMap : public std::unordered_map > > { -}; - -// -// Persistent string memory. Should only be used for strings that survive -// across compiles/links. -// -typedef std::basic_string TPersistString; - -// -// templatized min and max functions. -// -template T Min(const T a, const T b) { return a < b ? a : b; } -template T Max(const T a, const T b) { return a > b ? a : b; } - -// -// Create a TString object from an integer. -// -#if defined _MSC_VER || defined MINGW_HAS_SECURE_API -inline const TString String(const int i, const int base = 10) -{ - char text[16]; // 32 bit ints are at most 10 digits in base 10 - _itoa_s(i, text, sizeof(text), base); - return text; -} -#else -inline const TString String(const int i, const int /*base*/ = 10) -{ - char text[16]; // 32 bit ints are at most 10 digits in base 10 - - // we assume base 10 for all cases - snprintf(text, sizeof(text), "%d", i); - - return text; -} -#endif - -struct TSourceLoc { - void init() { name = nullptr; string = 0; line = 0; column = 0; } - void init(int stringNum) { init(); string = stringNum; } - // Returns the name if it exists. Otherwise, returns the string number. - std::string getStringNameOrNum(bool quoteStringName = true) const - { - if (name != nullptr) - return quoteStringName ? ("\"" + std::string(name) + "\"") : name; - return std::to_string((long long)string); - } - const char* name; // descriptive name for this string - int string; - int line; - int column; -}; - -class TPragmaTable : public TMap { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) -}; - -const int MaxTokenLength = 1024; - -template bool IsPow2(T powerOf2) -{ - if (powerOf2 <= 0) - return false; - - return (powerOf2 & (powerOf2 - 1)) == 0; -} - -// Round number up to a multiple of the given powerOf2, which is not -// a power, just a number that must be a power of 2. 
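
The std::hash<TString> specialization deleted above is a 32-bit FNV-1a over the string bytes. The same algorithm over std::string, with the two magic constants named (fnv1a32 is an illustrative name):

    #include <cstddef>
    #include <string>

    inline std::size_t fnv1a32(const std::string& s) {
        const unsigned offsetBasis = 2166136261u;  // FNV offset basis
        const unsigned prime = 16777619u;          // FNV prime
        unsigned h = offsetBasis;
        for (char c : s) {
            h ^= static_cast<unsigned char>(c);    // xor first, then multiply: FNV-1a
            h *= prime;
        }
        return h;
    }
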
-// Round number up to a multiple of the given powerOf2, which is not
-// a power, just a number that must be a power of 2.
-template <class T> void RoundToPow2(T& number, int powerOf2)
-{
-    assert(IsPow2(powerOf2));
-    number = (number + powerOf2 - 1) & ~(powerOf2 - 1);
-}
-
-template <class T> bool IsMultipleOfPow2(T number, int powerOf2)
-{
-    assert(IsPow2(powerOf2));
-    return ! (number & (powerOf2 - 1));
-}
-
-} // end namespace glslang
-
-#endif // _COMMON_INCLUDED_
diff --git a/third_party/glslang-spirv/Include/ConstantUnion.h b/third_party/glslang-spirv/Include/ConstantUnion.h
deleted file mode 100644
index 58c6094ea..000000000
--- a/third_party/glslang-spirv/Include/ConstantUnion.h
+++ /dev/null
@@ -1,625 +0,0 @@
-//
-// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
-// Copyright (C) 2013 LunarG, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-//    Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//    Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
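Before moving into ConstantUnion.h: the two alignment helpers that close Common.h above are easy to sanity-check in isolation. A standalone restatement for illustration only, not glslang code:

    #include <cassert>

    template <class T> bool IsPow2(T v) {
        if (v <= 0)
            return false;
        return (v & (v - 1)) == 0;  // a power of two has exactly one bit set
    }

    template <class T> void RoundToPow2(T& n, int powerOf2) {
        assert(IsPow2(powerOf2));
        n = (n + powerOf2 - 1) & ~(powerOf2 - 1);
    }

    int main() {
        int offset = 13;
        RoundToPow2(offset, 16);            // 13 rounds up to 16
        assert(offset == 16);
        assert(IsPow2(64) && !IsPow2(48));  // 48 = 110000b has two bits set
        return 0;
    }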
-// - -#ifndef _CONSTANT_UNION_INCLUDED_ -#define _CONSTANT_UNION_INCLUDED_ - -#include "../Include/Common.h" -#include "../Include/BaseTypes.h" - -namespace glslang { - -class TConstUnion { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TConstUnion() : iConst(0), type(EbtInt) { } - - void setIConst(int i) - { - iConst = i; - type = EbtInt; - } - - void setUConst(unsigned int u) - { - uConst = u; - type = EbtUint; - } - - void setI64Const(long long i64) - { - i64Const = i64; - type = EbtInt64; - } - - void setU64Const(unsigned long long u64) - { - u64Const = u64; - type = EbtUint64; - } - - void setDConst(double d) - { - dConst = d; - type = EbtDouble; - } - - void setBConst(bool b) - { - bConst = b; - type = EbtBool; - } - - void setSConst(const TString* s) - { - sConst = s; - type = EbtString; - } - - int getIConst() const { return iConst; } - unsigned int getUConst() const { return uConst; } - long long getI64Const() const { return i64Const; } - unsigned long long getU64Const() const { return u64Const; } - double getDConst() const { return dConst; } - bool getBConst() const { return bConst; } - const TString* getSConst() const { return sConst; } - - bool operator==(const int i) const - { - if (i == iConst) - return true; - - return false; - } - - bool operator==(const unsigned int u) const - { - if (u == uConst) - return true; - - return false; - } - - bool operator==(const long long i64) const - { - if (i64 == i64Const) - return true; - - return false; - } - - bool operator==(const unsigned long long u64) const - { - if (u64 == u64Const) - return true; - - return false; - } - - bool operator==(const double d) const - { - if (d == dConst) - return true; - - return false; - } - - bool operator==(const bool b) const - { - if (b == bConst) - return true; - - return false; - } - - bool operator==(const TConstUnion& constant) const - { - if (constant.type != type) - return false; - - switch (type) { - case EbtInt: - if (constant.iConst == iConst) - return true; - - break; - case EbtUint: - if (constant.uConst == uConst) - return true; - - break; - case EbtInt64: - if (constant.i64Const == i64Const) - return true; - - break; - case EbtUint64: - if (constant.u64Const == u64Const) - return true; - - break; - case EbtDouble: - if (constant.dConst == dConst) - return true; - - break; - case EbtBool: - if (constant.bConst == bConst) - return true; - - break; - default: - assert(false && "Default missing"); - } - - return false; - } - - bool operator!=(const int i) const - { - return !operator==(i); - } - - bool operator!=(const unsigned int u) const - { - return !operator==(u); - } - - bool operator!=(const long long i) const - { - return !operator==(i); - } - - bool operator!=(const unsigned long long u) const - { - return !operator==(u); - } - - bool operator!=(const float f) const - { - return !operator==(f); - } - - bool operator!=(const bool b) const - { - return !operator==(b); - } - - bool operator!=(const TConstUnion& constant) const - { - return !operator==(constant); - } - - bool operator>(const TConstUnion& constant) const - { - assert(type == constant.type); - switch (type) { - case EbtInt: - if (iConst > constant.iConst) - return true; - - return false; - case EbtUint: - if (uConst > constant.uConst) - return true; - - return false; - case EbtInt64: - if (i64Const > constant.i64Const) - return true; - - return false; - case EbtUint64: - if (u64Const > constant.u64Const) - return true; - - return false; - case EbtDouble: - if (dConst > constant.dConst) - return 
true; - - return false; - default: - assert(false && "Default missing"); - return false; - } - } - - bool operator<(const TConstUnion& constant) const - { - assert(type == constant.type); - switch (type) { - case EbtInt: - if (iConst < constant.iConst) - return true; - - return false; - case EbtUint: - if (uConst < constant.uConst) - return true; - - return false; - case EbtInt64: - if (i64Const < constant.i64Const) - return true; - - return false; - case EbtUint64: - if (u64Const < constant.u64Const) - return true; - - return false; - case EbtDouble: - if (dConst < constant.dConst) - return true; - - return false; - default: - assert(false && "Default missing"); - return false; - } - } - - TConstUnion operator+(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst + constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const + constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst + constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator-(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst - constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const - constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst - constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator*(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst * constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const * constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst * constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator%(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst % constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const % constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst % constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator>>(const TConstUnion& constant) const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: - switch (constant.type) { - case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; - case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; - case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; - case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint: - switch 
(constant.type) { - case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; - case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; - case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtInt64: - switch (constant.type) { - case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; - case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; - case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint64: - switch (constant.type) { - case EbtInt: returnValue.setU64Const(u64Const >> constant.iConst); break; - case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; - case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator<<(const TConstUnion& constant) const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: - switch (constant.type) { - case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; - case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; - case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; - case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint: - switch (constant.type) { - case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; - case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; - case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtInt64: - switch (constant.type) { - case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; - case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; - case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint64: - switch (constant.type) { - case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; - case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; - case EbtInt64: returnValue.setU64Const(u64Const << constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator&(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const & 
constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator|(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator^(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst ^ constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst ^ constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const ^ constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator~() const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: returnValue.setIConst(~iConst); break; - case EbtUint: returnValue.setUConst(~uConst); break; - case EbtInt64: returnValue.setI64Const(~i64Const); break; - case EbtUint64: returnValue.setU64Const(~u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator&&(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtBool: returnValue.setBConst(bConst && constant.bConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator||(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtBool: returnValue.setBConst(bConst || constant.bConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TBasicType getType() const { return type; } - -private: - union { - int iConst; // used for ivec, scalar ints - unsigned int uConst; // used for uvec, scalar uints - long long i64Const; // used for i64vec, scalar int64s - unsigned long long u64Const; // used for u64vec, scalar uint64s - bool bConst; // used for bvec, scalar bools - double dConst; // used for vec, dvec, mat, dmat, scalar floats and doubles - const TString* sConst; // string constant - }; - - TBasicType type; -}; - -// Encapsulate having a pointer to an array of TConstUnion, -// which only needs to be allocated if its size is going to be -// bigger than 0. -// -// One convenience is being able to use [] to go inside the array, instead -// of C++ assuming it as an array of pointers to vectors. -// -// General usage is that the size is known up front, and it is -// created once with the proper size. 
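The operator overloads above are what the front end's constant folder runs on, and the TConstUnionArray wrapper defined just below carries vectors and matrices of such scalars. A sketch of both together, assuming the pre-removal include path (foldDemo is a hypothetical name):

    #include <cassert>
    #include "third_party/glslang-spirv/Include/ConstantUnion.h"  // tree before this diff

    using namespace glslang;

    void foldDemo() {
        GetThreadPoolAllocator().push();

        TConstUnion a, b, two;
        a.setIConst(3);
        b.setIConst(4);
        two.setIConst(2);

        TConstUnion product = (a + b) * two;  // operators assert matching EbtInt types
        assert(product.getIConst() == 14);

        TConstUnionArray vec(4, product);     // "smear" constructor: ivec4(14)
        assert(vec.size() == 4 && vec[0] == 14);

        GetThreadPoolAllocator().pop();
    }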
-// -class TConstUnionArray { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TConstUnionArray() : unionArray(nullptr) { } - virtual ~TConstUnionArray() { } - - explicit TConstUnionArray(int size) - { - if (size == 0) - unionArray = nullptr; - else - unionArray = new TConstUnionVector(size); - } - TConstUnionArray(const TConstUnionArray& a) : unionArray(a.unionArray) { } - TConstUnionArray(const TConstUnionArray& a, int start, int size) - { - unionArray = new TConstUnionVector(size); - for (int i = 0; i < size; ++i) - (*unionArray)[i] = a[start + i]; - } - - // Use this constructor for a smear operation - TConstUnionArray(int size, const TConstUnion& val) - { - unionArray = new TConstUnionVector(size, val); - } - - int size() const { return unionArray ? (int)unionArray->size() : 0; } - TConstUnion& operator[](size_t index) { return (*unionArray)[index]; } - const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; } - bool operator==(const TConstUnionArray& rhs) const - { - // this includes the case that both are unallocated - if (unionArray == rhs.unionArray) - return true; - - if (! unionArray || ! rhs.unionArray) - return false; - - return *unionArray == *rhs.unionArray; - } - bool operator!=(const TConstUnionArray& rhs) const { return ! operator==(rhs); } - - double dot(const TConstUnionArray& rhs) - { - assert(rhs.unionArray->size() == unionArray->size()); - double sum = 0.0; - - for (size_t comp = 0; comp < unionArray->size(); ++comp) - sum += (*this)[comp].getDConst() * rhs[comp].getDConst(); - - return sum; - } - - bool empty() const { return unionArray == nullptr; } - -protected: - typedef TVector TConstUnionVector; - TConstUnionVector* unionArray; -}; - -} // end namespace glslang - -#endif // _CONSTANT_UNION_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/InfoSink.h b/third_party/glslang-spirv/Include/InfoSink.h deleted file mode 100644 index dceb603cf..000000000 --- a/third_party/glslang-spirv/Include/InfoSink.h +++ /dev/null @@ -1,144 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _INFOSINK_INCLUDED_ -#define _INFOSINK_INCLUDED_ - -#include "../Include/Common.h" -#include - -namespace glslang { - -// -// TPrefixType is used to centralize how info log messages start. -// See below. -// -enum TPrefixType { - EPrefixNone, - EPrefixWarning, - EPrefixError, - EPrefixInternalError, - EPrefixUnimplemented, - EPrefixNote -}; - -enum TOutputStream { - ENull = 0, - EDebugger = 0x01, - EStdOut = 0x02, - EString = 0x04, -}; -// -// Encapsulate info logs for all objects that have them. -// -// The methods are a general set of tools for getting a variety of -// messages and types inserted into the log. -// -class TInfoSinkBase { -public: - TInfoSinkBase() : outputStream(4) {} - void erase() { sink.erase(); } - TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; } - TInfoSinkBase& operator<<(char c) { append(1, c); return *this; } - TInfoSinkBase& operator<<(const char* s) { append(s); return *this; } - TInfoSinkBase& operator<<(int n) { append(String(n)); return *this; } - TInfoSinkBase& operator<<(unsigned int n) { append(String(n)); return *this; } - TInfoSinkBase& operator<<(float n) { const int size = 40; char buf[size]; - snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ? 
"%f" : "%g", n); - append(buf); - return *this; } - TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } - TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } - TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } - TInfoSinkBase& operator+(const char* s) { append(s); return *this; } - const char* c_str() const { return sink.c_str(); } - void prefix(TPrefixType message) { - switch(message) { - case EPrefixNone: break; - case EPrefixWarning: append("WARNING: "); break; - case EPrefixError: append("ERROR: "); break; - case EPrefixInternalError: append("INTERNAL ERROR: "); break; - case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; - case EPrefixNote: append("NOTE: "); break; - default: append("UNKNOWN ERROR: "); break; - } - } - void location(const TSourceLoc& loc) { - const int maxSize = 24; - char locText[maxSize]; - snprintf(locText, maxSize, ":%d", loc.line); - append(loc.getStringNameOrNum(false).c_str()); - append(locText); - append(": "); - } - void message(TPrefixType message, const char* s) { - prefix(message); - append(s); - append("\n"); - } - void message(TPrefixType message, const char* s, const TSourceLoc& loc) { - prefix(message); - location(loc); - append(s); - append("\n"); - } - - void setOutputStream(int output = 4) - { - outputStream = output; - } - -protected: - void append(const char* s); - - void append(int count, char c); - void append(const TPersistString& t); - void append(const TString& t); - - void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) - sink.reserve(sink.capacity() + sink.capacity() / 2); } - void appendToStream(const char* s); - TPersistString sink; - int outputStream; -}; - -} // end namespace glslang - -class TInfoSink { -public: - glslang::TInfoSinkBase info; - glslang::TInfoSinkBase debug; -}; - -#endif // _INFOSINK_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/InitializeGlobals.h b/third_party/glslang-spirv/Include/InitializeGlobals.h deleted file mode 100644 index 95d0a40e9..000000000 --- a/third_party/glslang-spirv/Include/InitializeGlobals.h +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef __INITIALIZE_GLOBALS_INCLUDED_ -#define __INITIALIZE_GLOBALS_INCLUDED_ - -namespace glslang { - -bool InitializePoolIndex(); - -} // end namespace glslang - -#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/PoolAlloc.h b/third_party/glslang-spirv/Include/PoolAlloc.h deleted file mode 100644 index b8eccb883..000000000 --- a/third_party/glslang-spirv/Include/PoolAlloc.h +++ /dev/null @@ -1,316 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _POOLALLOC_INCLUDED_ -#define _POOLALLOC_INCLUDED_ - -#ifdef _DEBUG -# define GUARD_BLOCKS // define to enable guard block sanity checking -#endif - -// -// This header defines an allocator that can be used to efficiently -// allocate a large number of small requests for heap memory, with the -// intention that they are not individually deallocated, but rather -// collectively deallocated at one time. -// -// This simultaneously -// -// * Makes each individual allocation much more efficient; the -// typical allocation is trivial. -// * Completely avoids the cost of doing individual deallocation. -// * Saves the trouble of tracking down and plugging a large class of leaks. -// -// Individual classes can use this allocator by supplying their own -// new and delete methods. 
-// -// STL containers can use this allocator by using the pool_allocator -// class as the allocator (second) template argument. -// - -#include -#include -#include - -namespace glslang { - -// If we are using guard blocks, we must track each individual -// allocation. If we aren't using guard blocks, these -// never get instantiated, so won't have any impact. -// - -class TAllocation { -public: - TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) : - size(size), mem(mem), prevAlloc(prev) { - // Allocations are bracketed: - // [allocationHeader][initialGuardBlock][userData][finalGuardBlock] - // This would be cleaner with if (guardBlockSize)..., but that - // makes the compiler print warnings about 0 length memsets, - // even with the if() protecting them. -# ifdef GUARD_BLOCKS - memset(preGuard(), guardBlockBeginVal, guardBlockSize); - memset(data(), userDataFill, size); - memset(postGuard(), guardBlockEndVal, guardBlockSize); -# endif - } - - void check() const { - checkGuardBlock(preGuard(), guardBlockBeginVal, "before"); - checkGuardBlock(postGuard(), guardBlockEndVal, "after"); - } - - void checkAllocList() const; - - // Return total size needed to accommodate user buffer of 'size', - // plus our tracking data. - inline static size_t allocationSize(size_t size) { - return size + 2 * guardBlockSize + headerSize(); - } - - // Offset from surrounding buffer to get to user data buffer. - inline static unsigned char* offsetAllocation(unsigned char* m) { - return m + guardBlockSize + headerSize(); - } - -private: - void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const; - - // Find offsets to pre and post guard blocks, and user data buffer - unsigned char* preGuard() const { return mem + headerSize(); } - unsigned char* data() const { return preGuard() + guardBlockSize; } - unsigned char* postGuard() const { return data() + size; } - - size_t size; // size of the user data area - unsigned char* mem; // beginning of our allocation (pts to header) - TAllocation* prevAlloc; // prior allocation in the chain - - const static unsigned char guardBlockBeginVal; - const static unsigned char guardBlockEndVal; - const static unsigned char userDataFill; - - const static size_t guardBlockSize; -# ifdef GUARD_BLOCKS - inline static size_t headerSize() { return sizeof(TAllocation); } -# else - inline static size_t headerSize() { return 0; } -# endif -}; - -// -// There are several stacks. One is to track the pushing and popping -// of the user, and not yet implemented. The others are simply a -// repositories of free pages or used pages. -// -// Page stacks are linked together with a simple header at the beginning -// of each allocation obtained from the underlying OS. Multi-page allocations -// are returned to the OS. Individual page allocations are kept for future -// re-use. -// -// The "page size" used is not, nor must it match, the underlying OS -// page size. But, having it be about that size or equal to a set of -// pages is likely most optimal. -// -class TPoolAllocator { -public: - TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16); - - // - // Don't call the destructor just to free up the memory, call pop() - // - ~TPoolAllocator(); - - // - // Call push() to establish a new place to pop memory too. Does not - // have to be called to get things started. 
- // - void push(); - - // - // Call pop() to free all memory allocated since the last call to push(), - // or if no last call to push, frees all memory since first allocation. - // - void pop(); - - // - // Call popAll() to free all memory allocated. - // - void popAll(); - - // - // Call allocate() to actually acquire memory. Returns 0 if no memory - // available, otherwise a properly aligned pointer to 'numBytes' of memory. - // - void* allocate(size_t numBytes); - - // - // There is no deallocate. The point of this class is that - // deallocation can be skipped by the user of it, as the model - // of use is to simultaneously deallocate everything at once - // by calling pop(), and to not have to solve memory leak problems. - // - -protected: - friend struct tHeader; - - struct tHeader { - tHeader(tHeader* nextPage, size_t pageCount) : -#ifdef GUARD_BLOCKS - lastAllocation(0), -#endif - nextPage(nextPage), pageCount(pageCount) { } - - ~tHeader() { -#ifdef GUARD_BLOCKS - if (lastAllocation) - lastAllocation->checkAllocList(); -#endif - } - -#ifdef GUARD_BLOCKS - TAllocation* lastAllocation; -#endif - tHeader* nextPage; - size_t pageCount; - }; - - struct tAllocState { - size_t offset; - tHeader* page; - }; - typedef std::vector tAllocStack; - - // Track allocations if and only if we're using guard blocks -#ifndef GUARD_BLOCKS - void* initializeAllocation(tHeader*, unsigned char* memory, size_t) { -#else - void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) { - new(memory) TAllocation(numBytes, memory, block->lastAllocation); - block->lastAllocation = reinterpret_cast(memory); -#endif - - // This is optimized entirely away if GUARD_BLOCKS is not defined. - return TAllocation::offsetAllocation(memory); - } - - size_t pageSize; // granularity of allocation from the OS - size_t alignment; // all returned allocations will be aligned at - // this granularity, which will be a power of 2 - size_t alignmentMask; - size_t headerSkip; // amount of memory to skip to make room for the - // header (basically, size of header, rounded - // up to make it aligned - size_t currentPageOffset; // next offset in top of inUseList to allocate from - tHeader* freeList; // list of popped memory - tHeader* inUseList; // list of all memory currently being used - tAllocStack stack; // stack of where to allocate from, to partition pool - - int numCalls; // just an interesting statistic - size_t totalBytes; // just an interesting statistic -private: - TPoolAllocator& operator=(const TPoolAllocator&); // don't allow assignment operator - TPoolAllocator(const TPoolAllocator&); // don't allow default copy constructor -}; - -// -// There could potentially be many pools with pops happening at -// different times. But a simple use is to have a global pop -// with everyone using the same global allocator. -// -extern TPoolAllocator& GetThreadPoolAllocator(); -void SetThreadPoolAllocator(TPoolAllocator* poolAllocator); - -// -// This STL compatible allocator is intended to be used as the allocator -// parameter to templatized STL containers, like vector and map. -// -// It will use the pools for allocation, and not -// do any deallocation, but will still do destruction. 
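The adaptor defined just below is what lets ordinary STL containers draw from a TPoolAllocator. A sketch, assuming the pre-removal include path (stlOnPoolDemo is a hypothetical name):

    #include <vector>
    #include "third_party/glslang-spirv/Include/PoolAlloc.h"  // tree before this diff

    using namespace glslang;

    void stlOnPoolDemo() {
        GetThreadPoolAllocator().push();   // open a pool scope
        {
            std::vector<float, pool_allocator<float> > weights;
            weights.reserve(64);           // one bump allocation from the pool
            weights.push_back(0.5f);
        }                                  // destructor runs; deallocate() is a no-op
        GetThreadPoolAllocator().pop();    // the reserved block is reclaimed here
    }

The container must not outlive the pop() that reclaims its storage; that discipline, rather than the allocator itself, is what prevents dangling pointers in this scheme.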
-// -template -class pool_allocator { -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - template - struct rebind { - typedef pool_allocator other; - }; - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pool_allocator() : allocator(GetThreadPoolAllocator()) { } - pool_allocator(TPoolAllocator& a) : allocator(a) { } - pool_allocator(const pool_allocator& p) : allocator(p.allocator) { } - - template - pool_allocator(const pool_allocator& p) : allocator(p.getAllocator()) { } - - pointer allocate(size_type n) { - return reinterpret_cast(getAllocator().allocate(n * sizeof(T))); } - pointer allocate(size_type n, const void*) { - return reinterpret_cast(getAllocator().allocate(n * sizeof(T))); } - - void deallocate(void*, size_type) { } - void deallocate(pointer, size_type) { } - - pointer _Charalloc(size_t n) { - return reinterpret_cast(getAllocator().allocate(n)); } - - void construct(pointer p, const T& val) { new ((void *)p) T(val); } - void destroy(pointer p) { p->T::~T(); } - - bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); } - bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); } - - size_type max_size() const { return static_cast(-1) / sizeof(T); } - size_type max_size(int size) const { return static_cast(-1) / size; } - - TPoolAllocator& getAllocator() const { return allocator; } - -protected: - pool_allocator& operator=(const pool_allocator&) { return *this; } - TPoolAllocator& allocator; -}; - -} // end namespace glslang - -#endif // _POOLALLOC_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/ResourceLimits.h b/third_party/glslang-spirv/Include/ResourceLimits.h deleted file mode 100644 index 0d07b8c84..000000000 --- a/third_party/glslang-spirv/Include/ResourceLimits.h +++ /dev/null @@ -1,140 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _RESOURCE_LIMITS_INCLUDED_ -#define _RESOURCE_LIMITS_INCLUDED_ - -struct TLimits { - bool nonInductiveForLoops; - bool whileLoops; - bool doWhileLoops; - bool generalUniformIndexing; - bool generalAttributeMatrixVectorIndexing; - bool generalVaryingIndexing; - bool generalSamplerIndexing; - bool generalVariableIndexing; - bool generalConstantMatrixVectorIndexing; -}; - -struct TBuiltInResource { - int maxLights; - int maxClipPlanes; - int maxTextureUnits; - int maxTextureCoords; - int maxVertexAttribs; - int maxVertexUniformComponents; - int maxVaryingFloats; - int maxVertexTextureImageUnits; - int maxCombinedTextureImageUnits; - int maxTextureImageUnits; - int maxFragmentUniformComponents; - int maxDrawBuffers; - int maxVertexUniformVectors; - int maxVaryingVectors; - int maxFragmentUniformVectors; - int maxVertexOutputVectors; - int maxFragmentInputVectors; - int minProgramTexelOffset; - int maxProgramTexelOffset; - int maxClipDistances; - int maxComputeWorkGroupCountX; - int maxComputeWorkGroupCountY; - int maxComputeWorkGroupCountZ; - int maxComputeWorkGroupSizeX; - int maxComputeWorkGroupSizeY; - int maxComputeWorkGroupSizeZ; - int maxComputeUniformComponents; - int maxComputeTextureImageUnits; - int maxComputeImageUniforms; - int maxComputeAtomicCounters; - int maxComputeAtomicCounterBuffers; - int maxVaryingComponents; - int maxVertexOutputComponents; - int maxGeometryInputComponents; - int maxGeometryOutputComponents; - int maxFragmentInputComponents; - int maxImageUnits; - int maxCombinedImageUnitsAndFragmentOutputs; - int maxCombinedShaderOutputResources; - int maxImageSamples; - int maxVertexImageUniforms; - int maxTessControlImageUniforms; - int maxTessEvaluationImageUniforms; - int maxGeometryImageUniforms; - int maxFragmentImageUniforms; - int maxCombinedImageUniforms; - int maxGeometryTextureImageUnits; - int maxGeometryOutputVertices; - int maxGeometryTotalOutputComponents; - int maxGeometryUniformComponents; - int maxGeometryVaryingComponents; - int maxTessControlInputComponents; - int maxTessControlOutputComponents; - int maxTessControlTextureImageUnits; - int maxTessControlUniformComponents; - int maxTessControlTotalOutputComponents; - int maxTessEvaluationInputComponents; - int maxTessEvaluationOutputComponents; - int maxTessEvaluationTextureImageUnits; - int maxTessEvaluationUniformComponents; - int maxTessPatchComponents; - int maxPatchVertices; - int maxTessGenLevel; - int maxViewports; - int maxVertexAtomicCounters; - int maxTessControlAtomicCounters; - int maxTessEvaluationAtomicCounters; - int maxGeometryAtomicCounters; - int maxFragmentAtomicCounters; - int maxCombinedAtomicCounters; - int maxAtomicCounterBindings; - int maxVertexAtomicCounterBuffers; - int maxTessControlAtomicCounterBuffers; - int maxTessEvaluationAtomicCounterBuffers; - int maxGeometryAtomicCounterBuffers; - int maxFragmentAtomicCounterBuffers; - int maxCombinedAtomicCounterBuffers; - int maxAtomicCounterBufferSize; - int 
maxTransformFeedbackBuffers; - int maxTransformFeedbackInterleavedComponents; - int maxCullDistances; - int maxCombinedClipAndCullDistances; - int maxSamples; - - TLimits limits; -}; - -#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/ShHandle.h b/third_party/glslang-spirv/Include/ShHandle.h deleted file mode 100644 index df07bd8ed..000000000 --- a/third_party/glslang-spirv/Include/ShHandle.h +++ /dev/null @@ -1,176 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _SHHANDLE_INCLUDED_ -#define _SHHANDLE_INCLUDED_ - -// -// Machine independent part of the compiler private objects -// sent as ShHandle to the driver. -// -// This should not be included by driver code. -// - -#define SH_EXPORTING -#include "../Public/ShaderLang.h" -#include "../MachineIndependent/Versions.h" -#include "InfoSink.h" - -class TCompiler; -class TLinker; -class TUniformMap; - -// -// The base class used to back handles returned to the driver. -// -class TShHandleBase { -public: - TShHandleBase() { pool = new glslang::TPoolAllocator; } - virtual ~TShHandleBase() { delete pool; } - virtual TCompiler* getAsCompiler() { return 0; } - virtual TLinker* getAsLinker() { return 0; } - virtual TUniformMap* getAsUniformMap() { return 0; } - virtual glslang::TPoolAllocator* getPool() const { return pool; } -private: - glslang::TPoolAllocator* pool; -}; - -// -// The base class for the machine dependent linker to derive from -// for managing where uniforms live. -// -class TUniformMap : public TShHandleBase { -public: - TUniformMap() { } - virtual ~TUniformMap() { } - virtual TUniformMap* getAsUniformMap() { return this; } - virtual int getLocation(const char* name) = 0; - virtual TInfoSink& getInfoSink() { return infoSink; } - TInfoSink infoSink; -}; - -class TIntermNode; - -// -// The base class for the machine dependent compiler to derive from -// for managing object code from the compile. 
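A machine-dependent back end derives from the TCompiler class declared just below. A hypothetical sketch (TNullCompiler is not a real glslang class), assuming the pre-removal include path:

    #include "third_party/glslang-spirv/Include/ShHandle.h"  // tree before this diff

    // Hypothetical back end: "compiles" by accepting any non-null AST.
    class TNullCompiler : public TCompiler {
    public:
        TNullCompiler(EShLanguage l, TInfoSink& sink) : TCompiler(l, sink) {}

        bool compile(TIntermNode* root, int /*version*/ = 0,
                     EProfile /*profile*/ = ENoProfile) override {
            haveValidObjectCode = (root != nullptr);  // a real back end emits code here
            return haveValidObjectCode;
        }
    };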
-// -class TCompiler : public TShHandleBase { -public: - TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink) , language(l), haveValidObjectCode(false) { } - virtual ~TCompiler() { } - EShLanguage getLanguage() { return language; } - virtual TInfoSink& getInfoSink() { return infoSink; } - - virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0; - - virtual TCompiler* getAsCompiler() { return this; } - virtual bool linkable() { return haveValidObjectCode; } - - TInfoSink& infoSink; -protected: - TCompiler& operator=(TCompiler&); - - EShLanguage language; - bool haveValidObjectCode; -}; - -// -// Link operations are based on a list of compile results... -// -typedef glslang::TVector TCompilerList; -typedef glslang::TVector THandleList; - -// -// The base class for the machine dependent linker to derive from -// to manage the resulting executable. -// - -class TLinker : public TShHandleBase { -public: - TLinker(EShExecutable e, TInfoSink& iSink) : - infoSink(iSink), - executable(e), - haveReturnableObjectCode(false), - appAttributeBindings(0), - fixedAttributeBindings(0), - excludedAttributes(0), - excludedCount(0), - uniformBindings(0) { } - virtual TLinker* getAsLinker() { return this; } - virtual ~TLinker() { } - virtual bool link(TCompilerList&, TUniformMap*) = 0; - virtual bool link(THandleList&) { return false; } - virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; } - virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; } - virtual void getAttributeBindings(ShBindingTable const **t) const = 0; - virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; } - virtual ShBindingTable* getUniformBindings() const { return uniformBindings; } - virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here - virtual TInfoSink& getInfoSink() { return infoSink; } - TInfoSink& infoSink; -protected: - TLinker& operator=(TLinker&); - EShExecutable executable; - bool haveReturnableObjectCode; // true when objectCode is acceptable to send to driver - - const ShBindingTable* appAttributeBindings; - const ShBindingTable* fixedAttributeBindings; - const int* excludedAttributes; - int excludedCount; - ShBindingTable* uniformBindings; // created by the linker -}; - -// -// This is the interface between the machine independent code -// and the machine dependent code. -// -// The machine dependent code should derive from the classes -// above. Then Construct*() and Delete*() will create and -// destroy the machine dependent objects, which contain the -// above machine independent information. -// -TCompiler* ConstructCompiler(EShLanguage, int); - -TShHandleBase* ConstructLinker(EShExecutable, int); -TShHandleBase* ConstructBindings(); -void DeleteLinker(TShHandleBase*); -void DeleteBindingList(TShHandleBase* bindingList); - -TUniformMap* ConstructUniformMap(); -void DeleteCompiler(TCompiler*); - -void DeleteUniformMap(TUniformMap*); - -#endif // _SHHANDLE_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/Types.h b/third_party/glslang-spirv/Include/Types.h deleted file mode 100644 index cc847b5e7..000000000 --- a/third_party/glslang-spirv/Include/Types.h +++ /dev/null @@ -1,1924 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2016 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _TYPES_INCLUDED -#define _TYPES_INCLUDED - -#include "../Include/Common.h" -#include "../Include/BaseTypes.h" -#include "../Public/ShaderLang.h" -#include "arrays.h" - -#include - -namespace glslang { - -const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded - -const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed -inline bool IsAnonymous(const TString& name) -{ - return name.compare(0, 5, AnonymousPrefix) == 0; -} - -// -// Details within a sampler type -// -enum TSamplerDim { - EsdNone, - Esd1D, - Esd2D, - Esd3D, - EsdCube, - EsdRect, - EsdBuffer, - EsdSubpass, // goes only with non-sampled image (image is true) - EsdNumDims -}; - -struct TSampler { // misnomer now; includes images, textures without sampler, and textures with sampler - TBasicType type : 8; // type returned by sampler - TSamplerDim dim : 8; - bool arrayed : 1; - bool shadow : 1; - bool ms : 1; - bool image : 1; // image, combined should be false - bool combined : 1; // true means texture is combined with a sampler, false means texture with no sampler - bool sampler : 1; // true means a pure sampler, other fields should be clear() - bool external : 1; // GL_OES_EGL_image_external - unsigned int vectorSize : 3; // vector return type size. - - // Some languages support structures as sample results. Storing the whole structure in the - // TSampler is too large, so there is an index to a separate table. - static const unsigned structReturnIndexBits = 4; // number of index bits to use. - static const unsigned structReturnSlots = (1< TTypeList; - -typedef TVector TIdentifierList; - -// -// Following are a series of helper enums for managing layouts and qualifiers, -// used for TPublicType, TType, others. 
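Before those enums, to make the TSampler encoding above concrete: here is how a sampler2DArrayShadow would be described. A sketch assuming the pre-removal include path and that TSampler is aggregate-initializable (makeShadowArraySampler is a hypothetical name):

    #include "third_party/glslang-spirv/Include/Types.h"  // tree before this diff

    using namespace glslang;

    TSampler makeShadowArraySampler() {
        TSampler s = {};    // zero every bitfield first (assumes no user constructor)
        s.type = EbtFloat;  // a depth comparison returns a float
        s.dim = Esd2D;
        s.arrayed = true;
        s.shadow = true;    // depth-comparison lookup
        s.combined = true;  // texture and sampler bound as one object
        s.vectorSize = 1;   // shadow fetches yield a single component
        return s;
    }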
-// - -enum TLayoutPacking { - ElpNone, - ElpShared, // default, but different than saying nothing - ElpStd140, - ElpStd430, - ElpPacked, - ElpCount // If expanding, see bitfield width below -}; - -enum TLayoutMatrix { - ElmNone, - ElmRowMajor, - ElmColumnMajor, // default, but different than saying nothing - ElmCount // If expanding, see bitfield width below -}; - -// Union of geometry shader and tessellation shader geometry types. -// They don't go into TType, but rather have current state per shader or -// active parser type (TPublicType). -enum TLayoutGeometry { - ElgNone, - ElgPoints, - ElgLines, - ElgLinesAdjacency, - ElgLineStrip, - ElgTriangles, - ElgTrianglesAdjacency, - ElgTriangleStrip, - ElgQuads, - ElgIsolines, -}; - -enum TVertexSpacing { - EvsNone, - EvsEqual, - EvsFractionalEven, - EvsFractionalOdd -}; - -enum TVertexOrder { - EvoNone, - EvoCw, - EvoCcw -}; - -// Note: order matters, as type of format is done by comparison. -enum TLayoutFormat { - ElfNone, - - // Float image - ElfRgba32f, - ElfRgba16f, - ElfR32f, - ElfRgba8, - ElfRgba8Snorm, - - ElfEsFloatGuard, // to help with comparisons - - ElfRg32f, - ElfRg16f, - ElfR11fG11fB10f, - ElfR16f, - ElfRgba16, - ElfRgb10A2, - ElfRg16, - ElfRg8, - ElfR16, - ElfR8, - ElfRgba16Snorm, - ElfRg16Snorm, - ElfRg8Snorm, - ElfR16Snorm, - ElfR8Snorm, - - ElfFloatGuard, // to help with comparisons - - // Int image - ElfRgba32i, - ElfRgba16i, - ElfRgba8i, - ElfR32i, - - ElfEsIntGuard, // to help with comparisons - - ElfRg32i, - ElfRg16i, - ElfRg8i, - ElfR16i, - ElfR8i, - - ElfIntGuard, // to help with comparisons - - // Uint image - ElfRgba32ui, - ElfRgba16ui, - ElfRgba8ui, - ElfR32ui, - - ElfEsUintGuard, // to help with comparisons - - ElfRg32ui, - ElfRg16ui, - ElfRgb10a2ui, - ElfRg8ui, - ElfR16ui, - ElfR8ui, - - ElfCount -}; - -enum TLayoutDepth { - EldNone, - EldAny, - EldGreater, - EldLess, - EldUnchanged, - - EldCount -}; - -enum TBlendEquationShift { - // No 'EBlendNone': - // These are used as bit-shift amounts. A mask of such shifts will have type 'int', - // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. - EBlendMultiply, - EBlendScreen, - EBlendOverlay, - EBlendDarken, - EBlendLighten, - EBlendColordodge, - EBlendColorburn, - EBlendHardlight, - EBlendSoftlight, - EBlendDifference, - EBlendExclusion, - EBlendHslHue, - EBlendHslSaturation, - EBlendHslColor, - EBlendHslLuminosity, - EBlendAllEquations, - - EBlendCount -}; - -class TQualifier { -public: - static const int layoutNotSet = -1; - - void clear() - { - precision = EpqNone; - invariant = false; - noContraction = false; - makeTemporary(); - declaredBuiltIn = EbvNone; - } - - // drop qualifiers that don't belong in a temporary variable - void makeTemporary() - { - semanticName = nullptr; - storage = EvqTemporary; - builtIn = EbvNone; - clearInterstage(); - clearMemory(); - specConstant = false; - clearLayout(); - } - - void clearInterstage() - { - clearInterpolation(); - patch = false; - sample = false; - } - - void clearInterpolation() - { - centroid = false; - smooth = false; - flat = false; - nopersp = false; -#ifdef AMD_EXTENSIONS - explicitInterp = false; -#endif - } - - void clearMemory() - { - coherent = false; - volatil = false; - restrict = false; - readonly = false; - writeonly = false; - } - - // Drop just the storage qualification, which perhaps should - // never be done, as it is fundamentally inconsistent, but need to - // explore what downstream consumers need. 
- // E.g., in a deference, it is an inconsistency between: - // A) partially dereferenced resource is still in the storage class it started in - // B) partially dereferenced resource is a new temporary object - // If A, then nothing should change, if B, then everything should change, but this is half way. - void makePartialTemporary() - { - storage = EvqTemporary; - specConstant = false; - } - - const char* semanticName; - TStorageQualifier storage : 6; - TBuiltInVariable builtIn : 8; - TBuiltInVariable declaredBuiltIn : 8; - TPrecisionQualifier precision : 3; - bool invariant : 1; // require canonical treatment for cross-shader invariance - bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects - bool centroid : 1; - bool smooth : 1; - bool flat : 1; - bool nopersp : 1; -#ifdef AMD_EXTENSIONS - bool explicitInterp : 1; -#endif - bool patch : 1; - bool sample : 1; - bool coherent : 1; - bool volatil : 1; - bool restrict : 1; - bool readonly : 1; - bool writeonly : 1; - bool specConstant : 1; // having a constant_id is not sufficient: expressions have no id, but are still specConstant - - bool isMemory() const - { - return coherent || volatil || restrict || readonly || writeonly; - } - bool isInterpolation() const - { -#ifdef AMD_EXTENSIONS - return flat || smooth || nopersp || explicitInterp; -#else - return flat || smooth || nopersp; -#endif - } - bool isAuxiliary() const - { - return centroid || patch || sample; - } - - bool isPipeInput() const - { - switch (storage) { - case EvqVaryingIn: - case EvqFragCoord: - case EvqPointCoord: - case EvqFace: - case EvqVertexId: - case EvqInstanceId: - return true; - default: - return false; - } - } - - bool isPipeOutput() const - { - switch (storage) { - case EvqPosition: - case EvqPointSize: - case EvqClipVertex: - case EvqVaryingOut: - case EvqFragColor: - case EvqFragDepth: - return true; - default: - return false; - } - } - - bool isParamInput() const - { - switch (storage) { - case EvqIn: - case EvqInOut: - case EvqConstReadOnly: - return true; - default: - return false; - } - } - - bool isParamOutput() const - { - switch (storage) { - case EvqOut: - case EvqInOut: - return true; - default: - return false; - } - } - - bool isUniformOrBuffer() const - { - switch (storage) { - case EvqUniform: - case EvqBuffer: - return true; - default: - return false; - } - } - - bool isIo() const - { - switch (storage) { - case EvqUniform: - case EvqBuffer: - case EvqVaryingIn: - case EvqFragCoord: - case EvqPointCoord: - case EvqFace: - case EvqVertexId: - case EvqInstanceId: - case EvqPosition: - case EvqPointSize: - case EvqClipVertex: - case EvqVaryingOut: - case EvqFragColor: - case EvqFragDepth: - return true; - default: - return false; - } - } - - // True if this type of IO is supposed to be arrayed with extra level for per-vertex data - bool isArrayedIo(EShLanguage language) const - { - switch (language) { - case EShLangGeometry: - return isPipeInput(); - case EShLangTessControl: - return ! patch && (isPipeInput() || isPipeOutput()); - case EShLangTessEvaluation: - return ! 
patch && isPipeInput(); - default: - return false; - } - } - - // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield - void clearLayout() // all layout - { - clearUniformLayout(); - - layoutPushConstant = false; -#ifdef NV_EXTENSIONS - layoutPassthrough = false; - layoutViewportRelative = false; - // -2048 as the default value indicating layoutSecondaryViewportRelative is not set - layoutSecondaryViewportRelativeOffset = -2048; -#endif - - clearInterstageLayout(); - - layoutSpecConstantId = layoutSpecConstantIdEnd; - - layoutFormat = ElfNone; - } - void clearInterstageLayout() - { - layoutLocation = layoutLocationEnd; - layoutComponent = layoutComponentEnd; - layoutIndex = layoutIndexEnd; - clearStreamLayout(); - clearXfbLayout(); - } - void clearStreamLayout() - { - layoutStream = layoutStreamEnd; - } - void clearXfbLayout() - { - layoutXfbBuffer = layoutXfbBufferEnd; - layoutXfbStride = layoutXfbStrideEnd; - layoutXfbOffset = layoutXfbOffsetEnd; - } - - bool hasNonXfbLayout() const - { - return hasUniformLayout() || - hasAnyLocation() || - hasStream() || - hasFormat() || - layoutPushConstant; - } - bool hasLayout() const - { - return hasNonXfbLayout() || - hasXfb(); - } - TLayoutMatrix layoutMatrix : 3; - TLayoutPacking layoutPacking : 4; - int layoutOffset; - int layoutAlign; - - unsigned int layoutLocation :12; - static const unsigned int layoutLocationEnd = 0xFFF; - - unsigned int layoutComponent : 3; - static const unsigned int layoutComponentEnd = 4; - - unsigned int layoutSet : 7; - static const unsigned int layoutSetEnd = 0x3F; - - unsigned int layoutBinding : 16; - static const unsigned int layoutBindingEnd = 0xFFFF; - - unsigned int layoutIndex : 8; - static const unsigned int layoutIndexEnd = 0xFF; - - unsigned int layoutStream : 8; - static const unsigned int layoutStreamEnd = 0xFF; - - unsigned int layoutXfbBuffer : 4; - static const unsigned int layoutXfbBufferEnd = 0xF; - - unsigned int layoutXfbStride : 10; - static const unsigned int layoutXfbStrideEnd = 0x3FF; - - unsigned int layoutXfbOffset : 10; - static const unsigned int layoutXfbOffsetEnd = 0x3FF; - - unsigned int layoutAttachment : 8; // for input_attachment_index - static const unsigned int layoutAttachmentEnd = 0XFF; - - unsigned int layoutSpecConstantId : 11; - static const unsigned int layoutSpecConstantIdEnd = 0x7FF; - - TLayoutFormat layoutFormat : 8; - - bool layoutPushConstant; - -#ifdef NV_EXTENSIONS - bool layoutPassthrough; - bool layoutViewportRelative; - int layoutSecondaryViewportRelativeOffset; -#endif - - bool hasUniformLayout() const - { - return hasMatrix() || - hasPacking() || - hasOffset() || - hasBinding() || - hasSet() || - hasAlign(); - } - void clearUniformLayout() // only uniform specific - { - layoutMatrix = ElmNone; - layoutPacking = ElpNone; - layoutOffset = layoutNotSet; - layoutAlign = layoutNotSet; - - layoutSet = layoutSetEnd; - layoutBinding = layoutBindingEnd; - layoutAttachment = layoutAttachmentEnd; - } - - bool hasMatrix() const - { - return layoutMatrix != ElmNone; - } - bool hasPacking() const - { - return layoutPacking != ElpNone; - } - bool hasOffset() const - { - return layoutOffset != layoutNotSet; - } - bool hasAlign() const - { - return layoutAlign != layoutNotSet; - } - bool hasAnyLocation() const - { - return hasLocation() || - hasComponent() || - hasIndex(); - } - bool hasLocation() const - { - return layoutLocation != layoutLocationEnd; - } - bool hasComponent() const - { - return layoutComponent != layoutComponentEnd; - 
} - bool hasIndex() const - { - return layoutIndex != layoutIndexEnd; - } - bool hasSet() const - { - return layoutSet != layoutSetEnd; - } - bool hasBinding() const - { - return layoutBinding != layoutBindingEnd; - } - bool hasStream() const - { - return layoutStream != layoutStreamEnd; - } - bool hasFormat() const - { - return layoutFormat != ElfNone; - } - bool hasXfb() const - { - return hasXfbBuffer() || - hasXfbStride() || - hasXfbOffset(); - } - bool hasXfbBuffer() const - { - return layoutXfbBuffer != layoutXfbBufferEnd; - } - bool hasXfbStride() const - { - return layoutXfbStride != layoutXfbStrideEnd; - } - bool hasXfbOffset() const - { - return layoutXfbOffset != layoutXfbOffsetEnd; - } - bool hasAttachment() const - { - return layoutAttachment != layoutAttachmentEnd; - } - bool hasSpecConstantId() const - { - // Not the same thing as being a specialization constant, this - // is just whether or not it was declared with an ID. - return layoutSpecConstantId != layoutSpecConstantIdEnd; - } - bool isSpecConstant() const - { - // True if type is a specialization constant, whether or not it - // had a specialization-constant ID, and false if it is not a - // true front-end constant. - return specConstant; - } - bool isFrontEndConstant() const - { - // True if the front-end knows the final constant value. - // This allows front-end constant folding. - return storage == EvqConst && ! specConstant; - } - bool isConstant() const - { - // True if is either kind of constant; specialization or regular. - return isFrontEndConstant() || isSpecConstant(); - } - void makeSpecConstant() - { - storage = EvqConst; - specConstant = true; - } - static const char* getLayoutPackingString(TLayoutPacking packing) - { - switch (packing) { - case ElpPacked: return "packed"; - case ElpShared: return "shared"; - case ElpStd140: return "std140"; - case ElpStd430: return "std430"; - default: return "none"; - } - } - static const char* getLayoutMatrixString(TLayoutMatrix m) - { - switch (m) { - case ElmColumnMajor: return "column_major"; - case ElmRowMajor: return "row_major"; - default: return "none"; - } - } - static const char* getLayoutFormatString(TLayoutFormat f) - { - switch (f) { - case ElfRgba32f: return "rgba32f"; - case ElfRgba16f: return "rgba16f"; - case ElfRg32f: return "rg32f"; - case ElfRg16f: return "rg16f"; - case ElfR11fG11fB10f: return "r11f_g11f_b10f"; - case ElfR32f: return "r32f"; - case ElfR16f: return "r16f"; - case ElfRgba16: return "rgba16"; - case ElfRgb10A2: return "rgb10_a2"; - case ElfRgba8: return "rgba8"; - case ElfRg16: return "rg16"; - case ElfRg8: return "rg8"; - case ElfR16: return "r16"; - case ElfR8: return "r8"; - case ElfRgba16Snorm: return "rgba16_snorm"; - case ElfRgba8Snorm: return "rgba8_snorm"; - case ElfRg16Snorm: return "rg16_snorm"; - case ElfRg8Snorm: return "rg8_snorm"; - case ElfR16Snorm: return "r16_snorm"; - case ElfR8Snorm: return "r8_snorm"; - - case ElfRgba32i: return "rgba32i"; - case ElfRgba16i: return "rgba16i"; - case ElfRgba8i: return "rgba8i"; - case ElfRg32i: return "rg32i"; - case ElfRg16i: return "rg16i"; - case ElfRg8i: return "rg8i"; - case ElfR32i: return "r32i"; - case ElfR16i: return "r16i"; - case ElfR8i: return "r8i"; - - case ElfRgba32ui: return "rgba32ui"; - case ElfRgba16ui: return "rgba16ui"; - case ElfRgba8ui: return "rgba8ui"; - case ElfRg32ui: return "rg32ui"; - case ElfRg16ui: return "rg16ui"; - case ElfRgb10a2ui: return "rgb10_a2ui"; - case ElfRg8ui: return "rg8ui"; - case ElfR32ui: return "r32ui"; - case ElfR16ui: return 
"r16ui"; - case ElfR8ui: return "r8ui"; - default: return "none"; - } - } - static const char* getLayoutDepthString(TLayoutDepth d) - { - switch (d) { - case EldAny: return "depth_any"; - case EldGreater: return "depth_greater"; - case EldLess: return "depth_less"; - case EldUnchanged: return "depth_unchanged"; - default: return "none"; - } - } - static const char* getBlendEquationString(TBlendEquationShift e) - { - switch (e) { - case EBlendMultiply: return "blend_support_multiply"; - case EBlendScreen: return "blend_support_screen"; - case EBlendOverlay: return "blend_support_overlay"; - case EBlendDarken: return "blend_support_darken"; - case EBlendLighten: return "blend_support_lighten"; - case EBlendColordodge: return "blend_support_colordodge"; - case EBlendColorburn: return "blend_support_colorburn"; - case EBlendHardlight: return "blend_support_hardlight"; - case EBlendSoftlight: return "blend_support_softlight"; - case EBlendDifference: return "blend_support_difference"; - case EBlendExclusion: return "blend_support_exclusion"; - case EBlendHslHue: return "blend_support_hsl_hue"; - case EBlendHslSaturation: return "blend_support_hsl_saturation"; - case EBlendHslColor: return "blend_support_hsl_color"; - case EBlendHslLuminosity: return "blend_support_hsl_luminosity"; - case EBlendAllEquations: return "blend_support_all_equations"; - default: return "unknown"; - } - } - static const char* getGeometryString(TLayoutGeometry geometry) - { - switch (geometry) { - case ElgPoints: return "points"; - case ElgLines: return "lines"; - case ElgLinesAdjacency: return "lines_adjacency"; - case ElgLineStrip: return "line_strip"; - case ElgTriangles: return "triangles"; - case ElgTrianglesAdjacency: return "triangles_adjacency"; - case ElgTriangleStrip: return "triangle_strip"; - case ElgQuads: return "quads"; - case ElgIsolines: return "isolines"; - default: return "none"; - } - } - static const char* getVertexSpacingString(TVertexSpacing spacing) - { - switch (spacing) { - case EvsEqual: return "equal_spacing"; - case EvsFractionalEven: return "fractional_even_spacing"; - case EvsFractionalOdd: return "fractional_odd_spacing"; - default: return "none"; - } - } - static const char* getVertexOrderString(TVertexOrder order) - { - switch (order) { - case EvoCw: return "cw"; - case EvoCcw: return "ccw"; - default: return "none"; - } - } - static int mapGeometryToSize(TLayoutGeometry geometry) - { - switch (geometry) { - case ElgPoints: return 1; - case ElgLines: return 2; - case ElgLinesAdjacency: return 4; - case ElgTriangles: return 3; - case ElgTrianglesAdjacency: return 6; - default: return 0; - } - } -}; - -// Qualifiers that don't need to be keep per object. They have shader scope, not object scope. -// So, they will not be part of TType, TQualifier, etc. 
-struct TShaderQualifiers { - TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives - bool pixelCenterInteger; // fragment shader - bool originUpperLeft; // fragment shader - int invocations; - int vertices; // both for tessellation "vertices" and geometry "max_vertices" - TVertexSpacing spacing; - TVertexOrder order; - bool pointMode; - int localSize[3]; // compute shader - int localSizeSpecId[3]; // compute shader specialization id for gl_WorkGroupSize - bool earlyFragmentTests; // fragment input - bool postDepthCoverage; // fragment input - TLayoutDepth layoutDepth; - bool blendEquation; // true if any blend equation was specified - int numViews; // multiview extensions - -#ifdef NV_EXTENSIONS - bool layoutOverrideCoverage; // true if layout override_coverage set -#endif - - void init() - { - geometry = ElgNone; - originUpperLeft = false; - pixelCenterInteger = false; - invocations = TQualifier::layoutNotSet; - vertices = TQualifier::layoutNotSet; - spacing = EvsNone; - order = EvoNone; - pointMode = false; - localSize[0] = 1; - localSize[1] = 1; - localSize[2] = 1; - localSizeSpecId[0] = TQualifier::layoutNotSet; - localSizeSpecId[1] = TQualifier::layoutNotSet; - localSizeSpecId[2] = TQualifier::layoutNotSet; - earlyFragmentTests = false; - postDepthCoverage = false; - layoutDepth = EldNone; - blendEquation = false; - numViews = TQualifier::layoutNotSet; -#ifdef NV_EXTENSIONS - layoutOverrideCoverage = false; -#endif - } - - // Merge in characteristics from the 'src' qualifier. They can override when - // set, but never erase when not set. - void merge(const TShaderQualifiers& src) - { - if (src.geometry != ElgNone) - geometry = src.geometry; - if (src.pixelCenterInteger) - pixelCenterInteger = src.pixelCenterInteger; - if (src.originUpperLeft) - originUpperLeft = src.originUpperLeft; - if (src.invocations != TQualifier::layoutNotSet) - invocations = src.invocations; - if (src.vertices != TQualifier::layoutNotSet) - vertices = src.vertices; - if (src.spacing != EvsNone) - spacing = src.spacing; - if (src.order != EvoNone) - order = src.order; - if (src.pointMode) - pointMode = true; - for (int i = 0; i < 3; ++i) { - if (src.localSize[i] > 1) - localSize[i] = src.localSize[i]; - } - for (int i = 0; i < 3; ++i) { - if (src.localSizeSpecId[i] != TQualifier::layoutNotSet) - localSizeSpecId[i] = src.localSizeSpecId[i]; - } - if (src.earlyFragmentTests) - earlyFragmentTests = true; - if (src.postDepthCoverage) - postDepthCoverage = true; - if (src.layoutDepth) - layoutDepth = src.layoutDepth; - if (src.blendEquation) - blendEquation = src.blendEquation; - if (src.numViews != TQualifier::layoutNotSet) - numViews = src.numViews; -#ifdef NV_EXTENSIONS - if (src.layoutOverrideCoverage) - layoutOverrideCoverage = src.layoutOverrideCoverage; -#endif - } -}; - -// -// TPublicType is just temporarily used while parsing and not quite the same -// information kept per node in TType. Due to the bison stack, it can't have -// types that it thinks have non-trivial constructors. It should -// just be used while recognizing the grammar, not anything else. -// Once enough is known about the situation, the proper information is -// moved into a TType, or the parse context, etc.
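The paragraph above describes a common parser pattern: a trivially-copyable scratch record that can sit in a bison value union, later folded into a real, constructor-owning type object. A minimal standalone sketch of the pattern, with hypothetical names (ParsedType and Type are illustrative only, not the glslang API):

#include <string>

// Scratch record: no constructors or destructors, so it is safe inside a
// parser's value union.
struct ParsedType {
    int  basicTypeTag;  // e.g. an EbtFloat-style enum value
    int  vectorSize;    // 1 for scalars
    bool isConst;
};

// Real type object, built once the grammar has recognized enough context.
class Type {
public:
    explicit Type(const ParsedType& p)
        : tag_(p.basicTypeTag), vectorSize_(p.vectorSize), isConst_(p.isConst) {}

    std::string describe() const {
        return (isConst_ ? std::string("const ") : std::string()) +
               std::to_string(vectorSize_) + "-component type (tag " +
               std::to_string(tag_) + ")";
    }

private:
    int  tag_;
    int  vectorSize_;
    bool isConst_;
};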
-// -class TPublicType { -public: - TBasicType basicType; - TSampler sampler; - TQualifier qualifier; - TShaderQualifiers shaderQualifiers; - int vectorSize : 4; - int matrixCols : 4; - int matrixRows : 4; - TArraySizes* arraySizes; - const TType* userDef; - TSourceLoc loc; - - void initType(const TSourceLoc& l) - { - basicType = EbtVoid; - vectorSize = 1; - matrixRows = 0; - matrixCols = 0; - arraySizes = nullptr; - userDef = nullptr; - loc = l; - } - - void initQualifiers(bool global = false) - { - qualifier.clear(); - if (global) - qualifier.storage = EvqGlobal; - } - - void init(const TSourceLoc& l, bool global = false) - { - initType(l); - sampler.clear(); - initQualifiers(global); - shaderQualifiers.init(); - } - - void setVector(int s) - { - matrixRows = 0; - matrixCols = 0; - vectorSize = s; - } - - void setMatrix(int c, int r) - { - matrixRows = r; - matrixCols = c; - vectorSize = 0; - } - - bool isScalar() const - { - return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; - } - - // "Image" is a superset of "Subpass" - bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } - bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } -}; - -// -// Base class for things that have a type. -// -class TType { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - // for "empty" type (no args) or simple scalar/vector/matrix - explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, - bool isVector = false) : - basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - sampler.clear(); - qualifier.clear(); - qualifier.storage = q; - assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices - } - // for explicit precision qualifier - TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, - bool isVector = false) : - basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - sampler.clear(); - qualifier.clear(); - qualifier.storage = q; - qualifier.precision = p; - assert(p >= EpqNone && p <= EpqHigh); - assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices - } - // for turning a TPublicType into a TType, using a shallow copy - explicit TType(const TPublicType& p) : - basicType(p.basicType), - vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), - arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - if (basicType == EbtSampler) - sampler = p.sampler; - else - sampler.clear(); - qualifier = p.qualifier; - if (p.userDef) { - structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues - typeName = NewPoolTString(p.userDef->getTypeName().c_str()); - } - } - // for construction of sampler types - TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : - basicType(EbtSampler), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), - arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), - sampler(sampler) - { - qualifier.clear(); - qualifier.storage = q; - } - // to efficiently make a dereferenced type - // without ever duplicating the 
outer structure that will be thrown away - and using only shallow copy - TType(const TType& type, int derefIndex, bool rowMajor = false) - { - if (type.isArray()) { - shallowCopy(type); - if (type.getArraySizes()->getNumDims() == 1) { - arraySizes = nullptr; - } else { - // want our own copy of the array, so we can edit it - arraySizes = new TArraySizes; - arraySizes->copyDereferenced(*type.arraySizes); - } - } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { - // do a structure dereference - const TTypeList& memberList = *type.getStruct(); - shallowCopy(*memberList[derefIndex].type); - return; - } else { - // do a vector/matrix dereference - shallowCopy(type); - if (matrixCols > 0) { - // dereference from matrix to vector - if (rowMajor) - vectorSize = matrixCols; - else - vectorSize = matrixRows; - matrixCols = 0; - matrixRows = 0; - if (vectorSize == 1) - vector1 = true; - } else if (isVector()) { - // dereference from vector to scalar - vectorSize = 1; - vector1 = false; - } - } - } - // for making structures, ... - TType(TTypeList* userDef, const TString& n) : - basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), - arraySizes(nullptr), structure(userDef), fieldName(nullptr) - { - sampler.clear(); - qualifier.clear(); - typeName = NewPoolTString(n.c_str()); - } - // For interface blocks - TType(TTypeList* userDef, const TString& n, const TQualifier& q) : - basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), - qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr) - { - sampler.clear(); - typeName = NewPoolTString(n.c_str()); - } - virtual ~TType() {} - - // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. - // This only works if that information (like a structure's list of types) does not change and - // the instances are sharing the same pool. - void shallowCopy(const TType& copyOf) - { - basicType = copyOf.basicType; - sampler = copyOf.sampler; - qualifier = copyOf.qualifier; - vectorSize = copyOf.vectorSize; - matrixCols = copyOf.matrixCols; - matrixRows = copyOf.matrixRows; - vector1 = copyOf.vector1; - arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents - structure = copyOf.structure; - fieldName = copyOf.fieldName; - typeName = copyOf.typeName; - } - - // Make complete copy of the whole type graph rooted at 'copyOf'. - void deepCopy(const TType& copyOf) - { - TMap<TTypeList*, TTypeList*> copied; // to enable copying a type graph as a graph, not a tree - deepCopy(copyOf, copied); - } - - // Recursively make temporary - void makeTemporary() - { - getQualifier().makeTemporary(); - - if (isStruct()) - for (unsigned int i = 0; i < structure->size(); ++i) - (*structure)[i].type->makeTemporary(); - } - - TType* clone() const - { - TType *newType = new TType(); - newType->deepCopy(*this); - - return newType; - } - - void makeVector() { vector1 = true; } - - // Merge type from parent, where a parentType is at the beginning of a declaration, - // establishing some characteristics for all subsequent names, while this type - // is on the individual names.
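To make the merge below concrete: in a GLSL declaration such as `uniform vec3 a[2], b;` the parent type `uniform vec3` applies to every declarator, while a's arrayness must survive the merge. A toy sketch of that rule under simplified, hypothetical types (ToyParent and ToyDecl are not glslang types):

#include <optional>

struct ToyParent { int basicType = 0; bool uniform = false; };
struct ToyDecl   { std::optional<int> arraySize; int basicType = 0; bool uniform = false; };

// Copy everything from the parent except arrayness, which stays per-name,
// mirroring "arrayness is currently the only child aspect that has to be
// preserved" in mergeType() below.
void mergeFromParent(ToyDecl& d, const ToyParent& p) {
    d.basicType = p.basicType;
    d.uniform   = p.uniform;
    // d.arraySize deliberately left untouched
}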
- void mergeType(const TPublicType& parentType) - { - // arrayness is currently the only child aspect that has to be preserved - basicType = parentType.basicType; - vectorSize = parentType.vectorSize; - matrixCols = parentType.matrixCols; - matrixRows = parentType.matrixRows; - vector1 = false; // TPublicType is only GLSL which so far has no vec1 - qualifier = parentType.qualifier; - sampler = parentType.sampler; - if (parentType.arraySizes) - newArraySizes(*parentType.arraySizes); - if (parentType.userDef) { - structure = parentType.userDef->getWritableStruct(); - setTypeName(parentType.userDef->getTypeName()); - } - } - - virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; } - virtual bool hiddenMember() const { return basicType == EbtVoid; } - - virtual void setTypeName(const TString& n) { typeName = NewPoolTString(n.c_str()); } - virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); } - virtual const TString& getTypeName() const - { - assert(typeName); - return *typeName; - } - - virtual const TString& getFieldName() const - { - assert(fieldName); - return *fieldName; - } - - virtual TBasicType getBasicType() const { return basicType; } - virtual const TSampler& getSampler() const { return sampler; } - virtual TSampler& getSampler() { return sampler; } - - virtual TQualifier& getQualifier() { return qualifier; } - virtual const TQualifier& getQualifier() const { return qualifier; } - - virtual int getVectorSize() const { return vectorSize; } // returns 1 for either scalar or vector of size 1, valid for both - virtual int getMatrixCols() const { return matrixCols; } - virtual int getMatrixRows() const { return matrixRows; } - virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); } - virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); } - virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); } - virtual bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; } - virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); } - virtual const TArraySizes* getArraySizes() const { return arraySizes; } - virtual TArraySizes& getArraySizes() { assert(arraySizes != nullptr); return *arraySizes; } - - virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); } - virtual bool isScalarOrVec1() const { return isScalar() || vector1; } - virtual bool isVector() const { return vectorSize > 1 || vector1; } - virtual bool isMatrix() const { return matrixCols ? 
true : false; } - virtual bool isArray() const { return arraySizes != nullptr; } - virtual bool isExplicitlySizedArray() const { return isArray() && getOuterArraySize() != UnsizedArraySize; } - virtual bool isImplicitlySizedArray() const { return isArray() && getOuterArraySize() == UnsizedArraySize && qualifier.storage != EvqBuffer; } - virtual bool isRuntimeSizedArray() const { return isArray() && getOuterArraySize() == UnsizedArraySize && qualifier.storage == EvqBuffer; } - virtual bool isStruct() const { return structure != nullptr; } -#ifdef AMD_EXTENSIONS - virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; } -#else - virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble; } -#endif - virtual bool isIntegerDomain() const - { - switch (basicType) { - case EbtInt: - case EbtUint: - case EbtInt64: - case EbtUint64: -#ifdef AMD_EXTENSIONS - case EbtInt16: - case EbtUint16: -#endif - case EbtAtomicUint: - return true; - default: - break; - } - return false; - } - virtual bool isOpaque() const { return basicType == EbtSampler || basicType == EbtAtomicUint; } - virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; } - - // "Image" is a superset of "Subpass" - virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); } - virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); } - - // return true if this type contains any subtype which satisfies the given predicate. - template <typename P> - bool contains(P predicate) const - { - if (predicate(this)) - return true; - - const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); }; - - return structure && std::any_of(structure->begin(), structure->end(), hasa); - } - - // Recursively checks if the type contains the given basic type - virtual bool containsBasicType(TBasicType checkType) const - { - return contains([checkType](const TType* t) { return t->basicType == checkType; } ); - } - - // Recursively check the structure for any arrays, needed for some error checks - virtual bool containsArray() const - { - return contains([](const TType* t) { return t->isArray(); } ); - } - - // Check the structure for any structures, needed for some error checks - virtual bool containsStructure() const - { - return contains([this](const TType* t) { return t != this && t->isStruct(); } ); - } - - // Recursively check the structure for any implicitly-sized arrays, needed for triggering a copyUp(). - virtual bool containsImplicitlySizedArray() const - { - return contains([](const TType* t) { return t->isImplicitlySizedArray(); } ); - } - - virtual bool containsOpaque() const - { - return contains([](const TType* t) { return t->isOpaque(); } ); - } - - // Recursively checks if the type contains a built-in variable - virtual bool containsBuiltIn() const - { - return contains([](const TType* t) { return t->isBuiltIn(); } ); - } - - virtual bool containsNonOpaque() const - { - const auto nonOpaque = [](const TType* t) { - switch (t->basicType) { - case EbtVoid: - case EbtFloat: - case EbtDouble: -#ifdef AMD_EXTENSIONS - case EbtFloat16: -#endif - case EbtInt: - case EbtUint: - case EbtInt64: - case EbtUint64: -#ifdef AMD_EXTENSIONS - case EbtInt16: - case EbtUint16: -#endif - case EbtBool: - return true; - default: - return false; - } - }; - - return contains(nonOpaque); - } - - virtual bool containsSpecializationSize() const - { - return contains([](const TType* t) { return t->isArray() && t->arraySizes->isOuterSpecialization(); } ); - } - - // Array editing methods. Array descriptors can be shared across - // type instances. This allows all uses of the same array - // to be updated at once. E.g., all nodes can be explicitly sized - // by tracking and correcting one implicit size. Or, all nodes - // can get the explicit size on a redeclaration that gives size. - // - // N.B.: Don't share with the shared symbol tables (symbols are - // marked as isReadOnly()). Such symbols with arrays that will be - // edited need to copyUp() on first use, so that - // A) the edits don't affect the shared symbol table, and - // B) the edits are shared across all users. - void updateArraySizes(const TType& type) - { - // For when we may already be sharing existing array descriptors, - // keeping the pointers the same, just updating the contents. - assert(arraySizes != nullptr); - assert(type.arraySizes != nullptr); - *arraySizes = *type.arraySizes; - } - void newArraySizes(const TArraySizes& s) - { - // For setting a fresh new set of array sizes, not yet worrying about sharing. - arraySizes = new TArraySizes; - *arraySizes = s; - } - void clearArraySizes() - { - arraySizes = nullptr; - } - void addArrayOuterSizes(const TArraySizes& s) - { - if (arraySizes == nullptr) - newArraySizes(s); - else - arraySizes->addOuterSizes(s); - } - void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); } - void setImplicitArraySize(int s) { arraySizes->setImplicitSize(s); } - - // Recursively make the implicit array size the explicit array size, through the type tree.
- void adoptImplicitArraySizes() - { - if (isImplicitlySizedArray()) - changeOuterArraySize(getImplicitArraySize()); - if (isStruct()) { - for (int i = 0; i < (int)structure->size(); ++i) - (*structure)[i].type->adoptImplicitArraySizes(); - } - } - - const char* getBasicString() const - { - return TType::getBasicString(basicType); - } - - static const char* getBasicString(TBasicType t) - { - switch (t) { - case EbtVoid: return "void"; - case EbtFloat: return "float"; - case EbtDouble: return "double"; -#ifdef AMD_EXTENSIONS - case EbtFloat16: return "float16_t"; -#endif - case EbtInt: return "int"; - case EbtUint: return "uint"; - case EbtInt64: return "int64_t"; - case EbtUint64: return "uint64_t"; -#ifdef AMD_EXTENSIONS - case EbtInt16: return "int16_t"; - case EbtUint16: return "uint16_t"; -#endif - case EbtBool: return "bool"; - case EbtAtomicUint: return "atomic_uint"; - case EbtSampler: return "sampler/image"; - case EbtStruct: return "structure"; - case EbtBlock: return "block"; - default: return "unknown type"; - } - } - - TString getCompleteString() const - { - TString typeString; - - const auto appendStr = [&](const char* s) { typeString.append(s); }; - const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; - const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; - - if (qualifier.hasLayout()) { - // To reduce noise, skip this if the only layout is an xfb_buffer - // with no triggering xfb_offset. - TQualifier noXfbBuffer = qualifier; - noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; - if (noXfbBuffer.hasLayout()) { - appendStr("layout("); - if (qualifier.hasAnyLocation()) { - appendStr(" location="); - appendUint(qualifier.layoutLocation); - if (qualifier.hasComponent()) { - appendStr(" component="); - appendUint(qualifier.layoutComponent); - } - if (qualifier.hasIndex()) { - appendStr(" index="); - appendUint(qualifier.layoutIndex); - } - } - if (qualifier.hasSet()) { - appendStr(" set="); - appendUint(qualifier.layoutSet); - } - if (qualifier.hasBinding()) { - appendStr(" binding="); - appendUint(qualifier.layoutBinding); - } - if (qualifier.hasStream()) { - appendStr(" stream="); - appendUint(qualifier.layoutStream); - } - if (qualifier.hasMatrix()) { - appendStr(" "); - appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); - } - if (qualifier.hasPacking()) { - appendStr(" "); - appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); - } - if (qualifier.hasOffset()) { - appendStr(" offset="); - appendInt(qualifier.layoutOffset); - } - if (qualifier.hasAlign()) { - appendStr(" align="); - appendInt(qualifier.layoutAlign); - } - if (qualifier.hasFormat()) { - appendStr(" "); - appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); - } - if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { - appendStr(" xfb_buffer="); - appendUint(qualifier.layoutXfbBuffer); - } - if (qualifier.hasXfbOffset()) { - appendStr(" xfb_offset="); - appendUint(qualifier.layoutXfbOffset); - } - if (qualifier.hasXfbStride()) { - appendStr(" xfb_stride="); - appendUint(qualifier.layoutXfbStride); - } - if (qualifier.hasAttachment()) { - appendStr(" input_attachment_index="); - appendUint(qualifier.layoutAttachment); - } - if (qualifier.hasSpecConstantId()) { - appendStr(" constant_id="); - appendUint(qualifier.layoutSpecConstantId); - } - if (qualifier.layoutPushConstant) - appendStr(" push_constant"); - -#ifdef NV_EXTENSIONS - if (qualifier.layoutPassthrough) - 
appendStr(" passthrough"); - if (qualifier.layoutViewportRelative) - appendStr(" layoutViewportRelative"); - if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { - appendStr(" layoutSecondaryViewportRelativeOffset="); - appendInt(qualifier.layoutSecondaryViewportRelativeOffset); - } -#endif - - appendStr(")"); - } - } - - if (qualifier.invariant) - appendStr(" invariant"); - if (qualifier.noContraction) - appendStr(" noContraction"); - if (qualifier.centroid) - appendStr(" centroid"); - if (qualifier.smooth) - appendStr(" smooth"); - if (qualifier.flat) - appendStr(" flat"); - if (qualifier.nopersp) - appendStr(" noperspective"); -#ifdef AMD_EXTENSIONS - if (qualifier.explicitInterp) - appendStr(" __explicitInterpAMD"); -#endif - if (qualifier.patch) - appendStr(" patch"); - if (qualifier.sample) - appendStr(" sample"); - if (qualifier.coherent) - appendStr(" coherent"); - if (qualifier.volatil) - appendStr(" volatile"); - if (qualifier.restrict) - appendStr(" restrict"); - if (qualifier.readonly) - appendStr(" readonly"); - if (qualifier.writeonly) - appendStr(" writeonly"); - if (qualifier.specConstant) - appendStr(" specialization-constant"); - appendStr(" "); - appendStr(getStorageQualifierString()); - if (isArray()) { - for(int i = 0; i < (int)arraySizes->getNumDims(); ++i) { - int size = arraySizes->getDimSize(i); - if (size == 0) - appendStr(" implicitly-sized array of"); - else { - appendStr(" "); - appendInt(arraySizes->getDimSize(i)); - appendStr("-element array of"); - } - } - } - if (qualifier.precision != EpqNone) { - appendStr(" "); - appendStr(getPrecisionQualifierString()); - } - if (isMatrix()) { - appendStr(" "); - appendInt(matrixCols); - appendStr("X"); - appendInt(matrixRows); - appendStr(" matrix of"); - } else if (isVector()) { - appendStr(" "); - appendInt(vectorSize); - appendStr("-component vector of"); - } - - appendStr(" "); - typeString.append(getBasicTypeString()); - - if (qualifier.builtIn != EbvNone) { - appendStr(" "); - appendStr(getBuiltInVariableString()); - } - - // Add struct/block members - if (structure) { - appendStr("{"); - for (size_t i = 0; i < structure->size(); ++i) { - if (! 
(*structure)[i].type->hiddenMember()) { - typeString.append((*structure)[i].type->getCompleteString()); - typeString.append(" "); - typeString.append((*structure)[i].type->getFieldName()); - if (i < structure->size() - 1) - appendStr(", "); - } - } - appendStr("}"); - } - - return typeString; - } - - TString getBasicTypeString() const - { - if (basicType == EbtSampler) - return sampler.getString(); - else - return getBasicString(); - } - - const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } - const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } - const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } - const TTypeList* getStruct() const { return structure; } - void setStruct(TTypeList* s) { structure = s; } - TTypeList* getWritableStruct() const { return structure; } // This should only be used when known to not be sharing with other threads - - int computeNumComponents() const - { - int components = 0; - - if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { - for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) - components += ((*tl).type)->computeNumComponents(); - } else if (matrixCols) - components = matrixCols * matrixRows; - else - components = vectorSize; - - if (arraySizes != nullptr) { - components *= arraySizes->getCumulativeSize(); - } - - return components; - } - - // append this type's mangled name to the passed in 'name' - void appendMangledName(TString& name) const - { - buildMangledName(name); - name += ';' ; - } - - // Do two structure types match? They could be declared independently, - // in different places, but still might satisfy the definition of matching. - // From the spec: - // - // "Structures must have the same name, sequence of type names, and - // type definitions, and member names to be considered the same type. - // This rule applies recursively for nested or embedded types." 
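The quoted rule amounts to a recursive comparison of the type name, the member names, and the member types, which is what sameStructType() below implements. A compact standalone illustration (ToyType and sameType are hypothetical stand-ins, not the code that follows):

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

struct ToyType {
    std::string name;                                   // struct/type name
    std::vector<std::string> memberNames;               // field names, in order
    std::vector<std::shared_ptr<ToyType>> memberTypes;  // field types, in order
};

bool sameType(const ToyType& a, const ToyType& b) {
    if (a.name != b.name || a.memberNames != b.memberNames ||
        a.memberTypes.size() != b.memberTypes.size())
        return false;
    for (std::size_t i = 0; i < a.memberTypes.size(); ++i)
        if (!sameType(*a.memberTypes[i], *b.memberTypes[i]))  // recurse into nested types
            return false;
    return true;
}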
- // - bool sameStructType(const TType& right) const - { - // Most commonly, they are both nullptr, or the same pointer to the same actual structure - if (structure == right.structure) - return true; - - // Both being nullptr was caught above, now they both have to be structures of the same number of elements - if (structure == nullptr || right.structure == nullptr || - structure->size() != right.structure->size()) - return false; - - // Structure names have to match - if (*typeName != *right.typeName) - return false; - - // Compare the names and types of all the members, which have to match - for (unsigned int i = 0; i < structure->size(); ++i) { - if ((*structure)[i].type->getFieldName() != (*right.structure)[i].type->getFieldName()) - return false; - - if (*(*structure)[i].type != *(*right.structure)[i].type) - return false; - } - - return true; - } - - // See if two types match, in all aspects except arrayness - bool sameElementType(const TType& right) const - { - return basicType == right.basicType && sameElementShape(right); - } - - // See if two types' arrayness matches - bool sameArrayness(const TType& right) const - { - return ((arraySizes == nullptr && right.arraySizes == nullptr) || - (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes)); - } - - // See if two types' arrayness matches in everything except their outer dimension - bool sameInnerArrayness(const TType& right) const - { - assert(arraySizes != nullptr && right.arraySizes != nullptr); - return arraySizes->sameInnerArrayness(*right.arraySizes); - } - - // See if two types' elements match in all ways except basic type - bool sameElementShape(const TType& right) const - { - return sampler == right.sampler && - vectorSize == right.vectorSize && - matrixCols == right.matrixCols && - matrixRows == right.matrixRows && - vector1 == right.vector1 && - sameStructType(right); - } - - // See if two types match in all ways (just the actual type, not qualification) - bool operator==(const TType& right) const - { - return sameElementType(right) && sameArrayness(right); - } - - bool operator!=(const TType& right) const - { - return ! operator==(right); - } - -protected: - // Require consumer to pick between deep copy and shallow copy. - TType(const TType& type); - TType& operator=(const TType& type); - - // Recursively copy a type graph, while preserving the graph-like - // quality. That is, don't make more than one copy of a structure that - // gets reused multiple times in the type graph. - void deepCopy(const TType& copyOf, TMap<TTypeList*, TTypeList*>& copiedMap) - { - shallowCopy(copyOf); - - if (copyOf.arraySizes) { - arraySizes = new TArraySizes; - *arraySizes = *copyOf.arraySizes; - } - - if (copyOf.structure) { - auto prevCopy = copiedMap.find(copyOf.structure); - if (prevCopy != copiedMap.end()) - structure = prevCopy->second; - else { - structure = new TTypeList; - copiedMap[copyOf.structure] = structure; - for (unsigned int i = 0; i < copyOf.structure->size(); ++i) { - TTypeLoc typeLoc; - typeLoc.loc = (*copyOf.structure)[i].loc; - typeLoc.type = new TType(); - typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap); - structure->push_back(typeLoc); - } - } - } - - if (copyOf.fieldName) - fieldName = NewPoolTString(copyOf.fieldName->c_str()); - if (copyOf.typeName) - typeName = NewPoolTString(copyOf.typeName->c_str()); - } - - - void buildMangledName(TString&) const; - - TBasicType basicType : 8; - int vectorSize : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate.
- int matrixCols : 4; - int matrixRows : 4; - bool vector1 : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar. - // GLSL 4.5 never has a 1-component vector; so this will always be false until such - // functionality is added. - // HLSL does have a 1-component vectors, so this will be true to disambiguate - // from a scalar. - TQualifier qualifier; - - TArraySizes* arraySizes; // nullptr unless an array; can be shared across types - TTypeList* structure; // nullptr unless this is a struct; can be shared across types - TString *fieldName; // for structure field names - TString *typeName; // for structure type name - TSampler sampler; -}; - -} // end namespace glslang - -#endif // _TYPES_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/arrays.h b/third_party/glslang-spirv/Include/arrays.h deleted file mode 100644 index 1660a99f9..000000000 --- a/third_party/glslang-spirv/Include/arrays.h +++ /dev/null @@ -1,329 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -// -// Implement types for tracking GLSL arrays, arrays of arrays, etc. -// - -#ifndef _ARRAYS_INCLUDED -#define _ARRAYS_INCLUDED - -namespace glslang { - -// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else. -const int UnsizedArraySize = 0; - -class TIntermTyped; -extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*); - -// Specialization constants need both a nominal size and a node that defines -// the specialization constant being used. Array types are the same when their -// size and specialization constant nodes are the same. 
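In other words, a dimension compares equal when the sizes match and, for specialization-constant sizes, the defining constant nodes match too; a null node marks a plain literal size. A hedged sketch of that comparison (ToySize and SpecNode are illustrative stand-ins for the TArraySize declared below):

struct SpecNode { int constantId; };  // stand-in for a specialization-constant AST node

struct ToySize {
    unsigned size;
    const SpecNode* node;  // nullptr means an ordinary literal size

    bool operator==(const ToySize& rhs) const {
        if (size != rhs.size)
            return false;
        if (node == nullptr || rhs.node == nullptr)
            return node == rhs.node;  // equal only if both sizes are literal
        // Stand-in for the SameSpecializationConstants() call used by the header.
        return node->constantId == rhs.node->constantId;
    }
};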
-struct TArraySize { - unsigned int size; - TIntermTyped* node; // nullptr means no specialization constant node - bool operator==(const TArraySize& rhs) const - { - if (size != rhs.size) - return false; - if (node == nullptr || rhs.node == nullptr) - return node == rhs.node; - - return SameSpecializationConstants(node, rhs.node); - } -}; - -// -// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. -// It has generic-container semantics, while TArraySizes has array-of-array semantics. -// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. -// -struct TSmallArrayVector { - // - // TODO: memory: TSmallArrayVector is intended to be smaller. - // Almost all arrays could be handled by two sizes each fitting - // in 16 bits, needing a real vector only in the cases where there - // are more than 3 sizes or a size needing more than 16 bits. - // - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TSmallArrayVector() : sizes(nullptr) { } - virtual ~TSmallArrayVector() { dealloc(); } - - // For breaking into two non-shared copies, independently modifiable. - TSmallArrayVector& operator=(const TSmallArrayVector& from) - { - if (from.sizes == nullptr) - sizes = nullptr; - else { - alloc(); - *sizes = *from.sizes; - } - - return *this; - } - - int size() const - { - if (sizes == nullptr) - return 0; - return (int)sizes->size(); - } - - unsigned int frontSize() const - { - assert(sizes != nullptr && sizes->size() > 0); - return sizes->front().size; - } - - TIntermTyped* frontNode() const - { - assert(sizes != nullptr && sizes->size() > 0); - return sizes->front().node; - } - - void changeFront(unsigned int s) - { - assert(sizes != nullptr); - // this should only happen for implicitly sized arrays, not specialization constants - assert(sizes->front().node == nullptr); - sizes->front().size = s; - } - - void push_back(unsigned int e, TIntermTyped* n) - { - alloc(); - TArraySize pair = { e, n }; - sizes->push_back(pair); - } - - void push_front(const TSmallArrayVector& newDims) - { - alloc(); - sizes->insert(sizes->begin(), newDims.sizes->begin(), newDims.sizes->end()); - } - - void pop_front() - { - assert(sizes != nullptr && sizes->size() > 0); - if (sizes->size() == 1) - dealloc(); - else - sizes->erase(sizes->begin()); - } - - // 'this' should currently not be holding anything, and copyNonFront - // will make it hold a copy of all but the first element of rhs. - // (This would be useful for making a type that is dereferenced by - // one dimension.) - void copyNonFront(const TSmallArrayVector& rhs) - { - assert(sizes == nullptr); - if (rhs.size() > 1) { - alloc(); - sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); - } - } - - unsigned int getDimSize(int i) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - return (*sizes)[i].size; - } - - void setDimSize(int i, unsigned int size) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - assert((*sizes)[i].node == nullptr); - (*sizes)[i].size = size; - } - - TIntermTyped* getDimNode(int i) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - return (*sizes)[i].node; - } - - bool operator==(const TSmallArrayVector& rhs) const - { - if (sizes == nullptr && rhs.sizes == nullptr) - return true; - if (sizes == nullptr || rhs.sizes == nullptr) - return false; - return *sizes == *rhs.sizes; - } - bool operator!=(const TSmallArrayVector& rhs) const { return ! 
operator==(rhs); } - -protected: - TSmallArrayVector(const TSmallArrayVector&); - - void alloc() - { - if (sizes == nullptr) - sizes = new TVector<TArraySize>; - } - void dealloc() - { - delete sizes; - sizes = nullptr; - } - - TVector<TArraySize>* sizes; // will either hold such a pointer, or in the future, hold the two array sizes -}; - -// -// Represent an array, or array of arrays, to arbitrary depth. This is not -// done through a hierarchy of types in a type tree, rather all contiguous arrayness -// in the type hierarchy is localized into this single cumulative object. -// -// The arrayness in TType is a pointer, so that it can be non-allocated and zero -// for the vast majority of types that are non-array types. -// -// Order Policy: these are all identical: -// - left to right order within a contiguous set of ...[..][..][..]... in the source language -// - index order 0, 1, 2, ... within the 'sizes' member below -// - outer-most to inner-most -// -struct TArraySizes { - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TArraySizes() : implicitArraySize(1) { } - - // For breaking into two non-shared copies, independently modifiable. - TArraySizes& operator=(const TArraySizes& from) - { - implicitArraySize = from.implicitArraySize; - sizes = from.sizes; - - return *this; - } - - // translate from array-of-array semantics to container semantics - int getNumDims() const { return sizes.size(); } - int getDimSize(int dim) const { return sizes.getDimSize(dim); } - TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); } - void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); } - int getOuterSize() const { return sizes.frontSize(); } - TIntermTyped* getOuterNode() const { return sizes.frontNode(); } - int getCumulativeSize() const - { - int size = 1; - for (int d = 0; d < sizes.size(); ++d) { - // this only makes sense in paths that have a known array size - assert(sizes.getDimSize(d) != UnsizedArraySize); - size *= sizes.getDimSize(d); - } - return size; - } - void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); } - void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); } - void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); } - void addInnerSize(TArraySize pair) { sizes.push_back(pair.size, pair.node); } - void changeOuterSize(int s) { sizes.changeFront((unsigned)s); } - int getImplicitSize() const { return (int)implicitArraySize; } - void setImplicitSize(int s) { implicitArraySize = s; } - bool isInnerImplicit() const - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) - return true; - } - - return false; - } - bool clearInnerImplicit() - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) - setDimSize(d, 1); - } - - return false; - } - bool isInnerSpecialization() const - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimNode(d) != nullptr) - return true; - } - - return false; - } - bool isOuterSpecialization() - { - return sizes.getDimNode(0) != nullptr; - } - - bool isImplicit() const { return getOuterSize() == UnsizedArraySize || isInnerImplicit(); } - void addOuterSizes(const TArraySizes& s) { sizes.push_front(s.sizes); } - void dereference() { sizes.pop_front(); } - void copyDereferenced(const TArraySizes& rhs) - { - assert(sizes.size() == 0); - if (rhs.sizes.size() > 1) - sizes.copyNonFront(rhs.sizes); - } - - bool sameInnerArrayness(const TArraySizes& rhs) const - { - if (sizes.size() != rhs.sizes.size()) - return false; - - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || - sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) - return false; - } - - return true; - } - - bool operator==(const TArraySizes& rhs) { return sizes == rhs.sizes; } - bool operator!=(const TArraySizes& rhs) { return sizes != rhs.sizes; } - -protected: - TSmallArrayVector sizes; - - TArraySizes(const TArraySizes&); - - // for tracking maximum referenced index, before an explicit size is given - // applies only to the outer-most dimension - int implicitArraySize; -}; - -} // end namespace glslang - -#endif // _ARRAYS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/intermediate.h b/third_party/glslang-spirv/Include/intermediate.h deleted file mode 100644 index 51ac45c37..000000000 --- a/third_party/glslang-spirv/Include/intermediate.h +++ /dev/null @@ -1,1486 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2016 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -// -// Definition of the in-memory high-level intermediate representation -// of shaders. This is a tree that the parser creates. -// -// Nodes in the tree are defined as a hierarchy of classes derived from -// TIntermNode. Each is a node in a tree. There is no preset branching factor; -// each node can have its own type of list of children. -// - -#ifndef __INTERMEDIATE_H -#define __INTERMEDIATE_H - -#if _MSC_VER >= 1900 - #pragma warning(disable : 4464) // relative include path contains '..' - #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted -#endif - -#include "../Include/Common.h" -#include "../Include/Types.h" -#include "../Include/ConstantUnion.h" - -namespace glslang { - -class TIntermediate; - -// -// Operators used by the high-level (parse tree) representation.
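For orientation before the long operator list: a source statement such as `a = b + c;` lowers to a small tree over these operators, an assignment node whose right child is an add node over two symbol leaves. A toy rendition of that shape (Node, leaf, and example are hypothetical, not the TIntermNode hierarchy):

#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Node {
    std::string op;  // e.g. "EOpAssign", "EOpAdd", or a symbol name for leaves
    std::vector<std::unique_ptr<Node>> kids;
};

std::unique_ptr<Node> leaf(std::string s) {
    auto n = std::make_unique<Node>();
    n->op = std::move(s);
    return n;
}

// Builds the tree for `a = b + c;`: EOpAssign(a, EOpAdd(b, c)).
std::unique_ptr<Node> example() {
    auto add = leaf("EOpAdd");
    add->kids.push_back(leaf("b"));
    add->kids.push_back(leaf("c"));
    auto assign = leaf("EOpAssign");
    assign->kids.push_back(leaf("a"));
    assign->kids.push_back(std::move(add));
    return assign;
}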
-// -enum TOperator { - EOpNull, // if in a node, should only mean a node is still being built - EOpSequence, // denotes a list of statements, or parameters, etc. - EOpLinkerObjects, // for aggregate node of objects the linker may need, if not reference by the rest of the AST - EOpFunctionCall, - EOpFunction, // For function definition - EOpParameters, // an aggregate listing the parameters to a function - - // - // Unary operators - // - - EOpNegative, - EOpLogicalNot, - EOpVectorLogicalNot, - EOpBitwiseNot, - - EOpPostIncrement, - EOpPostDecrement, - EOpPreIncrement, - EOpPreDecrement, - - EOpConvIntToBool, - EOpConvUintToBool, - EOpConvFloatToBool, - EOpConvDoubleToBool, - EOpConvInt64ToBool, - EOpConvUint64ToBool, - EOpConvBoolToFloat, - EOpConvIntToFloat, - EOpConvUintToFloat, - EOpConvDoubleToFloat, - EOpConvInt64ToFloat, - EOpConvUint64ToFloat, - EOpConvUintToInt, - EOpConvFloatToInt, - EOpConvBoolToInt, - EOpConvDoubleToInt, - EOpConvInt64ToInt, - EOpConvUint64ToInt, - EOpConvIntToUint, - EOpConvFloatToUint, - EOpConvBoolToUint, - EOpConvDoubleToUint, - EOpConvInt64ToUint, - EOpConvUint64ToUint, - EOpConvIntToDouble, - EOpConvUintToDouble, - EOpConvFloatToDouble, - EOpConvBoolToDouble, - EOpConvInt64ToDouble, - EOpConvUint64ToDouble, - EOpConvBoolToInt64, - EOpConvIntToInt64, - EOpConvUintToInt64, - EOpConvFloatToInt64, - EOpConvDoubleToInt64, - EOpConvUint64ToInt64, - EOpConvBoolToUint64, - EOpConvIntToUint64, - EOpConvUintToUint64, - EOpConvFloatToUint64, - EOpConvDoubleToUint64, - EOpConvInt64ToUint64, -#ifdef AMD_EXTENSIONS - EOpConvBoolToFloat16, - EOpConvIntToFloat16, - EOpConvUintToFloat16, - EOpConvFloatToFloat16, - EOpConvDoubleToFloat16, - EOpConvInt64ToFloat16, - EOpConvUint64ToFloat16, - EOpConvFloat16ToBool, - EOpConvFloat16ToInt, - EOpConvFloat16ToUint, - EOpConvFloat16ToFloat, - EOpConvFloat16ToDouble, - EOpConvFloat16ToInt64, - EOpConvFloat16ToUint64, - - EOpConvBoolToInt16, - EOpConvIntToInt16, - EOpConvUintToInt16, - EOpConvFloatToInt16, - EOpConvDoubleToInt16, - EOpConvFloat16ToInt16, - EOpConvInt64ToInt16, - EOpConvUint64ToInt16, - EOpConvUint16ToInt16, - EOpConvInt16ToBool, - EOpConvInt16ToInt, - EOpConvInt16ToUint, - EOpConvInt16ToFloat, - EOpConvInt16ToDouble, - EOpConvInt16ToFloat16, - EOpConvInt16ToInt64, - EOpConvInt16ToUint64, - - EOpConvBoolToUint16, - EOpConvIntToUint16, - EOpConvUintToUint16, - EOpConvFloatToUint16, - EOpConvDoubleToUint16, - EOpConvFloat16ToUint16, - EOpConvInt64ToUint16, - EOpConvUint64ToUint16, - EOpConvInt16ToUint16, - EOpConvUint16ToBool, - EOpConvUint16ToInt, - EOpConvUint16ToUint, - EOpConvUint16ToFloat, - EOpConvUint16ToDouble, - EOpConvUint16ToFloat16, - EOpConvUint16ToInt64, - EOpConvUint16ToUint64, -#endif - - // - // binary operations - // - - EOpAdd, - EOpSub, - EOpMul, - EOpDiv, - EOpMod, - EOpRightShift, - EOpLeftShift, - EOpAnd, - EOpInclusiveOr, - EOpExclusiveOr, - EOpEqual, - EOpNotEqual, - EOpVectorEqual, - EOpVectorNotEqual, - EOpLessThan, - EOpGreaterThan, - EOpLessThanEqual, - EOpGreaterThanEqual, - EOpComma, - - EOpVectorTimesScalar, - EOpVectorTimesMatrix, - EOpMatrixTimesVector, - EOpMatrixTimesScalar, - - EOpLogicalOr, - EOpLogicalXor, - EOpLogicalAnd, - - EOpIndexDirect, - EOpIndexIndirect, - EOpIndexDirectStruct, - - EOpVectorSwizzle, - - EOpMethod, - EOpScoping, - - // - // Built-in functions mapped to operators - // - - EOpRadians, - EOpDegrees, - EOpSin, - EOpCos, - EOpTan, - EOpAsin, - EOpAcos, - EOpAtan, - EOpSinh, - EOpCosh, - EOpTanh, - EOpAsinh, - EOpAcosh, - EOpAtanh, - - EOpPow, - EOpExp, - EOpLog, 
- EOpExp2, - EOpLog2, - EOpSqrt, - EOpInverseSqrt, - - EOpAbs, - EOpSign, - EOpFloor, - EOpTrunc, - EOpRound, - EOpRoundEven, - EOpCeil, - EOpFract, - EOpModf, - EOpMin, - EOpMax, - EOpClamp, - EOpMix, - EOpStep, - EOpSmoothStep, - - EOpIsNan, - EOpIsInf, - - EOpFma, - - EOpFrexp, - EOpLdexp, - - EOpFloatBitsToInt, - EOpFloatBitsToUint, - EOpIntBitsToFloat, - EOpUintBitsToFloat, - EOpDoubleBitsToInt64, - EOpDoubleBitsToUint64, - EOpInt64BitsToDouble, - EOpUint64BitsToDouble, -#ifdef AMD_EXTENSIONS - EOpFloat16BitsToInt16, - EOpFloat16BitsToUint16, - EOpInt16BitsToFloat16, - EOpUint16BitsToFloat16, -#endif - EOpPackSnorm2x16, - EOpUnpackSnorm2x16, - EOpPackUnorm2x16, - EOpUnpackUnorm2x16, - EOpPackSnorm4x8, - EOpUnpackSnorm4x8, - EOpPackUnorm4x8, - EOpUnpackUnorm4x8, - EOpPackHalf2x16, - EOpUnpackHalf2x16, - EOpPackDouble2x32, - EOpUnpackDouble2x32, - EOpPackInt2x32, - EOpUnpackInt2x32, - EOpPackUint2x32, - EOpUnpackUint2x32, -#ifdef AMD_EXTENSIONS - EOpPackFloat2x16, - EOpUnpackFloat2x16, - EOpPackInt2x16, - EOpUnpackInt2x16, - EOpPackUint2x16, - EOpUnpackUint2x16, - EOpPackInt4x16, - EOpUnpackInt4x16, - EOpPackUint4x16, - EOpUnpackUint4x16, -#endif - - EOpLength, - EOpDistance, - EOpDot, - EOpCross, - EOpNormalize, - EOpFaceForward, - EOpReflect, - EOpRefract, - -#ifdef AMD_EXTENSIONS - EOpMin3, - EOpMax3, - EOpMid3, -#endif - - EOpDPdx, // Fragment only - EOpDPdy, // Fragment only - EOpFwidth, // Fragment only - EOpDPdxFine, // Fragment only - EOpDPdyFine, // Fragment only - EOpFwidthFine, // Fragment only - EOpDPdxCoarse, // Fragment only - EOpDPdyCoarse, // Fragment only - EOpFwidthCoarse, // Fragment only - - EOpInterpolateAtCentroid, // Fragment only - EOpInterpolateAtSample, // Fragment only - EOpInterpolateAtOffset, // Fragment only - -#ifdef AMD_EXTENSIONS - EOpInterpolateAtVertex, -#endif - - EOpMatrixTimesMatrix, - EOpOuterProduct, - EOpDeterminant, - EOpMatrixInverse, - EOpTranspose, - - EOpFtransform, - - EOpNoise, - - EOpEmitVertex, // geometry only - EOpEndPrimitive, // geometry only - EOpEmitStreamVertex, // geometry only - EOpEndStreamPrimitive, // geometry only - - EOpBarrier, - EOpMemoryBarrier, - EOpMemoryBarrierAtomicCounter, - EOpMemoryBarrierBuffer, - EOpMemoryBarrierImage, - EOpMemoryBarrierShared, // compute only - EOpGroupMemoryBarrier, // compute only - - EOpBallot, - EOpReadInvocation, - EOpReadFirstInvocation, - - EOpAnyInvocation, - EOpAllInvocations, - EOpAllInvocationsEqual, - -#ifdef AMD_EXTENSIONS - EOpMinInvocations, - EOpMaxInvocations, - EOpAddInvocations, - EOpMinInvocationsNonUniform, - EOpMaxInvocationsNonUniform, - EOpAddInvocationsNonUniform, - EOpMinInvocationsInclusiveScan, - EOpMaxInvocationsInclusiveScan, - EOpAddInvocationsInclusiveScan, - EOpMinInvocationsInclusiveScanNonUniform, - EOpMaxInvocationsInclusiveScanNonUniform, - EOpAddInvocationsInclusiveScanNonUniform, - EOpMinInvocationsExclusiveScan, - EOpMaxInvocationsExclusiveScan, - EOpAddInvocationsExclusiveScan, - EOpMinInvocationsExclusiveScanNonUniform, - EOpMaxInvocationsExclusiveScanNonUniform, - EOpAddInvocationsExclusiveScanNonUniform, - EOpSwizzleInvocations, - EOpSwizzleInvocationsMasked, - EOpWriteInvocation, - EOpMbcnt, - - EOpCubeFaceIndex, - EOpCubeFaceCoord, - EOpTime, -#endif - - EOpAtomicAdd, - EOpAtomicMin, - EOpAtomicMax, - EOpAtomicAnd, - EOpAtomicOr, - EOpAtomicXor, - EOpAtomicExchange, - EOpAtomicCompSwap, - - EOpAtomicCounterIncrement, // results in pre-increment value - EOpAtomicCounterDecrement, // results in post-decrement value - EOpAtomicCounter, - 
EOpAtomicCounterAdd, - EOpAtomicCounterSubtract, - EOpAtomicCounterMin, - EOpAtomicCounterMax, - EOpAtomicCounterAnd, - EOpAtomicCounterOr, - EOpAtomicCounterXor, - EOpAtomicCounterExchange, - EOpAtomicCounterCompSwap, - - EOpAny, - EOpAll, - - // - // Branch - // - - EOpKill, // Fragment only - EOpReturn, - EOpBreak, - EOpContinue, - EOpCase, - EOpDefault, - - // - // Constructors - // - - EOpConstructGuardStart, - EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed - EOpConstructUint, - EOpConstructInt64, - EOpConstructUint64, -#ifdef AMD_EXTENSIONS - EOpConstructInt16, - EOpConstructUint16, -#endif - EOpConstructBool, - EOpConstructFloat, - EOpConstructDouble, -#ifdef AMD_EXTENSIONS - EOpConstructFloat16, -#endif - EOpConstructVec2, - EOpConstructVec3, - EOpConstructVec4, - EOpConstructDVec2, - EOpConstructDVec3, - EOpConstructDVec4, -#ifdef AMD_EXTENSIONS - EOpConstructF16Vec2, - EOpConstructF16Vec3, - EOpConstructF16Vec4, -#endif - EOpConstructBVec2, - EOpConstructBVec3, - EOpConstructBVec4, - EOpConstructIVec2, - EOpConstructIVec3, - EOpConstructIVec4, - EOpConstructUVec2, - EOpConstructUVec3, - EOpConstructUVec4, - EOpConstructI64Vec2, - EOpConstructI64Vec3, - EOpConstructI64Vec4, - EOpConstructU64Vec2, - EOpConstructU64Vec3, - EOpConstructU64Vec4, -#ifdef AMD_EXTENSIONS - EOpConstructI16Vec2, - EOpConstructI16Vec3, - EOpConstructI16Vec4, - EOpConstructU16Vec2, - EOpConstructU16Vec3, - EOpConstructU16Vec4, -#endif - EOpConstructMat2x2, - EOpConstructMat2x3, - EOpConstructMat2x4, - EOpConstructMat3x2, - EOpConstructMat3x3, - EOpConstructMat3x4, - EOpConstructMat4x2, - EOpConstructMat4x3, - EOpConstructMat4x4, - EOpConstructDMat2x2, - EOpConstructDMat2x3, - EOpConstructDMat2x4, - EOpConstructDMat3x2, - EOpConstructDMat3x3, - EOpConstructDMat3x4, - EOpConstructDMat4x2, - EOpConstructDMat4x3, - EOpConstructDMat4x4, - EOpConstructIMat2x2, - EOpConstructIMat2x3, - EOpConstructIMat2x4, - EOpConstructIMat3x2, - EOpConstructIMat3x3, - EOpConstructIMat3x4, - EOpConstructIMat4x2, - EOpConstructIMat4x3, - EOpConstructIMat4x4, - EOpConstructUMat2x2, - EOpConstructUMat2x3, - EOpConstructUMat2x4, - EOpConstructUMat3x2, - EOpConstructUMat3x3, - EOpConstructUMat3x4, - EOpConstructUMat4x2, - EOpConstructUMat4x3, - EOpConstructUMat4x4, - EOpConstructBMat2x2, - EOpConstructBMat2x3, - EOpConstructBMat2x4, - EOpConstructBMat3x2, - EOpConstructBMat3x3, - EOpConstructBMat3x4, - EOpConstructBMat4x2, - EOpConstructBMat4x3, - EOpConstructBMat4x4, -#ifdef AMD_EXTENSIONS - EOpConstructF16Mat2x2, - EOpConstructF16Mat2x3, - EOpConstructF16Mat2x4, - EOpConstructF16Mat3x2, - EOpConstructF16Mat3x3, - EOpConstructF16Mat3x4, - EOpConstructF16Mat4x2, - EOpConstructF16Mat4x3, - EOpConstructF16Mat4x4, -#endif - EOpConstructStruct, - EOpConstructTextureSampler, - EOpConstructGuardEnd, - - // - // moves - // - - EOpAssign, - EOpAddAssign, - EOpSubAssign, - EOpMulAssign, - EOpVectorTimesMatrixAssign, - EOpVectorTimesScalarAssign, - EOpMatrixTimesScalarAssign, - EOpMatrixTimesMatrixAssign, - EOpDivAssign, - EOpModAssign, - EOpAndAssign, - EOpInclusiveOrAssign, - EOpExclusiveOrAssign, - EOpLeftShiftAssign, - EOpRightShiftAssign, - - // - // Array operators - // - - EOpArrayLength, // "Array" distinguishes from length(v) built-in function, but it applies to vectors and matrices as well. 
- - // - // Image operations - // - - EOpImageGuardBegin, - - EOpImageQuerySize, - EOpImageQuerySamples, - EOpImageLoad, - EOpImageStore, -#ifdef AMD_EXTENSIONS - EOpImageLoadLod, - EOpImageStoreLod, -#endif - EOpImageAtomicAdd, - EOpImageAtomicMin, - EOpImageAtomicMax, - EOpImageAtomicAnd, - EOpImageAtomicOr, - EOpImageAtomicXor, - EOpImageAtomicExchange, - EOpImageAtomicCompSwap, - - EOpSubpassLoad, - EOpSubpassLoadMS, - EOpSparseImageLoad, -#ifdef AMD_EXTENSIONS - EOpSparseImageLoadLod, -#endif - - EOpImageGuardEnd, - - // - // Texture operations - // - - EOpTextureGuardBegin, - - EOpTextureQuerySize, - EOpTextureQueryLod, - EOpTextureQueryLevels, - EOpTextureQuerySamples, - - EOpSamplingGuardBegin, - - EOpTexture, - EOpTextureProj, - EOpTextureLod, - EOpTextureOffset, - EOpTextureFetch, - EOpTextureFetchOffset, - EOpTextureProjOffset, - EOpTextureLodOffset, - EOpTextureProjLod, - EOpTextureProjLodOffset, - EOpTextureGrad, - EOpTextureGradOffset, - EOpTextureProjGrad, - EOpTextureProjGradOffset, - EOpTextureGather, - EOpTextureGatherOffset, - EOpTextureGatherOffsets, - EOpTextureClamp, - EOpTextureOffsetClamp, - EOpTextureGradClamp, - EOpTextureGradOffsetClamp, -#ifdef AMD_EXTENSIONS - EOpTextureGatherLod, - EOpTextureGatherLodOffset, - EOpTextureGatherLodOffsets, - EOpFragmentMaskFetch, - EOpFragmentFetch, -#endif - - EOpSparseTextureGuardBegin, - - EOpSparseTexture, - EOpSparseTextureLod, - EOpSparseTextureOffset, - EOpSparseTextureFetch, - EOpSparseTextureFetchOffset, - EOpSparseTextureLodOffset, - EOpSparseTextureGrad, - EOpSparseTextureGradOffset, - EOpSparseTextureGather, - EOpSparseTextureGatherOffset, - EOpSparseTextureGatherOffsets, - EOpSparseTexelsResident, - EOpSparseTextureClamp, - EOpSparseTextureOffsetClamp, - EOpSparseTextureGradClamp, - EOpSparseTextureGradOffsetClamp, -#ifdef AMD_EXTENSIONS - EOpSparseTextureGatherLod, - EOpSparseTextureGatherLodOffset, - EOpSparseTextureGatherLodOffsets, -#endif - - EOpSparseTextureGuardEnd, - EOpSamplingGuardEnd, - EOpTextureGuardEnd, - - // - // Integer operations - // - - EOpAddCarry, - EOpSubBorrow, - EOpUMulExtended, - EOpIMulExtended, - EOpBitfieldExtract, - EOpBitfieldInsert, - EOpBitFieldReverse, - EOpBitCount, - EOpFindLSB, - EOpFindMSB, - - // - // HLSL operations - // - - EOpClip, // discard if input value < 0 - EOpIsFinite, - EOpLog10, // base 10 log - EOpRcp, // 1/x - EOpSaturate, // clamp from 0 to 1 - EOpSinCos, // sin and cos in out parameters - EOpGenMul, // mul(x,y) on any of mat/vec/scalars - EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w - EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return - EOpInterlockedAnd, // ... - EOpInterlockedCompareExchange, // ... - EOpInterlockedCompareStore, // ... - EOpInterlockedExchange, // ... - EOpInterlockedMax, // ... - EOpInterlockedMin, // ... - EOpInterlockedOr, // ... - EOpInterlockedXor, // ... - EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents - EOpDeviceMemoryBarrier, // ... - EOpDeviceMemoryBarrierWithGroupSync, // ... - EOpWorkgroupMemoryBarrier, // ... - EOpWorkgroupMemoryBarrierWithGroupSync, // ... 
- EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid - EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 - EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 - EOpLit, // HLSL lighting coefficient vector - EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture - EOpAsDouble, // slightly different from EOpUint64BitsToDouble - EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range - - EOpMethodSample, // Texture object methods. These are translated to existing - EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that - EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). - EOpMethodSampleCmpLevelZero, // ... - EOpMethodSampleGrad, // ... - EOpMethodSampleLevel, // ... - EOpMethodLoad, // ... - EOpMethodGetDimensions, // ... - EOpMethodGetSamplePosition, // ... - EOpMethodGather, // ... - EOpMethodCalculateLevelOfDetail, // ... - EOpMethodCalculateLevelOfDetailUnclamped, // ... - - // Load already defined above for textures - EOpMethodLoad2, // Structure buffer object methods. These are translated to existing - EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that - EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). - EOpMethodStore, // ... - EOpMethodStore2, // ... - EOpMethodStore3, // ... - EOpMethodStore4, // ... - EOpMethodIncrementCounter, // ... - EOpMethodDecrementCounter, // ... - // EOpMethodAppend is defined for geo shaders below - EOpMethodConsume, - - // SM5 texture methods - EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about - EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily - EOpMethodGatherBlue, // because HLSL arguments are slightly different. - EOpMethodGatherAlpha, // ... - EOpMethodGatherCmp, // ... - EOpMethodGatherCmpRed, // ... - EOpMethodGatherCmpGreen, // ... - EOpMethodGatherCmpBlue, // ... - EOpMethodGatherCmpAlpha, // ... - - // geometry methods - EOpMethodAppend, // Geometry shader methods - EOpMethodRestartStrip, // ... - - // matrix - EOpMatrixSwizzle, // select multiple matrix components (non-column) -}; - -class TIntermTraverser; -class TIntermOperator; -class TIntermAggregate; -class TIntermUnary; -class TIntermBinary; -class TIntermConstantUnion; -class TIntermSelection; -class TIntermSwitch; -class TIntermBranch; -class TIntermTyped; -class TIntermMethod; -class TIntermSymbol; -class TIntermLoop; - -} // end namespace glslang - -// -// Base class for the tree nodes -// -// (Put outside the glslang namespace, as it's used as part of the external interface.) 
-// -class TIntermNode { -public: - POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) - - TIntermNode() { loc.init(); } - virtual const glslang::TSourceLoc& getLoc() const { return loc; } - virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } - virtual void traverse(glslang::TIntermTraverser*) = 0; - virtual glslang::TIntermTyped* getAsTyped() { return 0; } - virtual glslang::TIntermOperator* getAsOperator() { return 0; } - virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } - virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } - virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } - virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } - virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } - virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } - virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } - virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } - virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } - virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } - - virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } - virtual const glslang::TIntermOperator* getAsOperator() const { return 0; } - virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } - virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } - virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } - virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } - virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } - virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } - virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } - virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } - virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } - virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } - virtual ~TIntermNode() { } - -protected: - TIntermNode(const TIntermNode&); - TIntermNode& operator=(const TIntermNode&); - glslang::TSourceLoc loc; -}; - -namespace glslang { - -// -// This is just to help yacc. -// -struct TIntermNodePair { - TIntermNode* node1; - TIntermNode* node2; -}; - -// -// Intermediate class for nodes that have a type. 
-// -class TIntermTyped : public TIntermNode { -public: - TIntermTyped(const TType& t) { type.shallowCopy(t); } - TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } - virtual TIntermTyped* getAsTyped() { return this; } - virtual const TIntermTyped* getAsTyped() const { return this; } - virtual void setType(const TType& t) { type.shallowCopy(t); } - virtual const TType& getType() const { return type; } - virtual TType& getWritableType() { return type; } - - virtual TBasicType getBasicType() const { return type.getBasicType(); } - virtual TQualifier& getQualifier() { return type.getQualifier(); } - virtual const TQualifier& getQualifier() const { return type.getQualifier(); } - virtual void propagatePrecision(TPrecisionQualifier); - virtual int getVectorSize() const { return type.getVectorSize(); } - virtual int getMatrixCols() const { return type.getMatrixCols(); } - virtual int getMatrixRows() const { return type.getMatrixRows(); } - virtual bool isMatrix() const { return type.isMatrix(); } - virtual bool isArray() const { return type.isArray(); } - virtual bool isVector() const { return type.isVector(); } - virtual bool isScalar() const { return type.isScalar(); } - virtual bool isStruct() const { return type.isStruct(); } - virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } - virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } - TString getCompleteString() const { return type.getCompleteString(); } - -protected: - TIntermTyped& operator=(const TIntermTyped&); - TType type; -}; - -// -// Handle for, do-while, and while loops. -// -class TIntermLoop : public TIntermNode { -public: - TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : - body(aBody), - test(aTest), - terminal(aTerminal), - first(testFirst), - unroll(false), - dontUnroll(false), - dependency(0) - { } - - virtual TIntermLoop* getAsLoopNode() { return this; } - virtual const TIntermLoop* getAsLoopNode() const { return this; } - virtual void traverse(TIntermTraverser*); - TIntermNode* getBody() const { return body; } - TIntermTyped* getTest() const { return test; } - TIntermTyped* getTerminal() const { return terminal; } - bool testFirst() const { return first; } - - void setUnroll() { unroll = true; } - void setDontUnroll() { dontUnroll = true; } - bool getUnroll() const { return unroll; } - bool getDontUnroll() const { return dontUnroll; } - - static const unsigned int dependencyInfinite = 0xFFFFFFFF; - void setLoopDependency(int d) { dependency = d; } - int getLoopDependency() const { return dependency; } - -protected: - TIntermNode* body; // code to loop over - TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops - TIntermTyped* terminal; // exists for for-loops - bool first; // true for while and for, not for do-while - bool unroll; // true if unroll requested - bool dontUnroll; // true if request to not unroll - unsigned int dependency; // loop dependency hint; 0 means not set or unknown -}; - -// -// Handle case, break, continue, return, and kill. 
-// -class TIntermBranch : public TIntermNode { -public: - TIntermBranch(TOperator op, TIntermTyped* e) : - flowOp(op), - expression(e) { } - virtual TIntermBranch* getAsBranchNode() { return this; } - virtual const TIntermBranch* getAsBranchNode() const { return this; } - virtual void traverse(TIntermTraverser*); - TOperator getFlowOp() const { return flowOp; } - TIntermTyped* getExpression() const { return expression; } -protected: - TOperator flowOp; - TIntermTyped* expression; -}; - -// -// Represent method names before seeing their calling signature -// or resolving them to operations. Just an expression as the base object -// and a textural name. -// -class TIntermMethod : public TIntermTyped { -public: - TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } - virtual TIntermMethod* getAsMethodNode() { return this; } - virtual const TIntermMethod* getAsMethodNode() const { return this; } - virtual const TString& getMethodName() const { return method; } - virtual TIntermTyped* getObject() const { return object; } - virtual void traverse(TIntermTraverser*); -protected: - TIntermTyped* object; - TString method; -}; - -// -// Nodes that correspond to symbols or constants in the source code. -// -class TIntermSymbol : public TIntermTyped { -public: - // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. If sym comes from - // per process threadPoolAllocator, then it causes increased memory usage per compile - // it is essential to use "symbol = sym" to assign to symbol - TIntermSymbol(int i, const TString& n, const TType& t) - : TIntermTyped(t), id(i), -#ifdef ENABLE_HLSL - flattenSubset(-1), -#endif - constSubtree(nullptr) - { name = n; } - virtual int getId() const { return id; } - virtual const TString& getName() const { return name; } - virtual void traverse(TIntermTraverser*); - virtual TIntermSymbol* getAsSymbolNode() { return this; } - virtual const TIntermSymbol* getAsSymbolNode() const { return this; } - void setConstArray(const TConstUnionArray& c) { constArray = c; } - const TConstUnionArray& getConstArray() const { return constArray; } - void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } - TIntermTyped* getConstSubtree() const { return constSubtree; } -#ifdef ENABLE_HLSL - void setFlattenSubset(int subset) { flattenSubset = subset; } - int getFlattenSubset() const { return flattenSubset; } // -1 means full object -#endif - - // This is meant for cases where a node has already been constructed, and - // later on, it becomes necessary to switch to a different symbol. 
- virtual void switchId(int newId) { id = newId; } - -protected: - int id; // the unique id of the symbol this node represents -#ifdef ENABLE_HLSL - int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced -#endif - TString name; // the name of the symbol this node represents - TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value - TIntermTyped* constSubtree; -}; - -class TIntermConstantUnion : public TIntermTyped { -public: - TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } - const TConstUnionArray& getConstArray() const { return constArray; } - virtual TIntermConstantUnion* getAsConstantUnion() { return this; } - virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } - virtual void traverse(TIntermTraverser*); - virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; - virtual TIntermTyped* fold(TOperator, const TType&) const; - void setLiteral() { literal = true; } - void setExpression() { literal = false; } - bool isLiteral() const { return literal; } - -protected: - TIntermConstantUnion& operator=(const TIntermConstantUnion&); - - const TConstUnionArray constArray; - bool literal; // true if node represents a literal in the source code -}; - -// Represent the independent aspects of a texturing TOperator -struct TCrackedTextureOp { - bool query; - bool proj; - bool lod; - bool fetch; - bool offset; - bool offsets; - bool gather; - bool grad; - bool subpass; - bool lodClamp; -#ifdef AMD_EXTENSIONS - bool fragMask; -#endif -}; - -// -// Intermediate class for node types that hold operators. -// -class TIntermOperator : public TIntermTyped { -public: - virtual TIntermOperator* getAsOperator() { return this; } - virtual const TIntermOperator* getAsOperator() const { return this; } - TOperator getOp() const { return op; } - void setOp(TOperator newOp) { op = newOp; } - bool modifiesState() const; - bool isConstructor() const; - bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } - bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } - bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } - bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } - bool isSparseImage() const { return op == EOpSparseImageLoad; } - - void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } - TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? - operationPrecision : - type.getQualifier().precision; } - TString getCompleteString() const - { - TString cs = type.getCompleteString(); - if (getOperationPrecision() != type.getQualifier().precision) { - cs += ", operation at "; - cs += GetPrecisionQualifierString(getOperationPrecision()); - } - - return cs; - } - - // Crack the op into the individual dimensions of texturing operation. 
- void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const - { - cracked.query = false; - cracked.proj = false; - cracked.lod = false; - cracked.fetch = false; - cracked.offset = false; - cracked.offsets = false; - cracked.gather = false; - cracked.grad = false; - cracked.subpass = false; - cracked.lodClamp = false; -#ifdef AMD_EXTENSIONS - cracked.fragMask = false; -#endif - - switch (op) { - case EOpImageQuerySize: - case EOpImageQuerySamples: - case EOpTextureQuerySize: - case EOpTextureQueryLod: - case EOpTextureQueryLevels: - case EOpTextureQuerySamples: - case EOpSparseTexelsResident: - cracked.query = true; - break; - case EOpTexture: - case EOpSparseTexture: - break; - case EOpTextureClamp: - case EOpSparseTextureClamp: - cracked.lodClamp = true; - break; - case EOpTextureProj: - cracked.proj = true; - break; - case EOpTextureLod: - case EOpSparseTextureLod: - cracked.lod = true; - break; - case EOpTextureOffset: - case EOpSparseTextureOffset: - cracked.offset = true; - break; - case EOpTextureOffsetClamp: - case EOpSparseTextureOffsetClamp: - cracked.offset = true; - cracked.lodClamp = true; - break; - case EOpTextureFetch: - case EOpSparseTextureFetch: - cracked.fetch = true; - if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D) - cracked.lod = true; - break; - case EOpTextureFetchOffset: - case EOpSparseTextureFetchOffset: - cracked.fetch = true; - cracked.offset = true; - if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D) - cracked.lod = true; - break; - case EOpTextureProjOffset: - cracked.offset = true; - cracked.proj = true; - break; - case EOpTextureLodOffset: - case EOpSparseTextureLodOffset: - cracked.offset = true; - cracked.lod = true; - break; - case EOpTextureProjLod: - cracked.lod = true; - cracked.proj = true; - break; - case EOpTextureProjLodOffset: - cracked.offset = true; - cracked.lod = true; - cracked.proj = true; - break; - case EOpTextureGrad: - case EOpSparseTextureGrad: - cracked.grad = true; - break; - case EOpTextureGradClamp: - case EOpSparseTextureGradClamp: - cracked.grad = true; - cracked.lodClamp = true; - break; - case EOpTextureGradOffset: - case EOpSparseTextureGradOffset: - cracked.grad = true; - cracked.offset = true; - break; - case EOpTextureProjGrad: - cracked.grad = true; - cracked.proj = true; - break; - case EOpTextureProjGradOffset: - cracked.grad = true; - cracked.offset = true; - cracked.proj = true; - break; - case EOpTextureGradOffsetClamp: - case EOpSparseTextureGradOffsetClamp: - cracked.grad = true; - cracked.offset = true; - cracked.lodClamp = true; - break; - case EOpTextureGather: - case EOpSparseTextureGather: - cracked.gather = true; - break; - case EOpTextureGatherOffset: - case EOpSparseTextureGatherOffset: - cracked.gather = true; - cracked.offset = true; - break; - case EOpTextureGatherOffsets: - case EOpSparseTextureGatherOffsets: - cracked.gather = true; - cracked.offsets = true; - break; -#ifdef AMD_EXTENSIONS - case EOpTextureGatherLod: - case EOpSparseTextureGatherLod: - cracked.gather = true; - cracked.lod = true; - break; - case EOpTextureGatherLodOffset: - case EOpSparseTextureGatherLodOffset: - cracked.gather = true; - cracked.offset = true; - cracked.lod = true; - break; - case EOpTextureGatherLodOffsets: - case EOpSparseTextureGatherLodOffsets: - cracked.gather = true; - cracked.offsets = true; - cracked.lod = true; - break; - case EOpImageLoadLod: - case EOpImageStoreLod: - case EOpSparseImageLoadLod: - cracked.lod = 
true; - break; - case EOpFragmentMaskFetch: - cracked.subpass = sampler.dim == EsdSubpass; - cracked.fragMask = true; - break; - case EOpFragmentFetch: - cracked.subpass = sampler.dim == EsdSubpass; - cracked.fragMask = true; - break; -#endif - case EOpSubpassLoad: - case EOpSubpassLoadMS: - cracked.subpass = true; - break; - default: - break; - } - } - -protected: - TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {} - TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {} - TOperator op; - // The result precision is in the inherited TType, and is usually meant to be both - // the operation precision and the result precision. However, some more complex things, - // like built-in function calls, distinguish between the two, in which case non-EpqNone - // 'operationPrecision' overrides the result precision as far as operation precision - // is concerned. - TPrecisionQualifier operationPrecision; -}; - -// -// Nodes for all the basic binary math operators. -// -class TIntermBinary : public TIntermOperator { -public: - TIntermBinary(TOperator o) : TIntermOperator(o) {} - virtual void traverse(TIntermTraverser*); - virtual void setLeft(TIntermTyped* n) { left = n; } - virtual void setRight(TIntermTyped* n) { right = n; } - virtual TIntermTyped* getLeft() const { return left; } - virtual TIntermTyped* getRight() const { return right; } - virtual TIntermBinary* getAsBinaryNode() { return this; } - virtual const TIntermBinary* getAsBinaryNode() const { return this; } - virtual void updatePrecision(); -protected: - TIntermTyped* left; - TIntermTyped* right; -}; - -// -// Nodes for unary math operators. -// -class TIntermUnary : public TIntermOperator { -public: - TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {} - TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {} - virtual void traverse(TIntermTraverser*); - virtual void setOperand(TIntermTyped* o) { operand = o; } - virtual TIntermTyped* getOperand() { return operand; } - virtual const TIntermTyped* getOperand() const { return operand; } - virtual TIntermUnary* getAsUnaryNode() { return this; } - virtual const TIntermUnary* getAsUnaryNode() const { return this; } - virtual void updatePrecision(); -protected: - TIntermTyped* operand; -}; - -typedef TVector<TIntermNode*> TIntermSequence; -typedef TVector<TStorageQualifier> TQualifierList; -// -// Nodes that operate on an arbitrary sized set of children.
-// -class TIntermAggregate : public TIntermOperator { -public: - TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { } - TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { } - ~TIntermAggregate() { delete pragmaTable; } - virtual TIntermAggregate* getAsAggregate() { return this; } - virtual const TIntermAggregate* getAsAggregate() const { return this; } - virtual void setOperator(TOperator o) { op = o; } - virtual TIntermSequence& getSequence() { return sequence; } - virtual const TIntermSequence& getSequence() const { return sequence; } - virtual void setName(const TString& n) { name = n; } - virtual const TString& getName() const { return name; } - virtual void traverse(TIntermTraverser*); - virtual void setUserDefined() { userDefined = true; } - virtual bool isUserDefined() { return userDefined; } - virtual TQualifierList& getQualifierList() { return qualifier; } - virtual const TQualifierList& getQualifierList() const { return qualifier; } - void setOptimize(bool o) { optimize = o; } - void setDebug(bool d) { debug = d; } - bool getOptimize() const { return optimize; } - bool getDebug() const { return debug; } - void setPragmaTable(const TPragmaTable& pTable); - const TPragmaTable& getPragmaTable() const { return *pragmaTable; } -protected: - TIntermAggregate(const TIntermAggregate&); // disallow copy constructor - TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator - TIntermSequence sequence; - TQualifierList qualifier; - TString name; - bool userDefined; // used for user defined function names - bool optimize; - bool debug; - TPragmaTable* pragmaTable; -}; - -// -// For if tests. -// -class TIntermSelection : public TIntermTyped { -public: - TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : - TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), - flatten(false), dontFlatten(false) {} - TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : - TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), - flatten(false), dontFlatten(false) {} - virtual void traverse(TIntermTraverser*); - virtual TIntermTyped* getCondition() const { return condition; } - virtual TIntermNode* getTrueBlock() const { return trueBlock; } - virtual TIntermNode* getFalseBlock() const { return falseBlock; } - virtual TIntermSelection* getAsSelectionNode() { return this; } - virtual const TIntermSelection* getAsSelectionNode() const { return this; } - - void setFlatten() { flatten = true; } - void setDontFlatten() { dontFlatten = true; } - bool getFlatten() const { return flatten; } - bool getDontFlatten() const { return dontFlatten; } - -protected: - TIntermTyped* condition; - TIntermNode* trueBlock; - TIntermNode* falseBlock; - bool flatten; // true if flatten requested - bool dontFlatten; // true if requested to not flatten -}; - -// -// For switch statements. Designed use is that a switch will have sequence of nodes -// that are either case/default nodes or a *single* node that represents all the code -// in between (if any) consecutive case/defaults. So, a traversal need only deal with -// 0 or 1 nodes per case/default statement. 
-// -class TIntermSwitch : public TIntermNode { -public: - TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), - flatten(false), dontFlatten(false) {} - virtual void traverse(TIntermTraverser*); - virtual TIntermNode* getCondition() const { return condition; } - virtual TIntermAggregate* getBody() const { return body; } - virtual TIntermSwitch* getAsSwitchNode() { return this; } - virtual const TIntermSwitch* getAsSwitchNode() const { return this; } - - void setFlatten() { flatten = true; } - void setDontFlatten() { dontFlatten = true; } - bool getFlatten() const { return flatten; } - bool getDontFlatten() const { return dontFlatten; } - -protected: - TIntermTyped* condition; - TIntermAggregate* body; - bool flatten; // true if flatten requested - bool dontFlatten; // true if requested to not flatten -}; - -enum TVisit -{ - EvPreVisit, - EvInVisit, - EvPostVisit -}; - -// -// For traversing the tree. User should derive from this, -// put their traversal specific data in it, and then pass -// it to a Traverse method. -// -// When using this, just fill in the methods for nodes you want visited. -// Return false from a pre-visit to skip visiting that node's subtree. -// -// Explicitly set postVisit to true if you want post visiting, otherwise, -// filled in methods will only be called at pre-visit time (before processing -// the subtree). Similarly for inVisit for in-order visiting of nodes with -// multiple children. -// -// If you only want post-visits, explicitly turn off preVisit (and inVisit) -// and turn on postVisit. -// -// In general, for the visit*() methods, return true from interior nodes -// to have the traversal continue on to children. -// -// If you process children yourself, or don't want them processed, return false. -// -class TIntermTraverser { -public: - POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) - TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) : - preVisit(preVisit), - inVisit(inVisit), - postVisit(postVisit), - rightToLeft(rightToLeft), - depth(0), - maxDepth(0) { } - virtual ~TIntermTraverser() { } - - virtual void visitSymbol(TIntermSymbol*) { } - virtual void visitConstantUnion(TIntermConstantUnion*) { } - virtual bool visitBinary(TVisit, TIntermBinary*) { return true; } - virtual bool visitUnary(TVisit, TIntermUnary*) { return true; } - virtual bool visitSelection(TVisit, TIntermSelection*) { return true; } - virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; } - virtual bool visitLoop(TVisit, TIntermLoop*) { return true; } - virtual bool visitBranch(TVisit, TIntermBranch*) { return true; } - virtual bool visitSwitch(TVisit, TIntermSwitch*) { return true; } - - int getMaxDepth() const { return maxDepth; } - - void incrementDepth(TIntermNode *current) - { - depth++; - maxDepth = (std::max)(maxDepth, depth); - path.push_back(current); - } - - void decrementDepth() - { - depth--; - path.pop_back(); - } - - TIntermNode *getParentNode() - { - return path.size() == 0 ? NULL : path.back(); - } - - const bool preVisit; - const bool inVisit; - const bool postVisit; - const bool rightToLeft; - -protected: - TIntermTraverser& operator=(TIntermTraverser&); - - int depth; - int maxDepth; - - // All the nodes from root to the current node's parent during traversing. 
- TVector<TIntermNode *> path; -}; - -// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if -// sized with the same symbol, involving no operations" -inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2) -{ - return node1->getAsSymbolNode() && node2->getAsSymbolNode() && - node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId(); -} - -} // end namespace glslang - -#endif // __INTERMEDIATE_H diff --git a/third_party/glslang-spirv/Include/revision.h b/third_party/glslang-spirv/Include/revision.h deleted file mode 100644 index 218f8b67f..000000000 --- a/third_party/glslang-spirv/Include/revision.h +++ /dev/null @@ -1,6 +0,0 @@ -// This header is generated by the make-revision script. -// For the version, it uses the latest git tag followed by the number of commits. -// For the date, it uses the current date (when the script is run). - -#define GLSLANG_REVISION "Overload400-PrecQual.2000" -#define GLSLANG_DATE "12-Apr-2017" diff --git a/third_party/glslang-spirv/Include/revision.template b/third_party/glslang-spirv/Include/revision.template deleted file mode 100644 index 4a16beeb0..000000000 --- a/third_party/glslang-spirv/Include/revision.template +++ /dev/null @@ -1,13 +0,0 @@ -// The file revision.h should be updated to the latest version, somehow, on -// check-in, if glslang has changed. -// -// revision.template is the source for revision.h when using SubWCRev as the -// method of updating revision.h. You don't have to do it this way, the -// requirement is only that revision.h gets updated. -// -// revision.h is under source control so that not all consumers of glslang -// source have to figure out how to create revision.h just to get a build -// going. However, if it is not updated, it can be a version behind. - -#define GLSLANG_REVISION "$WCREV$" -#define GLSLANG_DATE "$WCDATE$" diff --git a/third_party/glslang-spirv/Logger.cpp b/third_party/glslang-spirv/Logger.cpp deleted file mode 100644 index 48bd4e3ad..000000000 --- a/third_party/glslang-spirv/Logger.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#include "Logger.h" - -#include <algorithm> -#include <iterator> -#include <sstream> - -namespace spv { - -void SpvBuildLogger::tbdFunctionality(const std::string& f) -{ - if (std::find(std::begin(tbdFeatures), std::end(tbdFeatures), f) == std::end(tbdFeatures)) - tbdFeatures.push_back(f); -} - -void SpvBuildLogger::missingFunctionality(const std::string& f) -{ - if (std::find(std::begin(missingFeatures), std::end(missingFeatures), f) == std::end(missingFeatures)) - missingFeatures.push_back(f); -} - -std::string SpvBuildLogger::getAllMessages() const { - std::ostringstream messages; - for (auto it = tbdFeatures.cbegin(); it != tbdFeatures.cend(); ++it) - messages << "TBD functionality: " << *it << "\n"; - for (auto it = missingFeatures.cbegin(); it != missingFeatures.cend(); ++it) - messages << "Missing functionality: " << *it << "\n"; - for (auto it = warnings.cbegin(); it != warnings.cend(); ++it) - messages << "warning: " << *it << "\n"; - for (auto it = errors.cbegin(); it != errors.cend(); ++it) - messages << "error: " << *it << "\n"; - return messages.str(); -} - -} // end spv namespace diff --git a/third_party/glslang-spirv/Logger.h b/third_party/glslang-spirv/Logger.h deleted file mode 100644 index 2e4ddaf51..000000000 --- a/third_party/glslang-spirv/Logger.h +++ /dev/null @@ -1,74 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE.
- -#ifndef GLSLANG_SPIRV_LOGGER_H -#define GLSLANG_SPIRV_LOGGER_H - -#include <string> -#include <vector> - -namespace spv { - -// A class for holding all SPIR-V build status messages, including -// missing/TBD functionalities, warnings, and errors. -class SpvBuildLogger { -public: - SpvBuildLogger() {} - - // Registers a TBD functionality. - void tbdFunctionality(const std::string& f); - // Registers a missing functionality. - void missingFunctionality(const std::string& f); - - // Logs a warning. - void warning(const std::string& w) { warnings.push_back(w); } - // Logs an error. - void error(const std::string& e) { errors.push_back(e); } - - // Returns all messages accumulated in the order of: - // TBD functionalities, missing functionalities, warnings, errors. - std::string getAllMessages() const; - -private: - SpvBuildLogger(const SpvBuildLogger&); - - std::vector<std::string> tbdFeatures; - std::vector<std::string> missingFeatures; - std::vector<std::string> warnings; - std::vector<std::string> errors; -}; - -} // end spv namespace - -#endif // GLSLANG_SPIRV_LOGGER_H diff --git a/third_party/glslang-spirv/SPVRemapper.cpp b/third_party/glslang-spirv/SPVRemapper.cpp deleted file mode 100644 index f14d85729..000000000 --- a/third_party/glslang-spirv/SPVRemapper.cpp +++ /dev/null @@ -1,1479 +0,0 @@ -// -// Copyright (C) 2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#include "SPVRemapper.h" -#include "doc.h" - -#if !defined (use_cpp11) -// ... not supported before C++11 -#else // defined (use_cpp11) - -#include <algorithm> -#include <map> -#include "Include/Common.h" - -namespace spv { - - // By default, just abort on error. Can be overridden via RegisterErrorHandler - spirvbin_t::errorfn_t spirvbin_t::errorHandler = [](const std::string&) { exit(5); }; - // By default, eat log messages.
Can be overridden via RegisterLogHandler - spirvbin_t::logfn_t spirvbin_t::logHandler = [](const std::string&) { }; - - // This can be overridden to provide other message behavior if needed - void spirvbin_t::msg(int minVerbosity, int indent, const std::string& txt) const - { - if (verbose >= minVerbosity) - logHandler(std::string(indent, ' ') + txt); - } - - // hash opcode, with special handling for OpExtInst - std::uint32_t spirvbin_t::asOpCodeHash(unsigned word) - { - const spv::Op opCode = asOpCode(word); - - std::uint32_t offset = 0; - - switch (opCode) { - case spv::OpExtInst: - offset += asId(word + 4); break; - default: - break; - } - - return opCode * 19 + offset; // 19 = small prime - } - - spirvbin_t::range_t spirvbin_t::literalRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - switch (opCode) { - case spv::OpTypeFloat: // fall through... - case spv::OpTypePointer: return range_t(2, 3); - case spv::OpTypeInt: return range_t(2, 4); - // TODO: case spv::OpTypeImage: - // TODO: case spv::OpTypeSampledImage: - case spv::OpTypeSampler: return range_t(3, 8); - case spv::OpTypeVector: // fall through - case spv::OpTypeMatrix: // ... - case spv::OpTypePipe: return range_t(3, 4); - case spv::OpConstant: return range_t(3, maxCount); - default: return range_t(0, 0); - } - } - - spirvbin_t::range_t spirvbin_t::typeRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - if (isConstOp(opCode)) - return range_t(1, 2); - - switch (opCode) { - case spv::OpTypeVector: // fall through - case spv::OpTypeMatrix: // ... - case spv::OpTypeSampler: // ... - case spv::OpTypeArray: // ... - case spv::OpTypeRuntimeArray: // ... - case spv::OpTypePipe: return range_t(2, 3); - case spv::OpTypeStruct: // fall through - case spv::OpTypeFunction: return range_t(2, maxCount); - case spv::OpTypePointer: return range_t(3, 4); - default: return range_t(0, 0); - } - } - - spirvbin_t::range_t spirvbin_t::constRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - switch (opCode) { - case spv::OpTypeArray: // fall through... - case spv::OpTypeRuntimeArray: return range_t(3, 4); - case spv::OpConstantComposite: return range_t(3, maxCount); - default: return range_t(0, 0); - } - } - - // Return the size of a type in 32-bit words. This currently only - // handles ints and floats, and is only invoked by queries which must be - // integer types. If ever needed, it can be generalized. - unsigned spirvbin_t::typeSizeInWords(spv::Id id) const - { - const unsigned typeStart = idPos(id); - const spv::Op opCode = asOpCode(typeStart); - - if (errorLatch) - return 0; - - switch (opCode) { - case spv::OpTypeInt: // fall through... - case spv::OpTypeFloat: return (spv[typeStart+2]+31)/32; - default: - return 0; - } - } - - // Looks up the type of a given const or variable ID, and - // returns its size in 32-bit words. - unsigned spirvbin_t::idTypeSizeInWords(spv::Id id) const - { - const auto tid_it = idTypeSizeMap.find(id); - if (tid_it == idTypeSizeMap.end()) { - error("type size for ID not found"); - return 0; - } - - return tid_it->second; - } - - // Is this an opcode we should remove when using --strip? 
- bool spirvbin_t::isStripOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpSource: - case spv::OpSourceExtension: - case spv::OpName: - case spv::OpMemberName: - case spv::OpLine: return true; - default: return false; - } - } - - // Return true if this opcode is flow control - bool spirvbin_t::isFlowCtrl(spv::Op opCode) const - { - switch (opCode) { - case spv::OpBranchConditional: - case spv::OpBranch: - case spv::OpSwitch: - case spv::OpLoopMerge: - case spv::OpSelectionMerge: - case spv::OpLabel: - case spv::OpFunction: - case spv::OpFunctionEnd: return true; - default: return false; - } - } - - // Return true if this opcode defines a type - bool spirvbin_t::isTypeOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpTypeVoid: - case spv::OpTypeBool: - case spv::OpTypeInt: - case spv::OpTypeFloat: - case spv::OpTypeVector: - case spv::OpTypeMatrix: - case spv::OpTypeImage: - case spv::OpTypeSampler: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - case spv::OpTypeStruct: - case spv::OpTypeOpaque: - case spv::OpTypePointer: - case spv::OpTypeFunction: - case spv::OpTypeEvent: - case spv::OpTypeDeviceEvent: - case spv::OpTypeReserveId: - case spv::OpTypeQueue: - case spv::OpTypeSampledImage: - case spv::OpTypePipe: return true; - default: return false; - } - } - - // Return true if this opcode defines a constant - bool spirvbin_t::isConstOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpConstantNull: - case spv::OpConstantSampler: - error("unimplemented constant type"); - return true; - - case spv::OpConstantTrue: - case spv::OpConstantFalse: - case spv::OpConstantComposite: - case spv::OpConstant: - return true; - - default: - return false; - } - } - - const auto inst_fn_nop = [](spv::Op, unsigned) { return false; }; - const auto op_fn_nop = [](spv::Id&) { }; - - // g++ doesn't like these defined in the class proper in an anonymous namespace. - // Dunno why. Also MSVC doesn't like the constexpr keyword. Also dunno why. - // Defining them externally seems to please both compilers, so, here they are. - const spv::Id spirvbin_t::unmapped = spv::Id(-10000); - const spv::Id spirvbin_t::unused = spv::Id(-10001); - const int spirvbin_t::header_size = 5; - - spv::Id spirvbin_t::nextUnusedId(spv::Id id) - { - while (isNewIdMapped(id)) // search for an unused ID - ++id; - - return id; - } - - spv::Id spirvbin_t::localId(spv::Id id, spv::Id newId) - { - assert(id != spv::NoResult && newId != spv::NoResult); - - if (id > bound()) { - error(std::string("ID out of range: ") + std::to_string(id)); - return spirvbin_t::unused; - } - - if (id >= idMapL.size()) - idMapL.resize(id+1, unused); - - if (newId != unmapped && newId != unused) { - if (isOldIdUnused(id)) { - error(std::string("ID unused in module: ") + std::to_string(id)); - return spirvbin_t::unused; - } - - if (!isOldIdUnmapped(id)) { - error(std::string("ID already mapped: ") + std::to_string(id) + " -> " - + std::to_string(localId(id))); - - return spirvbin_t::unused; - } - - if (isNewIdMapped(newId)) { - error(std::string("ID already used in module: ") + std::to_string(newId)); - return spirvbin_t::unused; - } - - msg(4, 4, std::string("map: ") + std::to_string(id) + " -> " + std::to_string(newId)); - setMapped(newId); - largestNewId = std::max(largestNewId, newId); - } - - return idMapL[id] = newId; - } - - // Parse a literal string from the SPIR binary and return it as an std::string - // Due to C++11 RValue references, this doesn't copy the result string. 
- std::string spirvbin_t::literalString(unsigned word) const - { - std::string literal; - - literal.reserve(16); - - const char* bytes = reinterpret_cast<const char*>(spv.data() + word); - - while (bytes && *bytes) - literal += *bytes++; - - return literal; - } - - void spirvbin_t::applyMap() - { - msg(3, 2, std::string("Applying map: ")); - - // Map local IDs through the ID map - process(inst_fn_nop, // ignore instructions - [this](spv::Id& id) { - id = localId(id); - - if (errorLatch) - return; - - assert(id != unused && id != unmapped); - } - ); - } - - // Find free IDs for anything we haven't mapped - void spirvbin_t::mapRemainder() - { - msg(3, 2, std::string("Remapping remainder: ")); - - spv::Id unusedId = 1; // can't use 0: that's NoResult - spirword_t maxBound = 0; - - for (spv::Id id = 0; id < idMapL.size(); ++id) { - if (isOldIdUnused(id)) - continue; - - // Find a new mapping for any used but unmapped IDs - if (isOldIdUnmapped(id)) { - localId(id, unusedId = nextUnusedId(unusedId)); - if (errorLatch) - return; - } - - if (isOldIdUnmapped(id)) { - error(std::string("old ID not mapped: ") + std::to_string(id)); - return; - } - - // Track max bound - maxBound = std::max(maxBound, localId(id) + 1); - - if (errorLatch) - return; - } - - bound(maxBound); // reset header ID bound to as big as it now needs to be - } - - // Mark debug instructions for stripping - void spirvbin_t::stripDebug() - { - // Strip instructions in the stripOp set: debug info. - process( - [&](spv::Op opCode, unsigned start) { - // remember opcodes we want to strip later - if (isStripOp(opCode)) - stripInst(start); - return true; - }, - op_fn_nop); - } - - // Mark instructions that refer to now-removed IDs for stripping - void spirvbin_t::stripDeadRefs() - { - process( - [&](spv::Op opCode, unsigned start) { - // strip opcodes pointing to removed data - switch (opCode) { - case spv::OpName: - case spv::OpMemberName: - case spv::OpDecorate: - case spv::OpMemberDecorate: - if (idPosR.find(asId(start+1)) == idPosR.end()) - stripInst(start); - break; - default: - break; // leave it alone - } - - return true; - }, - op_fn_nop); - - strip(); - } - - // Update local maps of ID, type, etc positions - void spirvbin_t::buildLocalMaps() - { - msg(2, 2, std::string("build local maps: ")); - - mapped.clear(); - idMapL.clear(); -// preserve nameMap, so we don't clear that.
- fnPos.clear(); - fnCalls.clear(); - typeConstPos.clear(); - idPosR.clear(); - entryPoint = spv::NoResult; - largestNewId = 0; - - idMapL.resize(bound(), unused); - - int fnStart = 0; - spv::Id fnRes = spv::NoResult; - - // build local Id and name maps - process( - [&](spv::Op opCode, unsigned start) { - unsigned word = start+1; - spv::Id typeId = spv::NoResult; - - if (spv::InstructionDesc[opCode].hasType()) - typeId = asId(word++); - - // If there's a result ID, remember the size of its type - if (spv::InstructionDesc[opCode].hasResult()) { - const spv::Id resultId = asId(word++); - idPosR[resultId] = start; - - if (typeId != spv::NoResult) { - const unsigned idTypeSize = typeSizeInWords(typeId); - - if (errorLatch) - return false; - - if (idTypeSize != 0) - idTypeSizeMap[resultId] = idTypeSize; - } - } - - if (opCode == spv::Op::OpName) { - const spv::Id target = asId(start+1); - const std::string name = literalString(start+2); - nameMap[name] = target; - - } else if (opCode == spv::Op::OpFunctionCall) { - ++fnCalls[asId(start + 3)]; - } else if (opCode == spv::Op::OpEntryPoint) { - entryPoint = asId(start + 2); - } else if (opCode == spv::Op::OpFunction) { - if (fnStart != 0) { - error("nested function found"); - return false; - } - - fnStart = start; - fnRes = asId(start + 2); - } else if (opCode == spv::Op::OpFunctionEnd) { - assert(fnRes != spv::NoResult); - if (fnStart == 0) { - error("function end without function start"); - return false; - } - - fnPos[fnRes] = range_t(fnStart, start + asWordCount(start)); - fnStart = 0; - } else if (isConstOp(opCode)) { - if (errorLatch) - return false; - - assert(asId(start + 2) != spv::NoResult); - typeConstPos.insert(start); - } else if (isTypeOp(opCode)) { - assert(asId(start + 1) != spv::NoResult); - typeConstPos.insert(start); - } - - return false; - }, - - [this](spv::Id& id) { localId(id, unmapped); } - ); - } - - // Validate the SPIR header - void spirvbin_t::validate() const - { - msg(2, 2, std::string("validating: ")); - - if (spv.size() < header_size) { - error("file too short: "); - return; - } - - if (magic() != spv::MagicNumber) { - error("bad magic number"); - return; - } - - // field 1 = version - // field 2 = generator magic - // field 3 = result bound - - if (schemaNum() != 0) { - error("bad schema, must be 0"); - return; - } - } - - int spirvbin_t::processInstruction(unsigned word, instfn_t instFn, idfn_t idFn) - { - const auto instructionStart = word; - const unsigned wordCount = asWordCount(instructionStart); - const int nextInst = word++ + wordCount; - spv::Op opCode = asOpCode(instructionStart); - - if (nextInst > int(spv.size())) { - error("spir instruction terminated too early"); - return -1; - } - - // Base for computing number of operands; will be updated as more is learned - unsigned numOperands = wordCount - 1; - - if (instFn(opCode, instructionStart)) - return nextInst; - - // Read type and result ID from instruction desc table - if (spv::InstructionDesc[opCode].hasType()) { - idFn(asId(word++)); - --numOperands; - } - - if (spv::InstructionDesc[opCode].hasResult()) { - idFn(asId(word++)); - --numOperands; - } - - // Extended instructions: currently, assume everything is an ID. 
- // TODO: add whatever data we need for exceptions to that - if (opCode == spv::OpExtInst) { - word += 2; // instruction set, and instruction from set - numOperands -= 2; - - for (unsigned op=0; op < numOperands; ++op) - idFn(asId(word++)); // ID - - return nextInst; - } - - // Circular buffer so we can look back at previous unmapped values during the mapping pass. - static const unsigned idBufferSize = 4; - spv::Id idBuffer[idBufferSize]; - unsigned idBufferPos = 0; - - // Store IDs from instruction in our map - for (int op = 0; numOperands > 0; ++op, --numOperands) { - // SpecConstantOp is special: it includes the operands of another opcode which is - // given as a literal in the 3rd word. We will switch over to pretending that the - // opcode being processed is the literal opcode value of the SpecConstantOp. See the - // SPIRV spec for details. This way we will handle IDs and literals as appropriate for - // the embedded op. - if (opCode == spv::OpSpecConstantOp) { - if (op == 0) { - opCode = asOpCode(word++); // this is the opcode embedded in the SpecConstantOp. - --numOperands; - } - } - - switch (spv::InstructionDesc[opCode].operands.getClass(op)) { - case spv::OperandId: - case spv::OperandScope: - case spv::OperandMemorySemantics: - idBuffer[idBufferPos] = asId(word); - idBufferPos = (idBufferPos + 1) % idBufferSize; - idFn(asId(word++)); - break; - - case spv::OperandVariableIds: - for (unsigned i = 0; i < numOperands; ++i) - idFn(asId(word++)); - return nextInst; - - case spv::OperandVariableLiterals: - // for clarity - // if (opCode == spv::OpDecorate && asDecoration(word - 1) == spv::DecorationBuiltIn) { - // ++word; - // --numOperands; - // } - // word += numOperands; - return nextInst; - - case spv::OperandVariableLiteralId: { - if (opCode == OpSwitch) { - // word-2 is the position of the selector ID. OpSwitch Literals match its type. - // In case the IDs are currently being remapped, we get the word[-2] ID from - // the circular idBuffer. - const unsigned literalSizePos = (idBufferPos+idBufferSize-2) % idBufferSize; - const unsigned literalSize = idTypeSizeInWords(idBuffer[literalSizePos]); - const unsigned numLiteralIdPairs = (nextInst-word) / (1+literalSize); - - if (errorLatch) - return -1; - - for (unsigned arg=0; arg instPos; - instPos.reserve(unsigned(spv.size()) / 16); // initial estimate; can grow if needed. - - // Build local table of instruction start positions - process( - [&](spv::Op, unsigned start) { instPos.push_back(start); return true; }, - op_fn_nop); - - if (errorLatch) - return; - - // Window size for context-sensitive canonicalization values - // Empirical best size from a single data set. TODO: Would be a good tunable. - // We essentially perform a little convolution around each instruction, - // to capture the flavor of nearby code, to hopefully match to similar - // code in other modules. - static const unsigned windowSize = 2; - - for (unsigned entry = 0; entry < unsigned(instPos.size()); ++entry) { - const unsigned start = instPos[entry]; - const spv::Op opCode = asOpCode(start); - - if (opCode == spv::OpFunction) - fnId = asId(start + 2); - - if (opCode == spv::OpFunctionEnd) - fnId = spv::NoResult; - - if (fnId != spv::NoResult) { // if inside a function - if (spv::InstructionDesc[opCode].hasResult()) { - const unsigned word = start + (spv::InstructionDesc[opCode].hasType() ? 
2 : 1); - const spv::Id resId = asId(word); - std::uint32_t hashval = fnId * 17; // small prime - - for (unsigned i = entry-1; i >= entry-windowSize; --i) { - if (asOpCode(instPos[i]) == spv::OpFunction) - break; - hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime - } - - for (unsigned i = entry; i <= entry + windowSize; ++i) { - if (asOpCode(instPos[i]) == spv::OpFunctionEnd) - break; - hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime - } - - if (isOldIdUnmapped(resId)) { - localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); - if (errorLatch) - return; - } - - } - } - } - - spv::Op thisOpCode(spv::OpNop); - std::unordered_map opCounter; - int idCounter(0); - fnId = spv::NoResult; - - process( - [&](spv::Op opCode, unsigned start) { - switch (opCode) { - case spv::OpFunction: - // Reset counters at each function - idCounter = 0; - opCounter.clear(); - fnId = asId(start + 2); - break; - - case spv::OpImageSampleImplicitLod: - case spv::OpImageSampleExplicitLod: - case spv::OpImageSampleDrefImplicitLod: - case spv::OpImageSampleDrefExplicitLod: - case spv::OpImageSampleProjImplicitLod: - case spv::OpImageSampleProjExplicitLod: - case spv::OpImageSampleProjDrefImplicitLod: - case spv::OpImageSampleProjDrefExplicitLod: - case spv::OpDot: - case spv::OpCompositeExtract: - case spv::OpCompositeInsert: - case spv::OpVectorShuffle: - case spv::OpLabel: - case spv::OpVariable: - - case spv::OpAccessChain: - case spv::OpLoad: - case spv::OpStore: - case spv::OpCompositeConstruct: - case spv::OpFunctionCall: - ++opCounter[opCode]; - idCounter = 0; - thisOpCode = opCode; - break; - default: - thisOpCode = spv::OpNop; - } - - return false; - }, - - [&](spv::Id& id) { - if (thisOpCode != spv::OpNop) { - ++idCounter; - const std::uint32_t hashval = opCounter[thisOpCode] * thisOpCode * 50047 + idCounter + fnId * 117; - - if (isOldIdUnmapped(id)) - localId(id, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); - } - }); - } - - // EXPERIMENTAL: forward IO and uniform load/stores into operands - // This produces invalid Schema-0 SPIRV - void spirvbin_t::forwardLoadStores() - { - idset_t fnLocalVars; // set of function local vars - idmap_t idMap; // Map of load result IDs to what they load - - // EXPERIMENTAL: Forward input and access chain loads into consumptions - process( - [&](spv::Op opCode, unsigned start) { - // Add inputs and uniforms to the map - if ((opCode == spv::OpVariable && asWordCount(start) == 4) && - (spv[start+3] == spv::StorageClassUniform || - spv[start+3] == spv::StorageClassUniformConstant || - spv[start+3] == spv::StorageClassInput)) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpAccessChain && fnLocalVars.count(asId(start+3)) > 0) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { - idMap[asId(start+2)] = asId(start+3); - stripInst(start); - } - - return false; - }, - - [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } - ); - - if (errorLatch) - return; - - // EXPERIMENTAL: Implicit output stores - fnLocalVars.clear(); - idMap.clear(); - - process( - [&](spv::Op opCode, unsigned start) { - // Add inputs and uniforms to the map - if ((opCode == spv::OpVariable && asWordCount(start) == 4) && - (spv[start+3] == spv::StorageClassOutput)) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { - idMap[asId(start+2)] = asId(start+1); - 
stripInst(start); - } - - return false; - }, - op_fn_nop); - - if (errorLatch) - return; - - process( - inst_fn_nop, - [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } - ); - - if (errorLatch) - return; - - strip(); // strip out data we decided to eliminate - } - - // optimize loads and stores - void spirvbin_t::optLoadStore() - { - idset_t fnLocalVars; // candidates for removal (only locals) - idmap_t idMap; // Map of load result IDs to what they load - blockmap_t blockMap; // Map of IDs to blocks they first appear in - int blockNum = 0; // block count, to avoid crossing flow control - - // Find all the function local pointers stored at most once, and not via access chains - process( - [&](spv::Op opCode, unsigned start) { - const int wordCount = asWordCount(start); - - // Count blocks, so we can avoid crossing flow control - if (isFlowCtrl(opCode)) - ++blockNum; - - // Add local variables to the map - if ((opCode == spv::OpVariable && spv[start+3] == spv::StorageClassFunction && asWordCount(start) == 4)) { - fnLocalVars.insert(asId(start+2)); - return true; - } - - // Ignore process vars referenced via access chain - if ((opCode == spv::OpAccessChain || opCode == spv::OpInBoundsAccessChain) && fnLocalVars.count(asId(start+3)) > 0) { - fnLocalVars.erase(asId(start+3)); - idMap.erase(asId(start+3)); - return true; - } - - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { - const spv::Id varId = asId(start+3); - - // Avoid loads before stores - if (idMap.find(varId) == idMap.end()) { - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // don't do for volatile references - if (wordCount > 4 && (spv[start+4] & spv::MemoryAccessVolatileMask)) { - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // Handle flow control - if (blockMap.find(varId) == blockMap.end()) { - blockMap[varId] = blockNum; // track block we found it in. - } else if (blockMap[varId] != blockNum) { - fnLocalVars.erase(varId); // Ignore if crosses flow control - idMap.erase(varId); - } - - return true; - } - - if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { - const spv::Id varId = asId(start+1); - - if (idMap.find(varId) == idMap.end()) { - idMap[varId] = asId(start+2); - } else { - // Remove if it has more than one store to the same pointer - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // don't do for volatile references - if (wordCount > 3 && (spv[start+3] & spv::MemoryAccessVolatileMask)) { - fnLocalVars.erase(asId(start+3)); - idMap.erase(asId(start+3)); - } - - // Handle flow control - if (blockMap.find(varId) == blockMap.end()) { - blockMap[varId] = blockNum; // track block we found it in. - } else if (blockMap[varId] != blockNum) { - fnLocalVars.erase(varId); // Ignore if crosses flow control - idMap.erase(varId); - } - - return true; - } - - return false; - }, - - // If local var id used anywhere else, don't eliminate - [&](spv::Id& id) { - if (fnLocalVars.count(id) > 0) { - fnLocalVars.erase(id); - idMap.erase(id); - } - } - ); - - if (errorLatch) - return; - - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) - idMap[asId(start+2)] = idMap[asId(start+3)]; - return false; - }, - op_fn_nop); - - if (errorLatch) - return; - - // Chase replacements to their origins, in case there is a chain such as: - // 2 = store 1 - // 3 = load 2 - // 4 = store 3 - // 5 = load 4 - // We want to replace uses of 5 with 1. 
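The loop that follows implements this chase; here is the same idea reduced to plain standard containers, as a hedged sketch rather than the remapper's actual code. It assumes the chains are acyclic, which the SSA construction of the pass above guarantees.

```cpp
#include <cstdint>
#include <unordered_map>

using Id = std::uint32_t;

// Point every chain entry (e.g. 5 -> 3 -> 1) directly at its origin,
// so the later substitution pass is a single lookup per operand.
void flattenChains(std::unordered_map<Id, Id>& idMap) {
    for (auto& entry : idMap) {
        Id id = entry.second;
        while (idMap.count(id) != 0) // chase to the end of the chain
            id = idMap.at(id);
        entry.second = id;           // replace with the final result
    }
}
```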
- for (const auto& idPair : idMap) { - spv::Id id = idPair.first; - while (idMap.find(id) != idMap.end()) // Chase to end of chain - id = idMap[id]; - - idMap[idPair.first] = id; // replace with final result - } - - // Remove the load/store/variables for the ones we've discovered - process( - [&](spv::Op opCode, unsigned start) { - if ((opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) || - (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) || - (opCode == spv::OpVariable && fnLocalVars.count(asId(start+2)) > 0)) { - - stripInst(start); - return true; - } - - return false; - }, - - [&](spv::Id& id) { - if (idMap.find(id) != idMap.end()) id = idMap[id]; - } - ); - - if (errorLatch) - return; - - strip(); // strip out data we decided to eliminate - } - - // remove bodies of uncalled functions - void spirvbin_t::dceFuncs() - { - msg(3, 2, std::string("Removing Dead Functions: ")); - - // TODO: There are more efficient ways to do this. - bool changed = true; - - while (changed) { - changed = false; - - for (auto fn = fnPos.begin(); fn != fnPos.end(); ) { - if (fn->first == entryPoint) { // don't DCE away the entry point! - ++fn; - continue; - } - - const auto call_it = fnCalls.find(fn->first); - - if (call_it == fnCalls.end() || call_it->second == 0) { - changed = true; - stripRange.push_back(fn->second); - - // decrease counts of called functions - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::Op::OpFunctionCall) { - const auto call_it = fnCalls.find(asId(start + 3)); - if (call_it != fnCalls.end()) { - if (--call_it->second <= 0) - fnCalls.erase(call_it); - } - } - - return true; - }, - op_fn_nop, - fn->second.first, - fn->second.second); - - if (errorLatch) - return; - - fn = fnPos.erase(fn); - } else ++fn; - } - } - } - - // remove unused function variables + decorations - void spirvbin_t::dceVars() - { - msg(3, 2, std::string("DCE Vars: ")); - - std::unordered_map varUseCount; - - // Count function variable use - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::OpVariable) { - ++varUseCount[asId(start+2)]; - return true; - } else if (opCode == spv::OpEntryPoint) { - const int wordCount = asWordCount(start); - for (int i = 4; i < wordCount; i++) { - ++varUseCount[asId(start+i)]; - } - return true; - } else - return false; - }, - - [&](spv::Id& id) { if (varUseCount[id]) ++varUseCount[id]; } - ); - - if (errorLatch) - return; - - // Remove single-use function variables + associated decorations and names - process( - [&](spv::Op opCode, unsigned start) { - spv::Id id = spv::NoResult; - if (opCode == spv::OpVariable) - id = asId(start+2); - if (opCode == spv::OpDecorate || opCode == spv::OpName) - id = asId(start+1); - - if (id != spv::NoResult && varUseCount[id] == 1) - stripInst(start); - - return true; - }, - op_fn_nop); - } - - // remove unused types - void spirvbin_t::dceTypes() - { - std::vector isType(bound(), false); - - // for speed, make O(1) way to get to type query (map is log(n)) - for (const auto typeStart : typeConstPos) - isType[asTypeConstId(typeStart)] = true; - - std::unordered_map typeUseCount; - - // This is not the most efficient algorithm, but this is an offline tool, and - // it's easy to write this way. Can be improved opportunistically if needed. 
- bool changed = true; - while (changed) { - changed = false; - strip(); - typeUseCount.clear(); - - // Count total type usage - process(inst_fn_nop, - [&](spv::Id& id) { if (isType[id]) ++typeUseCount[id]; } - ); - - if (errorLatch) - return; - - // Remove single reference types - for (const auto typeStart : typeConstPos) { - const spv::Id typeId = asTypeConstId(typeStart); - if (typeUseCount[typeId] == 1) { - changed = true; - --typeUseCount[typeId]; - stripInst(typeStart); - } - } - - if (errorLatch) - return; - } - } - -#ifdef NOTDEF - bool spirvbin_t::matchType(const spirvbin_t::globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const - { - // Find the local type id "lt" and global type id "gt" - const auto lt_it = typeConstPosR.find(lt); - if (lt_it == typeConstPosR.end()) - return false; - - const auto typeStart = lt_it->second; - - // Search for entry in global table - const auto gtype = globalTypes.find(gt); - if (gtype == globalTypes.end()) - return false; - - const auto& gdata = gtype->second; - - // local wordcount and opcode - const int wordCount = asWordCount(typeStart); - const spv::Op opCode = asOpCode(typeStart); - - // no type match if opcodes don't match, or operand count doesn't match - if (opCode != opOpCode(gdata[0]) || wordCount != opWordCount(gdata[0])) - return false; - - const unsigned numOperands = wordCount - 2; // all types have a result - - const auto cmpIdRange = [&](range_t range) { - for (int x=range.first; xsecond; - } - - // Hash types to canonical values. This can return ID collisions (it's a bit - // inevitable): it's up to the caller to handle that gracefully. - std::uint32_t spirvbin_t::hashType(unsigned typeStart) const - { - const unsigned wordCount = asWordCount(typeStart); - const spv::Op opCode = asOpCode(typeStart); - - switch (opCode) { - case spv::OpTypeVoid: return 0; - case spv::OpTypeBool: return 1; - case spv::OpTypeInt: return 3 + (spv[typeStart+3]); - case spv::OpTypeFloat: return 5; - case spv::OpTypeVector: - return 6 + hashType(idPos(spv[typeStart+2])) * (spv[typeStart+3] - 1); - case spv::OpTypeMatrix: - return 30 + hashType(idPos(spv[typeStart+2])) * (spv[typeStart+3] - 1); - case spv::OpTypeImage: - return 120 + hashType(idPos(spv[typeStart+2])) + - spv[typeStart+3] + // dimensionality - spv[typeStart+4] * 8 * 16 + // depth - spv[typeStart+5] * 4 * 16 + // arrayed - spv[typeStart+6] * 2 * 16 + // multisampled - spv[typeStart+7] * 1 * 16; // format - case spv::OpTypeSampler: - return 500; - case spv::OpTypeSampledImage: - return 502; - case spv::OpTypeArray: - return 501 + hashType(idPos(spv[typeStart+2])) * spv[typeStart+3]; - case spv::OpTypeRuntimeArray: - return 5000 + hashType(idPos(spv[typeStart+2])); - case spv::OpTypeStruct: - { - std::uint32_t hash = 10000; - for (unsigned w=2; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - - case spv::OpTypeOpaque: return 6000 + spv[typeStart+2]; - case spv::OpTypePointer: return 100000 + hashType(idPos(spv[typeStart+3])); - case spv::OpTypeFunction: - { - std::uint32_t hash = 200000; - for (unsigned w=2; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - - case spv::OpTypeEvent: return 300000; - case spv::OpTypeDeviceEvent: return 300001; - case spv::OpTypeReserveId: return 300002; - case spv::OpTypeQueue: return 300003; - case spv::OpTypePipe: return 300004; - - case spv::OpConstantNull: return 300005; - case spv::OpConstantSampler: return 300006; - - case spv::OpConstantTrue: return 300007; - case 
spv::OpConstantFalse: return 300008; - case spv::OpConstantComposite: - { - std::uint32_t hash = 300011 + hashType(idPos(spv[typeStart+1])); - for (unsigned w=3; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - case spv::OpConstant: - { - std::uint32_t hash = 400011 + hashType(idPos(spv[typeStart+1])); - for (unsigned w=3; w < wordCount; ++w) - hash += w * spv[typeStart+w]; - return hash; - } - - default: - error("unknown type opcode"); - return 0; - } - } - - void spirvbin_t::mapTypeConst() - { - globaltypes_t globalTypeMap; - - msg(3, 2, std::string("Remapping Consts & Types: ")); - - static const std::uint32_t softTypeIdLimit = 3011; // small prime. TODO: get from options - static const std::uint32_t firstMappedID = 8; // offset into ID space - - for (auto& typeStart : typeConstPos) { - const spv::Id resId = asTypeConstId(typeStart); - const std::uint32_t hashval = hashType(typeStart); - - if (errorLatch) - return; - - if (isOldIdUnmapped(resId)) { - localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); - if (errorLatch) - return; - } - } - } - - // Strip a single binary by removing ranges given in stripRange - void spirvbin_t::strip() - { - if (stripRange.empty()) // nothing to do - return; - - // Sort strip ranges in order of traversal - std::sort(stripRange.begin(), stripRange.end()); - - // Allocate a new binary big enough to hold old binary - // We'll step this iterator through the strip ranges as we go through the binary - auto strip_it = stripRange.begin(); - - int strippedPos = 0; - for (unsigned word = 0; word < unsigned(spv.size()); ++word) { - while (strip_it != stripRange.end() && word >= strip_it->second) - ++strip_it; - - if (strip_it == stripRange.end() || word < strip_it->first || word >= strip_it->second) - spv[strippedPos++] = spv[word]; - } - - spv.resize(strippedPos); - stripRange.clear(); - - buildLocalMaps(); - } - - // Strip a single binary by removing ranges given in stripRange - void spirvbin_t::remap(std::uint32_t opts) - { - options = opts; - - // Set up opcode tables from SpvDoc - spv::Parameterize(); - - validate(); // validate header - buildLocalMaps(); // build ID maps - - msg(3, 4, std::string("ID bound: ") + std::to_string(bound())); - - if (options & STRIP) stripDebug(); - if (errorLatch) return; - - strip(); // strip out data we decided to eliminate - if (errorLatch) return; - - if (options & OPT_LOADSTORE) optLoadStore(); - if (errorLatch) return; - - if (options & OPT_FWD_LS) forwardLoadStores(); - if (errorLatch) return; - - if (options & DCE_FUNCS) dceFuncs(); - if (errorLatch) return; - - if (options & DCE_VARS) dceVars(); - if (errorLatch) return; - - if (options & DCE_TYPES) dceTypes(); - if (errorLatch) return; - - strip(); // strip out data we decided to eliminate - if (errorLatch) return; - - stripDeadRefs(); // remove references to things we DCEed - if (errorLatch) return; - - // after the last strip, we must clean any debug info referring to now-deleted data - - if (options & MAP_TYPES) mapTypeConst(); - if (errorLatch) return; - - if (options & MAP_NAMES) mapNames(); - if (errorLatch) return; - - if (options & MAP_FUNCS) mapFnBodies(); - if (errorLatch) return; - - if (options & MAP_ALL) { - mapRemainder(); // map any unmapped IDs - if (errorLatch) return; - - applyMap(); // Now remap each shader to the new IDs we've come up with - if (errorLatch) return; - } - } - - // remap from a memory image - void spirvbin_t::remap(std::vector& in_spv, std::uint32_t opts) - { - 
spv.swap(in_spv); - remap(opts); - spv.swap(in_spv); - } - -} // namespace SPV - -#endif // defined (use_cpp11) - diff --git a/third_party/glslang-spirv/SPVRemapper.h b/third_party/glslang-spirv/SPVRemapper.h deleted file mode 100644 index 97e3f31fa..000000000 --- a/third_party/glslang-spirv/SPVRemapper.h +++ /dev/null @@ -1,304 +0,0 @@ -// -// Copyright (C) 2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef SPIRVREMAPPER_H -#define SPIRVREMAPPER_H - -#include -#include -#include -#include - -namespace spv { - -// MSVC defines __cplusplus as an older value, even when it supports almost all of 11. -// We handle that here by making our own symbol. 
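One detail worth calling out from the std::vector-taking remap() overload just above: the double swap lends the caller's buffer to the member vector and hands it back afterwards, so no copy of the module is ever made. A minimal sketch of the same pattern (class and method names here are illustrative):

```cpp
#include <cstdint>
#include <vector>

class Pass {
public:
    void run(std::vector<std::uint32_t>& callerWords) {
        words_.swap(callerWords); // borrow the buffer: O(1), no copy
        transformInPlace();
        words_.swap(callerWords); // hand the edited buffer back
    }

private:
    void transformInPlace() { /* edit words_ in place */ }
    std::vector<std::uint32_t> words_;
};
```

The use_cpp11 guard that the comment above describes follows next.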
-#if __cplusplus >= 201103L || _MSC_VER >= 1700 -# define use_cpp11 1 -#endif - -class spirvbin_base_t -{ -public: - enum Options { - NONE = 0, - STRIP = (1<<0), - MAP_TYPES = (1<<1), - MAP_NAMES = (1<<2), - MAP_FUNCS = (1<<3), - DCE_FUNCS = (1<<4), - DCE_VARS = (1<<5), - DCE_TYPES = (1<<6), - OPT_LOADSTORE = (1<<7), - OPT_FWD_LS = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV - MAP_ALL = (MAP_TYPES | MAP_NAMES | MAP_FUNCS), - DCE_ALL = (DCE_FUNCS | DCE_VARS | DCE_TYPES), - OPT_ALL = (OPT_LOADSTORE), - - ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL), - DO_EVERYTHING = (STRIP | ALL_BUT_STRIP) - }; -}; - -} // namespace SPV - -#if !defined (use_cpp11) -#include -#include - -namespace spv { -class spirvbin_t : public spirvbin_base_t -{ -public: - spirvbin_t(int /*verbose = 0*/) { } - - void remap(std::vector& /*spv*/, unsigned int /*opts = 0*/) - { - printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n"); - exit(5); - } -}; - -} // namespace SPV - -#else // defined (use_cpp11) - -#include -#include -#include -#include -#include -#include -#include - -#include "spirv.hpp" -#include "spvIR.h" - -namespace spv { - -// class to hold SPIR-V binary data for remapping, DCE, and debug stripping -class spirvbin_t : public spirvbin_base_t -{ -public: - spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false) - { } - - virtual ~spirvbin_t() { } - - // remap on an existing binary in memory - void remap(std::vector& spv, std::uint32_t opts = DO_EVERYTHING); - - // Type for error/log handler functions - typedef std::function errorfn_t; - typedef std::function logfn_t; - - // Register error/log handling functions (can be lambda fn / functor / etc) - static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; } - static void registerLogHandler(logfn_t handler) { logHandler = handler; } - -protected: - // This can be overridden to provide other message behavior if needed - virtual void msg(int minVerbosity, int indent, const std::string& txt) const; - -private: - // Local to global, or global to local ID map - typedef std::unordered_map idmap_t; - typedef std::unordered_set idset_t; - typedef std::unordered_map blockmap_t; - - void remap(std::uint32_t opts = DO_EVERYTHING); - - // Map of names to IDs - typedef std::unordered_map namemap_t; - - typedef std::uint32_t spirword_t; - - typedef std::pair range_t; - typedef std::function idfn_t; - typedef std::function instfn_t; - - // Special Values for ID map: - static const spv::Id unmapped; // unchanged from default value - static const spv::Id unused; // unused ID - static const int header_size; // SPIR header = 5 words - - class id_iterator_t; - - // For mapping type entries between different shaders - typedef std::vector typeentry_t; - typedef std::map globaltypes_t; - - // A set that preserves position order, and a reverse map - typedef std::set posmap_t; - typedef std::unordered_map posmap_rev_t; - - // Maps and ID to the size of its base type, if known. 
-    typedef std::unordered_map<spv::Id, unsigned> typesize_map_t;
-
-    // handle error
-    void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); }
-
-    bool isConstOp(spv::Op opCode) const;
-    bool isTypeOp(spv::Op opCode) const;
-    bool isStripOp(spv::Op opCode) const;
-    bool isFlowCtrl(spv::Op opCode) const;
-    range_t literalRange(spv::Op opCode) const;
-    range_t typeRange(spv::Op opCode) const;
-    range_t constRange(spv::Op opCode) const;
-    unsigned typeSizeInWords(spv::Id id) const;
-    unsigned idTypeSizeInWords(spv::Id id) const;
-
-    spv::Id& asId(unsigned word) { return spv[word]; }
-    const spv::Id& asId(unsigned word) const { return spv[word]; }
-    spv::Op asOpCode(unsigned word) const { return opOpCode(spv[word]); }
-    std::uint32_t asOpCodeHash(unsigned word);
-    spv::Decoration asDecoration(unsigned word) const { return spv::Decoration(spv[word]); }
-    unsigned asWordCount(unsigned word) const { return opWordCount(spv[word]); }
-    spv::Id asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); }
-    unsigned idPos(spv::Id id) const;
-
-    static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; }
-    static spv::Op opOpCode(spirword_t data) { return spv::Op(data & spv::OpCodeMask); }
-
-    // Header access & set methods
-    spirword_t magic() const { return spv[0]; } // return magic number
-    spirword_t bound() const { return spv[3]; } // return Id bound from header
-    spirword_t bound(spirword_t b) { return spv[3] = b; };
-    spirword_t genmagic() const { return spv[2]; } // generator magic
-    spirword_t genmagic(spirword_t m) { return spv[2] = m; }
-    spirword_t schemaNum() const { return spv[4]; } // schema number from header
-
-    // Mapping fns: get
-    spv::Id localId(spv::Id id) const { return idMapL[id]; }
-
-    // Mapping fns: set
-    inline spv::Id localId(spv::Id id, spv::Id newId);
-    void countIds(spv::Id id);
-
-    // Return next unused new local ID.
-    // NOTE: boost::dynamic_bitset would be more efficient due to find_next(),
-    // which std::vector<bool> doesn't have.
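With the public surface shown earlier in this header in view, this is how a client would drive the remapper. A hedged usage sketch: it assumes the module word type is std::uint32_t and that the SPVRemapper.h this diff deletes is still on the include path.

```cpp
#include <cstdint>
#include <vector>

#include "SPVRemapper.h"

void remapModule(std::vector<std::uint32_t>& spirvWords) {
    spv::spirvbin_t remapper;
    // Everything except debug-info stripping; see the Options enum above.
    remapper.remap(spirvWords, spv::spirvbin_t::ALL_BUT_STRIP);
}
```

The nextUnusedId() declaration that the note above refers to follows.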
-    inline spv::Id nextUnusedId(spv::Id id);
-
-    void buildLocalMaps();
-    std::string literalString(unsigned word) const; // Return literal as a std::string
-    int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; }
-
-    bool isNewIdMapped(spv::Id newId) const { return isMapped(newId); }
-    bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; }
-    bool isOldIdUnused(spv::Id oldId) const { return localId(oldId) == unused; }
-    bool isOldIdMapped(spv::Id oldId) const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); }
-    bool isFunction(spv::Id oldId) const { return fnPos.find(oldId) != fnPos.end(); }
-
-    // bool matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const;
-    // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const;
-    std::uint32_t hashType(unsigned typeStart) const;
-
-    spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0);
-    int processInstruction(unsigned word, instfn_t, idfn_t);
-
-    void validate() const;
-    void mapTypeConst();
-    void mapFnBodies();
-    void optLoadStore();
-    void dceFuncs();
-    void dceVars();
-    void dceTypes();
-    void mapNames();
-    void foldIds();           // fold IDs to smallest space
-    void forwardLoadStores(); // load store forwarding (EXPERIMENTAL)
-    void offsetIds();         // create relative offset IDs
-
-    void applyMap();          // remap per local name map
-    void mapRemainder();      // map any IDs we haven't touched yet
-    void stripDebug();        // strip all debug info
-    void stripDeadRefs();     // strips debug info for now-dead references after DCE
-    void strip();             // remove debug symbols
-
-    std::vector<spirword_t> spv; // SPIR-V words
-
-    namemap_t nameMap; // ID names from OpName
-
-    // Since we want to also do binary ops, we can't use std::vector<bool>. We could use
-    // boost::dynamic_bitset, but we're trying to avoid a boost dependency.
-    typedef std::uint64_t bits_t;
-    std::vector<bits_t> mapped; // which new IDs have been mapped
-    static const int mBits = sizeof(bits_t) * 4;
-
-    bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); }
-    void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); }
-    void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); }
-    size_t maxMappedId() const { return mapped.size() * mBits; }
-
-    // Add a strip range for a given instruction starting at 'start'
-    // Note: avoiding brace initializers to please older versions of MSVC.
-    void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); }
-
-    // Function start and end. Use unordered_map because we'll have
-    // many fewer functions than IDs.
-    std::unordered_map<spv::Id, range_t> fnPos;
-
-    // Which functions are called, anywhere in the module, with a call count
-    std::unordered_map<spv::Id, int> fnCalls;
-
-    posmap_t typeConstPos;        // word positions that define types & consts (ordered)
-    posmap_rev_t idPosR;          // reverse map from IDs to positions
-    typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known.
-
-    std::vector<spv::Id> idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs
-
-    spv::Id entryPoint;   // module entry point
-    spv::Id largestNewId; // biggest new ID we have mapped anything to
-
-    // Sections of the binary to strip, given as [begin,end)
-    std::vector<range_t> stripRange;
-
-    // processing options:
-    std::uint32_t options;
-    int verbose; // verbosity level
-
-    // Error latch: this is set if the error handler is ever executed.
It would be better to - // use a try/catch block and throw, but that's not desired for certain environments, so - // this is the alternative. - mutable bool errorLatch; - - static errorfn_t errorHandler; - static logfn_t logHandler; -}; - -} // namespace SPV - -#endif // defined (use_cpp11) -#endif // SPIRVREMAPPER_H diff --git a/third_party/glslang-spirv/SpvBuilder.cpp b/third_party/glslang-spirv/SpvBuilder.cpp deleted file mode 100644 index 0afcc6433..000000000 --- a/third_party/glslang-spirv/SpvBuilder.cpp +++ /dev/null @@ -1,2676 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Helper for making SPIR-V IR. Generally, this is documented in the header -// SpvBuilder.h. -// - -#include -#include - -#include -#include - -#include "SpvBuilder.h" - -#ifdef AMD_EXTENSIONS - #include "hex_float.h" -#endif - -#ifndef _WIN32 - #include -#endif - -namespace spv { - -Builder::Builder(unsigned int spvVersion, unsigned int magicNumber, SpvBuildLogger* buildLogger) : - spvVersion(spvVersion), - source(SourceLanguageUnknown), - sourceVersion(0), - sourceFileStringId(NoResult), - currentLine(0), - emitOpLines(false), - addressModel(AddressingModelLogical), - memoryModel(MemoryModelGLSL450), - builderNumber(magicNumber), - buildPoint(0), - uniqueId(0), - entryPointFunction(0), - generatingOpCodeForSpecConst(false), - logger(buildLogger) -{ - clearAccessChain(); -} - -Builder::~Builder() -{ -} - -Id Builder::import(const char* name) -{ - Instruction* import = new Instruction(getUniqueId(), NoType, OpExtInstImport); - import->addStringOperand(name); - - imports.push_back(std::unique_ptr(import)); - return import->getResultId(); -} - -// Emit an OpLine if we've been asked to emit OpLines and the line number -// has changed since the last time, and is a valid line number. 
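Before Builder::setLine() follows below, a usage note on import() above: it is how a module pulls in an extended instruction set, and the returned Id is what later OpExtInst instructions reference. A one-call sketch; the wrapper function name is illustrative, while "GLSL.std.450" is the standard GLSL extended instruction set name.

```cpp
#include "SpvBuilder.h"

// Import GLSL.std.450 once; reuse the Id for every OpExtInst emitted.
spv::Id importGlslStd450(spv::Builder& builder) {
    return builder.import("GLSL.std.450");
}
```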
-void Builder::setLine(int lineNum) -{ - if (lineNum != 0 && lineNum != currentLine) { - currentLine = lineNum; - if (emitOpLines) - addLine(sourceFileStringId, currentLine, 0); - } -} - -void Builder::addLine(Id fileName, int lineNum, int column) -{ - Instruction* line = new Instruction(OpLine); - line->addIdOperand(fileName); - line->addImmediateOperand(lineNum); - line->addImmediateOperand(column); - buildPoint->addInstruction(std::unique_ptr(line)); -} - -// For creating new groupedTypes (will return old type if the requested one was already made). -Id Builder::makeVoidType() -{ - Instruction* type; - if (groupedTypes[OpTypeVoid].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeVoid); - groupedTypes[OpTypeVoid].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeVoid].back(); - - return type->getResultId(); -} - -Id Builder::makeBoolType() -{ - Instruction* type; - if (groupedTypes[OpTypeBool].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeBool); - groupedTypes[OpTypeBool].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeBool].back(); - - return type->getResultId(); -} - -Id Builder::makeSamplerType() -{ - Instruction* type; - if (groupedTypes[OpTypeSampler].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeSampler); - groupedTypes[OpTypeSampler].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeSampler].back(); - - return type->getResultId(); -} - -Id Builder::makePointer(StorageClass storageClass, Id pointee) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { - type = groupedTypes[OpTypePointer][t]; - if (type->getImmediateOperand(0) == (unsigned)storageClass && - type->getIdOperand(1) == pointee) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypePointer); - type->addImmediateOperand(storageClass); - type->addIdOperand(pointee); - groupedTypes[OpTypePointer].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeIntegerType(int width, bool hasSign) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeInt].size(); ++t) { - type = groupedTypes[OpTypeInt][t]; - if (type->getImmediateOperand(0) == (unsigned)width && - type->getImmediateOperand(1) == (hasSign ? 1u : 0u)) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeInt); - type->addImmediateOperand(width); - type->addImmediateOperand(hasSign ? 
1 : 0); - groupedTypes[OpTypeInt].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (width) { - case 16: - addCapability(CapabilityInt16); - break; - case 64: - addCapability(CapabilityInt64); - break; - default: - break; - } - - return type->getResultId(); -} - -Id Builder::makeFloatType(int width) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeFloat].size(); ++t) { - type = groupedTypes[OpTypeFloat][t]; - if (type->getImmediateOperand(0) == (unsigned)width) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeFloat); - type->addImmediateOperand(width); - groupedTypes[OpTypeFloat].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (width) { - case 16: - addCapability(CapabilityFloat16); - break; - case 64: - addCapability(CapabilityFloat64); - break; - default: - break; - } - - return type->getResultId(); -} - -// Make a struct without checking for duplication. -// See makeStructResultType() for non-decorated structs -// needed as the result of some instructions, which does -// check for duplicates. -Id Builder::makeStructType(const std::vector& members, const char* name) -{ - // Don't look for previous one, because in the general case, - // structs can be duplicated except for decorations. - - // not found, make it - Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeStruct); - for (int op = 0; op < (int)members.size(); ++op) - type->addIdOperand(members[op]); - groupedTypes[OpTypeStruct].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - addName(type->getResultId(), name); - - return type->getResultId(); -} - -// Make a struct for the simple results of several instructions, -// checking for duplication. 
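makeStructResultType() follows; first, the lookup-then-create shape that makeVoidType(), makePointer(), makeIntegerType(), and makeFloatType() above all share, reduced to a hedged sketch with the Instruction machinery replaced by a plain record. This reuse is what keeps each distinct type a single instruction in the module.

```cpp
#include <cstdint>
#include <vector>

struct IntType { std::uint32_t resultId; unsigned width; bool hasSign; };

std::uint32_t makeIntegerTypeSketch(std::vector<IntType>& bucket,
                                    std::uint32_t& nextId,
                                    unsigned width, bool hasSign) {
    for (const IntType& t : bucket)             // reuse an existing match
        if (t.width == width && t.hasSign == hasSign)
            return t.resultId;
    bucket.push_back({nextId, width, hasSign}); // otherwise create it
    return nextId++;
}
```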
-Id Builder::makeStructResultType(Id type0, Id type1) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeStruct].size(); ++t) { - type = groupedTypes[OpTypeStruct][t]; - if (type->getNumOperands() != 2) - continue; - if (type->getIdOperand(0) != type0 || - type->getIdOperand(1) != type1) - continue; - return type->getResultId(); - } - - // not found, make it - std::vector members; - members.push_back(type0); - members.push_back(type1); - - return makeStructType(members, "ResType"); -} - -Id Builder::makeVectorType(Id component, int size) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeVector].size(); ++t) { - type = groupedTypes[OpTypeVector][t]; - if (type->getIdOperand(0) == component && - type->getImmediateOperand(1) == (unsigned)size) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeVector); - type->addIdOperand(component); - type->addImmediateOperand(size); - groupedTypes[OpTypeVector].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeMatrixType(Id component, int cols, int rows) -{ - assert(cols <= maxMatrixSize && rows <= maxMatrixSize); - - Id column = makeVectorType(component, rows); - - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeMatrix].size(); ++t) { - type = groupedTypes[OpTypeMatrix][t]; - if (type->getIdOperand(0) == column && - type->getImmediateOperand(1) == (unsigned)cols) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeMatrix); - type->addIdOperand(column); - type->addImmediateOperand(cols); - groupedTypes[OpTypeMatrix].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -// TODO: performance: track arrays per stride -// If a stride is supplied (non-zero) make an array. -// If no stride (0), reuse previous array types. 
-// 'size' is an Id of a constant or specialization constant of the array size -Id Builder::makeArrayType(Id element, Id sizeId, int stride) -{ - Instruction* type; - if (stride == 0) { - // try to find existing type - for (int t = 0; t < (int)groupedTypes[OpTypeArray].size(); ++t) { - type = groupedTypes[OpTypeArray][t]; - if (type->getIdOperand(0) == element && - type->getIdOperand(1) == sizeId) - return type->getResultId(); - } - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeArray); - type->addIdOperand(element); - type->addIdOperand(sizeId); - groupedTypes[OpTypeArray].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeRuntimeArray(Id element) -{ - Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeRuntimeArray); - type->addIdOperand(element); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeFunctionType(Id returnType, const std::vector& paramTypes) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeFunction].size(); ++t) { - type = groupedTypes[OpTypeFunction][t]; - if (type->getIdOperand(0) != returnType || (int)paramTypes.size() != type->getNumOperands() - 1) - continue; - bool mismatch = false; - for (int p = 0; p < (int)paramTypes.size(); ++p) { - if (paramTypes[p] != type->getIdOperand(p + 1)) { - mismatch = true; - break; - } - } - if (! mismatch) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeFunction); - type->addIdOperand(returnType); - for (int p = 0; p < (int)paramTypes.size(); ++p) - type->addIdOperand(paramTypes[p]); - groupedTypes[OpTypeFunction].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeImageType(Id sampledType, Dim dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format) -{ - assert(sampled == 1 || sampled == 2); - - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeImage].size(); ++t) { - type = groupedTypes[OpTypeImage][t]; - if (type->getIdOperand(0) == sampledType && - type->getImmediateOperand(1) == (unsigned int)dim && - type->getImmediateOperand(2) == ( depth ? 1u : 0u) && - type->getImmediateOperand(3) == (arrayed ? 1u : 0u) && - type->getImmediateOperand(4) == ( ms ? 1u : 0u) && - type->getImmediateOperand(5) == sampled && - type->getImmediateOperand(6) == (unsigned int)format) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeImage); - type->addIdOperand(sampledType); - type->addImmediateOperand( dim); - type->addImmediateOperand( depth ? 1 : 0); - type->addImmediateOperand(arrayed ? 1 : 0); - type->addImmediateOperand( ms ? 
1 : 0); - type->addImmediateOperand(sampled); - type->addImmediateOperand((unsigned int)format); - - groupedTypes[OpTypeImage].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (dim) { - case DimBuffer: - if (sampled == 1) - addCapability(CapabilitySampledBuffer); - else - addCapability(CapabilityImageBuffer); - break; - case Dim1D: - if (sampled == 1) - addCapability(CapabilitySampled1D); - else - addCapability(CapabilityImage1D); - break; - case DimCube: - if (arrayed) { - if (sampled == 1) - addCapability(CapabilitySampledCubeArray); - else - addCapability(CapabilityImageCubeArray); - } - break; - case DimRect: - if (sampled == 1) - addCapability(CapabilitySampledRect); - else - addCapability(CapabilityImageRect); - break; - case DimSubpassData: - addCapability(CapabilityInputAttachment); - break; - default: - break; - } - - if (ms) { - if (sampled == 2) { - // Images used with subpass data are not storage - // images, so don't require the capability for them. - if (dim != Dim::DimSubpassData) - addCapability(CapabilityStorageImageMultisample); - if (arrayed) - addCapability(CapabilityImageMSArray); - } - } - - return type->getResultId(); -} - -Id Builder::makeSampledImageType(Id imageType) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeSampledImage].size(); ++t) { - type = groupedTypes[OpTypeSampledImage][t]; - if (type->getIdOperand(0) == imageType) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeSampledImage); - type->addIdOperand(imageType); - - groupedTypes[OpTypeSampledImage].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::getDerefTypeId(Id resultId) const -{ - Id typeId = getTypeId(resultId); - assert(isPointerType(typeId)); - - return module.getInstruction(typeId)->getImmediateOperand(1); -} - -Op Builder::getMostBasicTypeClass(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVoid: - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - case OpTypeStruct: - return typeClass; - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - return getMostBasicTypeClass(instr->getIdOperand(0)); - case OpTypePointer: - return getMostBasicTypeClass(instr->getIdOperand(1)); - default: - assert(0); - return OpTypeFloat; - } -} - -int Builder::getNumTypeConstituents(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - switch (instr->getOpCode()) - { - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - return 1; - case OpTypeVector: - case OpTypeMatrix: - return instr->getImmediateOperand(1); - case OpTypeArray: - { - Id lengthId = instr->getImmediateOperand(1); - return module.getInstruction(lengthId)->getImmediateOperand(0); - } - case OpTypeStruct: - return instr->getNumOperands(); - default: - assert(0); - return 1; - } -} - -// Return the lowest-level type of scalar that an homogeneous composite is made out of. -// Typically, this is just to find out if something is made out of ints or floats. -// However, it includes returning a structure, if say, it is an array of structure. 
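getScalarTypeId() follows below. As a usage aside for the image-type builders just shown, building a sampled 2D texture type takes three calls. A hedged sketch: the enum spellings are from the spirv.hpp this tree vendors, and sampled == 1 means the image is used with a sampler, per the assertion in makeImageType() above.

```cpp
#include "SpvBuilder.h"

spv::Id makeSampled2DTextureType(spv::Builder& builder) {
    spv::Id f32   = builder.makeFloatType(32);
    spv::Id image = builder.makeImageType(
        f32, spv::Dim2D, /*depth*/ false, /*arrayed*/ false,
        /*ms*/ false, /*sampled*/ 1, spv::ImageFormatUnknown);
    return builder.makeSampledImageType(image);
}
```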
-Id Builder::getScalarTypeId(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVoid: - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - case OpTypeStruct: - return instr->getResultId(); - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - case OpTypePointer: - return getScalarTypeId(getContainedTypeId(typeId)); - default: - assert(0); - return NoResult; - } -} - -// Return the type of 'member' of a composite. -Id Builder::getContainedTypeId(Id typeId, int member) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - return instr->getIdOperand(0); - case OpTypePointer: - return instr->getIdOperand(1); - case OpTypeStruct: - return instr->getIdOperand(member); - default: - assert(0); - return NoResult; - } -} - -// Return the immediately contained type of a given composite type. -Id Builder::getContainedTypeId(Id typeId) const -{ - return getContainedTypeId(typeId, 0); -} - -// See if a scalar constant of this type has already been created, so it -// can be reused rather than duplicated. (Required by the specification). -Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const -{ - Instruction* constant; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - if (constant->getOpCode() == opcode && - constant->getTypeId() == typeId && - constant->getImmediateOperand(0) == value) - return constant->getResultId(); - } - - return 0; -} - -// Version of findScalarConstant (see above) for scalars that take two operands (e.g. a 'double' or 'int64'). -Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const -{ - Instruction* constant; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - if (constant->getOpCode() == opcode && - constant->getTypeId() == typeId && - constant->getImmediateOperand(0) == v1 && - constant->getImmediateOperand(1) == v2) - return constant->getResultId(); - } - - return 0; -} - -// Return true if consuming 'opcode' means consuming a constant. -// "constant" here means after final transform to executable code, -// the value consumed will be a constant, so includes specialization. -bool Builder::isConstantOpCode(Op opcode) const -{ - switch (opcode) { - case OpUndef: - case OpConstantTrue: - case OpConstantFalse: - case OpConstant: - case OpConstantComposite: - case OpConstantSampler: - case OpConstantNull: - case OpSpecConstantTrue: - case OpSpecConstantFalse: - case OpSpecConstant: - case OpSpecConstantComposite: - case OpSpecConstantOp: - return true; - default: - return false; - } -} - -// Return true if consuming 'opcode' means consuming a specialization constant. -bool Builder::isSpecConstantOpCode(Op opcode) const -{ - switch (opcode) { - case OpSpecConstantTrue: - case OpSpecConstantFalse: - case OpSpecConstant: - case OpSpecConstantComposite: - case OpSpecConstantOp: - return true; - default: - return false; - } -} - -Id Builder::makeBoolConstant(bool b, bool specConstant) -{ - Id typeId = makeBoolType(); - Instruction* constant; - Op opcode = specConstant ? (b ? OpSpecConstantTrue : OpSpecConstantFalse) : (b ? 
OpConstantTrue : OpConstantFalse); - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = 0; - for (int i = 0; i < (int)groupedConstants[OpTypeBool].size(); ++i) { - constant = groupedConstants[OpTypeBool][i]; - if (constant->getTypeId() == typeId && constant->getOpCode() == opcode) - existing = constant->getResultId(); - } - - if (existing) - return existing; - } - - // Make it - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeBool].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeIntConstant(Id typeId, unsigned value, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeInt, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeInt].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeInt64Constant(Id typeId, unsigned long long value, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - - unsigned op1 = value & 0xFFFFFFFF; - unsigned op2 = value >> 32; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeInt, opcode, typeId, op1, op2); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(op1); - c->addImmediateOperand(op2); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeInt].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeFloatConstant(float f, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(32); - union { float fl; unsigned int ui; } u; - u.fl = f; - unsigned value = u.ui; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeDoubleConstant(double d, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(64); - union { double db; unsigned long long ull; } u; - u.db = d; - unsigned long long value = u.ull; - unsigned op1 = value & 0xFFFFFFFF; - unsigned op2 = value >> 32; - - // See if we already made it. 
Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, op1, op2); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(op1); - c->addImmediateOperand(op2); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -#ifdef AMD_EXTENSIONS -Id Builder::makeFloat16Constant(float f16, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(16); - - spvutils::HexFloat> fVal(f16); - spvutils::HexFloat> f16Val(0); - fVal.castTo(f16Val, spvutils::kRoundToZero); - - unsigned value = f16Val.value().getAsFloat().get_value(); - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (!specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} -#endif - -Id Builder::findCompositeConstant(Op typeClass, const std::vector& comps) const -{ - Instruction* constant = 0; - bool found = false; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - - // same shape? - if (constant->getNumOperands() != (int)comps.size()) - continue; - - // same contents? - bool mismatch = false; - for (int op = 0; op < constant->getNumOperands(); ++op) { - if (constant->getIdOperand(op) != comps[op]) { - mismatch = true; - break; - } - } - if (! mismatch) { - found = true; - break; - } - } - - return found ? constant->getResultId() : NoResult; -} - -// Comments in header -Id Builder::makeCompositeConstant(Id typeId, const std::vector& members, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstantComposite : OpConstantComposite; - assert(typeId); - Op typeClass = getTypeClass(typeId); - - switch (typeClass) { - case OpTypeVector: - case OpTypeArray: - case OpTypeStruct: - case OpTypeMatrix: - break; - default: - assert(0); - return makeFloatConstant(0.0); - } - - if (! specConstant) { - Id existing = findCompositeConstant(typeClass, members); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - for (int op = 0; op < (int)members.size(); ++op) - c->addIdOperand(members[op]); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[typeClass].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Instruction* Builder::addEntryPoint(ExecutionModel model, Function* function, const char* name) -{ - Instruction* entryPoint = new Instruction(OpEntryPoint); - entryPoint->addImmediateOperand(model); - entryPoint->addIdOperand(function->getId()); - entryPoint->addStringOperand(name); - - entryPoints.push_back(std::unique_ptr(entryPoint)); - - return entryPoint; -} - -// Currently relying on the fact that all 'value' of interest are small non-negative values. 
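addExecutionMode() follows; first, an aside on the scalar-constant builders above, which lower floats and doubles to instruction words through unions, splitting doubles into two 32-bit words with the low word first. The same bit-punning expressed with std::memcpy, which sidesteps any type-aliasing questions; function names here are illustrative.

```cpp
#include <cstdint>
#include <cstring>

std::uint32_t floatBits(float f) {
    std::uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);
    return bits; // the word makeFloatConstant() stores
}

void doubleWords(double d, std::uint32_t& lo, std::uint32_t& hi) {
    std::uint64_t bits;
    std::memcpy(&bits, &d, sizeof bits);
    lo = static_cast<std::uint32_t>(bits & 0xFFFFFFFFu); // op1 above
    hi = static_cast<std::uint32_t>(bits >> 32);         // op2 above
}
```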
-void Builder::addExecutionMode(Function* entryPoint, ExecutionMode mode, int value1, int value2, int value3) -{ - Instruction* instr = new Instruction(OpExecutionMode); - instr->addIdOperand(entryPoint->getId()); - instr->addImmediateOperand(mode); - if (value1 >= 0) - instr->addImmediateOperand(value1); - if (value2 >= 0) - instr->addImmediateOperand(value2); - if (value3 >= 0) - instr->addImmediateOperand(value3); - - executionModes.push_back(std::unique_ptr(instr)); -} - -void Builder::addName(Id id, const char* string) -{ - Instruction* name = new Instruction(OpName); - name->addIdOperand(id); - name->addStringOperand(string); - - names.push_back(std::unique_ptr(name)); -} - -void Builder::addMemberName(Id id, int memberNumber, const char* string) -{ - Instruction* name = new Instruction(OpMemberName); - name->addIdOperand(id); - name->addImmediateOperand(memberNumber); - name->addStringOperand(string); - - names.push_back(std::unique_ptr(name)); -} - -void Builder::addDecoration(Id id, Decoration decoration, int num) -{ - if (decoration == spv::DecorationMax) - return; - Instruction* dec = new Instruction(OpDecorate); - dec->addIdOperand(id); - dec->addImmediateOperand(decoration); - if (num >= 0) - dec->addImmediateOperand(num); - - decorations.push_back(std::unique_ptr(dec)); -} - -void Builder::addMemberDecoration(Id id, unsigned int member, Decoration decoration, int num) -{ - Instruction* dec = new Instruction(OpMemberDecorate); - dec->addIdOperand(id); - dec->addImmediateOperand(member); - dec->addImmediateOperand(decoration); - if (num >= 0) - dec->addImmediateOperand(num); - - decorations.push_back(std::unique_ptr(dec)); -} - -// Comments in header -Function* Builder::makeEntryPoint(const char* entryPoint) -{ - assert(! entryPointFunction); - - Block* entry; - std::vector params; - std::vector> decorations; - - entryPointFunction = makeFunctionEntry(NoPrecision, makeVoidType(), entryPoint, params, decorations, &entry); - - return entryPointFunction; -} - -// Comments in header -Function* Builder::makeFunctionEntry(Decoration precision, Id returnType, const char* name, - const std::vector& paramTypes, const std::vector>& decorations, Block **entry) -{ - // Make the function and initial instructions in it - Id typeId = makeFunctionType(returnType, paramTypes); - Id firstParamId = paramTypes.size() == 0 ? 0 : getUniqueIds((int)paramTypes.size()); - Function* function = new Function(getUniqueId(), returnType, typeId, firstParamId, module); - - // Set up the precisions - setPrecision(function->getId(), precision); - for (unsigned p = 0; p < (unsigned)decorations.size(); ++p) { - for (int d = 0; d < (int)decorations[p].size(); ++d) - addDecoration(firstParamId + p, decorations[p][d]); - } - - // CFG - if (entry) { - *entry = new Block(getUniqueId(), *function); - function->addBlock(*entry); - setBuildPoint(*entry); - } - - if (name) - addName(function->getId(), name); - - functions.push_back(std::unique_ptr(function)); - - return function; -} - -// Comments in header -void Builder::makeReturn(bool implicit, Id retVal) -{ - if (retVal) { - Instruction* inst = new Instruction(NoResult, NoType, OpReturnValue); - inst->addIdOperand(retVal); - buildPoint->addInstruction(std::unique_ptr(inst)); - } else - buildPoint->addInstruction(std::unique_ptr(new Instruction(NoResult, NoType, OpReturn))); - - if (! 
implicit) - createAndSetNoPredecessorBlock("post-return"); -} - -// Comments in header -void Builder::leaveFunction() -{ - Block* block = buildPoint; - Function& function = buildPoint->getParent(); - assert(block); - - // If our function did not contain a return, add a return void now. - if (! block->isTerminated()) { - if (function.getReturnType() == makeVoidType()) - makeReturn(true); - else { - makeReturn(true, createUndefined(function.getReturnType())); - } - } -} - -// Comments in header -void Builder::makeDiscard() -{ - buildPoint->addInstruction(std::unique_ptr(new Instruction(OpKill))); - createAndSetNoPredecessorBlock("post-discard"); -} - -// Comments in header -Id Builder::createVariable(StorageClass storageClass, Id type, const char* name) -{ - Id pointerType = makePointer(storageClass, type); - Instruction* inst = new Instruction(getUniqueId(), pointerType, OpVariable); - inst->addImmediateOperand(storageClass); - - switch (storageClass) { - case StorageClassFunction: - // Validation rules require the declaration in the entry block - buildPoint->getParent().addLocalVariable(std::unique_ptr(inst)); - break; - - default: - constantsTypesGlobals.push_back(std::unique_ptr(inst)); - module.mapInstruction(inst); - break; - } - - if (name) - addName(inst->getResultId(), name); - - return inst->getResultId(); -} - -// Comments in header -Id Builder::createUndefined(Id type) -{ - Instruction* inst = new Instruction(getUniqueId(), type, OpUndef); - buildPoint->addInstruction(std::unique_ptr(inst)); - return inst->getResultId(); -} - -// Comments in header -void Builder::createStore(Id rValue, Id lValue) -{ - Instruction* store = new Instruction(OpStore); - store->addIdOperand(lValue); - store->addIdOperand(rValue); - buildPoint->addInstruction(std::unique_ptr(store)); -} - -// Comments in header -Id Builder::createLoad(Id lValue) -{ - Instruction* load = new Instruction(getUniqueId(), getDerefTypeId(lValue), OpLoad); - load->addIdOperand(lValue); - buildPoint->addInstruction(std::unique_ptr(load)); - - return load->getResultId(); -} - -// Comments in header -Id Builder::createAccessChain(StorageClass storageClass, Id base, const std::vector& offsets) -{ - // Figure out the final resulting type. - spv::Id typeId = getTypeId(base); - assert(isPointerType(typeId) && offsets.size() > 0); - typeId = getContainedTypeId(typeId); - for (int i = 0; i < (int)offsets.size(); ++i) { - if (isStructType(typeId)) { - assert(isConstantScalar(offsets[i])); - typeId = getContainedTypeId(typeId, getConstantScalar(offsets[i])); - } else - typeId = getContainedTypeId(typeId, offsets[i]); - } - typeId = makePointer(storageClass, typeId); - - // Make the instruction - Instruction* chain = new Instruction(getUniqueId(), typeId, OpAccessChain); - chain->addIdOperand(base); - for (int i = 0; i < (int)offsets.size(); ++i) - chain->addIdOperand(offsets[i]); - buildPoint->addInstruction(std::unique_ptr(chain)); - - return chain->getResultId(); -} - -Id Builder::createArrayLength(Id base, unsigned int member) -{ - spv::Id intType = makeIntType(32); - Instruction* length = new Instruction(getUniqueId(), intType, OpArrayLength); - length->addIdOperand(base); - length->addImmediateOperand(member); - buildPoint->addInstruction(std::unique_ptr(length)); - - return length->getResultId(); -} - -Id Builder::createCompositeExtract(Id composite, Id typeId, unsigned index) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. 
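// (In that mode the extract must stay a constant expression, so instead of a
// regular OpCompositeExtract in the current block, the builder emits a global
// OpSpecConstantOp that remains foldable when the specialization constants
// are finally set. Illustrative disassembly, not verbatim output:
//     %elem = OpSpecConstantOp %float CompositeExtract %spec_composite 0
// )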
-    if (generatingOpCodeForSpecConst) {
-        return createSpecConstantOp(OpCompositeExtract, typeId, std::vector<Id>(1, composite), std::vector<Id>(1, index));
-    }
-    Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract);
-    extract->addIdOperand(composite);
-    extract->addImmediateOperand(index);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(extract));
-
-    return extract->getResultId();
-}
-
-Id Builder::createCompositeExtract(Id composite, Id typeId, const std::vector<unsigned>& indexes)
-{
-    // Generate code for spec constants if in spec constant operation
-    // generation mode.
-    if (generatingOpCodeForSpecConst) {
-        return createSpecConstantOp(OpCompositeExtract, typeId, std::vector<Id>(1, composite), indexes);
-    }
-    Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract);
-    extract->addIdOperand(composite);
-    for (int i = 0; i < (int)indexes.size(); ++i)
-        extract->addImmediateOperand(indexes[i]);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(extract));
-
-    return extract->getResultId();
-}
-
-Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, unsigned index)
-{
-    Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert);
-    insert->addIdOperand(object);
-    insert->addIdOperand(composite);
-    insert->addImmediateOperand(index);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(insert));
-
-    return insert->getResultId();
-}
-
-Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, const std::vector<unsigned>& indexes)
-{
-    Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert);
-    insert->addIdOperand(object);
-    insert->addIdOperand(composite);
-    for (int i = 0; i < (int)indexes.size(); ++i)
-        insert->addImmediateOperand(indexes[i]);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(insert));
-
-    return insert->getResultId();
-}
-
-Id Builder::createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex)
-{
-    Instruction* extract = new Instruction(getUniqueId(), typeId, OpVectorExtractDynamic);
-    extract->addIdOperand(vector);
-    extract->addIdOperand(componentIndex);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(extract));
-
-    return extract->getResultId();
-}
-
-Id Builder::createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex)
-{
-    Instruction* insert = new Instruction(getUniqueId(), typeId, OpVectorInsertDynamic);
-    insert->addIdOperand(vector);
-    insert->addIdOperand(component);
-    insert->addIdOperand(componentIndex);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(insert));
-
-    return insert->getResultId();
-}
-
-// An opcode that has no operands, no result id, and no type
-void Builder::createNoResultOp(Op opCode)
-{
-    Instruction* op = new Instruction(opCode);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-}
-
-// An opcode that has one operand, no result id, and no type
-void Builder::createNoResultOp(Op opCode, Id operand)
-{
-    Instruction* op = new Instruction(opCode);
-    op->addIdOperand(operand);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-}
-
-// An opcode that has one or more operands, no result id, and no type
-void Builder::createNoResultOp(Op opCode, const std::vector<Id>& operands)
-{
-    Instruction* op = new Instruction(opCode);
-    for (auto it = operands.cbegin(); it != operands.cend(); ++it)
-        op->addIdOperand(*it);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-}
-
-void Builder::createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask semantics)
-{
-    Instruction* op = new Instruction(OpControlBarrier);
-    op->addImmediateOperand(makeUintConstant(execution));
-    op->addImmediateOperand(makeUintConstant(memory));
-    op->addImmediateOperand(makeUintConstant(semantics));
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-}
-
-void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemantics)
-{
-    Instruction* op = new Instruction(OpMemoryBarrier);
-    op->addImmediateOperand(makeUintConstant(executionScope));
-    op->addImmediateOperand(makeUintConstant(memorySemantics));
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-}
-
-// An opcode that has one operand, a result id, and a type
-Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand)
-{
-    // Generate code for spec constants if in spec constant operation
-    // generation mode.
-    if (generatingOpCodeForSpecConst) {
-        return createSpecConstantOp(opCode, typeId, std::vector<Id>(1, operand), std::vector<Id>());
-    }
-    Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
-    op->addIdOperand(operand);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-
-    return op->getResultId();
-}
-
-Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right)
-{
-    // Generate code for spec constants if in spec constant operation
-    // generation mode.
-    if (generatingOpCodeForSpecConst) {
-        std::vector<Id> operands(2);
-        operands[0] = left; operands[1] = right;
-        return createSpecConstantOp(opCode, typeId, operands, std::vector<Id>());
-    }
-    Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
-    op->addIdOperand(left);
-    op->addIdOperand(right);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
-
-    return op->getResultId();
-}
-
-Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3)
-{
-    // Generate code for spec constants if in spec constant operation
-    // generation mode.
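// (Front ends typically reach createTriOp() when lowering a select such as
// 'cond ? a : b'; with hypothetical ids this would look like:
//     Id r = createTriOp(OpSelect, resultTypeId, cond, a, b);
// and the spec-constant folding below applies when all inputs are
// specialization constants.)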
- if (generatingOpCodeForSpecConst) { - std::vector operands(3); - operands[0] = op1; - operands[1] = op2; - operands[2] = op3; - return createSpecConstantOp( - opCode, typeId, operands, std::vector()); - } - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - op->addIdOperand(op1); - op->addIdOperand(op2); - op->addIdOperand(op3); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createOp(Op opCode, Id typeId, const std::vector& operands) -{ - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - for (auto it = operands.cbegin(); it != operands.cend(); ++it) - op->addIdOperand(*it); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createSpecConstantOp(Op opCode, Id typeId, const std::vector& operands, const std::vector& literals) -{ - Instruction* op = new Instruction(getUniqueId(), typeId, OpSpecConstantOp); - op->addImmediateOperand((unsigned) opCode); - for (auto it = operands.cbegin(); it != operands.cend(); ++it) - op->addIdOperand(*it); - for (auto it = literals.cbegin(); it != literals.cend(); ++it) - op->addImmediateOperand(*it); - module.mapInstruction(op); - constantsTypesGlobals.push_back(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createFunctionCall(spv::Function* function, const std::vector& args) -{ - Instruction* op = new Instruction(getUniqueId(), function->getReturnType(), OpFunctionCall); - op->addIdOperand(function->getId()); - for (int a = 0; a < (int)args.size(); ++a) - op->addIdOperand(args[a]); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -// Comments in header -Id Builder::createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels) -{ - if (channels.size() == 1) - return setPrecision(createCompositeExtract(source, typeId, channels.front()), precision); - - if (generatingOpCodeForSpecConst) { - std::vector operands(2); - operands[0] = operands[1] = source; - return setPrecision(createSpecConstantOp(OpVectorShuffle, typeId, operands, channels), precision); - } - Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); - assert(isVector(source)); - swizzle->addIdOperand(source); - swizzle->addIdOperand(source); - for (int i = 0; i < (int)channels.size(); ++i) - swizzle->addImmediateOperand(channels[i]); - buildPoint->addInstruction(std::unique_ptr(swizzle)); - - return setPrecision(swizzle->getResultId(), precision); -} - -// Comments in header -Id Builder::createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels) -{ - if (channels.size() == 1 && getNumComponents(source) == 1) - return createCompositeInsert(source, target, typeId, channels.front()); - - Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); - - assert(isVector(target)); - swizzle->addIdOperand(target); - - assert(getNumComponents(source) == (int)channels.size()); - assert(isVector(source)); - swizzle->addIdOperand(source); - - // Set up an identity shuffle from the base value to the result value - unsigned int components[4]; - int numTargetComponents = getNumComponents(target); - for (int i = 0; i < numTargetComponents; ++i) - components[i] = i; - - // Punch in the l-value swizzle - for (int i = 0; i < (int)channels.size(); ++i) - components[channels[i]] = numTargetComponents + i; - - // finish the instruction with these components selectors - for (int i = 0; i < numTargetComponents; ++i) - 
swizzle->addImmediateOperand(components[i]);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(swizzle));
-
-    return swizzle->getResultId();
-}
-
-// Comments in header
-void Builder::promoteScalar(Decoration precision, Id& left, Id& right)
-{
-    int direction = getNumComponents(right) - getNumComponents(left);
-
-    if (direction > 0)
-        left = smearScalar(precision, left, makeVectorType(getTypeId(left), getNumComponents(right)));
-    else if (direction < 0)
-        right = smearScalar(precision, right, makeVectorType(getTypeId(right), getNumComponents(left)));
-
-    return;
-}
-
-// Comments in header
-Id Builder::smearScalar(Decoration precision, Id scalar, Id vectorType)
-{
-    assert(getNumComponents(scalar) == 1);
-    assert(getTypeId(scalar) == getScalarTypeId(vectorType));
-
-    int numComponents = getNumTypeComponents(vectorType);
-    if (numComponents == 1)
-        return scalar;
-
-    Instruction* smear = nullptr;
-    if (generatingOpCodeForSpecConst) {
-        auto members = std::vector<spv::Id>(numComponents, scalar);
-        // Sometimes, even in spec-constant-op mode, the temporary vector created by
-        // promoting a scalar might not be a spec constant. This should depend on
-        // the scalar.
-        // e.g.:
-        //  const vec2 spec_const_result = a_spec_const_vec2 + a_front_end_const_scalar;
-        // In such cases, the temporary vector created from a_front_end_const_scalar
-        // is not a spec constant vector, even though the binary operation node is marked
-        // as 'specConstant' and we are in spec-constant-op mode.
-        auto result_id = makeCompositeConstant(vectorType, members, isSpecConstant(scalar));
-        smear = module.getInstruction(result_id);
-    } else {
-        smear = new Instruction(getUniqueId(), vectorType, OpCompositeConstruct);
-        for (int c = 0; c < numComponents; ++c)
-            smear->addIdOperand(scalar);
-        buildPoint->addInstruction(std::unique_ptr<Instruction>(smear));
-    }
-
-    return setPrecision(smear->getResultId(), precision);
-}
-
-// Comments in header
-Id Builder::createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector<Id>& args)
-{
-    Instruction* inst = new Instruction(getUniqueId(), resultType, OpExtInst);
-    inst->addIdOperand(builtins);
-    inst->addImmediateOperand(entryPoint);
-    for (int arg = 0; arg < (int)args.size(); ++arg)
-        inst->addIdOperand(args[arg]);
-
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(inst));
-
-    return inst->getResultId();
-}
-
-// Accept all parameters needed to create a texture instruction.
-// Create the correct instruction based on the inputs, and make the call.
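// (For orientation, a plain biased 2D sample might be requested like this; a
// hedged sketch with made-up ids, not code from this repository:
//     spv::Builder::TextureParameters params = {};
//     params.sampler = sampledImageId;
//     params.coords = coordsId;
//     params.bias = biasId;  // unused optional operands stay NoResult
//     Id texel = builder.createTextureCall(spv::NoPrecision, v4floatTypeId,
//         /*sparse*/ false, /*fetch*/ false, /*proj*/ false, /*gather*/ false,
//         /*noImplicitLod*/ false, params);
// )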
-Id Builder::createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicitLod, const TextureParameters& parameters) -{ - static const int maxTextureArgs = 10; - Id texArgs[maxTextureArgs] = {}; - - // - // Set up the fixed arguments - // - int numArgs = 0; - bool explicitLod = false; - texArgs[numArgs++] = parameters.sampler; - texArgs[numArgs++] = parameters.coords; - if (parameters.Dref != NoResult) - texArgs[numArgs++] = parameters.Dref; - if (parameters.component != NoResult) - texArgs[numArgs++] = parameters.component; - - // - // Set up the optional arguments - // - int optArgNum = numArgs; // track which operand, if it exists, is the mask of optional arguments - ++numArgs; // speculatively make room for the mask operand - ImageOperandsMask mask = ImageOperandsMaskNone; // the mask operand - if (parameters.bias) { - mask = (ImageOperandsMask)(mask | ImageOperandsBiasMask); - texArgs[numArgs++] = parameters.bias; - } - if (parameters.lod) { - mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); - texArgs[numArgs++] = parameters.lod; - explicitLod = true; - } else if (parameters.gradX) { - mask = (ImageOperandsMask)(mask | ImageOperandsGradMask); - texArgs[numArgs++] = parameters.gradX; - texArgs[numArgs++] = parameters.gradY; - explicitLod = true; - } else if (noImplicitLod && ! fetch && ! gather) { - // have to explicitly use lod of 0 if not allowed to have them be implicit, and - // we would otherwise be about to issue an implicit instruction - mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); - texArgs[numArgs++] = makeFloatConstant(0.0); - explicitLod = true; - } - if (parameters.offset) { - if (isConstant(parameters.offset)) - mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetMask); - else { - addCapability(CapabilityImageGatherExtended); - mask = (ImageOperandsMask)(mask | ImageOperandsOffsetMask); - } - texArgs[numArgs++] = parameters.offset; - } - if (parameters.offsets) { - mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetsMask); - texArgs[numArgs++] = parameters.offsets; - } - if (parameters.sample) { - mask = (ImageOperandsMask)(mask | ImageOperandsSampleMask); - texArgs[numArgs++] = parameters.sample; - } - if (parameters.lodClamp) { - // capability if this bit is used - addCapability(CapabilityMinLod); - - mask = (ImageOperandsMask)(mask | ImageOperandsMinLodMask); - texArgs[numArgs++] = parameters.lodClamp; - } - if (mask == ImageOperandsMaskNone) - --numArgs; // undo speculative reservation for the mask argument - else - texArgs[optArgNum] = mask; - - // - // Set up the instruction - // - Op opCode = OpNop; // All paths below need to set this - if (fetch) { - if (sparse) - opCode = OpImageSparseFetch; - else - opCode = OpImageFetch; - } else if (gather) { - if (parameters.Dref) - if (sparse) - opCode = OpImageSparseDrefGather; - else - opCode = OpImageDrefGather; - else - if (sparse) - opCode = OpImageSparseGather; - else - opCode = OpImageGather; - } else if (explicitLod) { - if (parameters.Dref) { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjDrefExplicitLod; - else - opCode = OpImageSampleProjDrefExplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleDrefExplicitLod; - else - opCode = OpImageSampleDrefExplicitLod; - } else { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjExplicitLod; - else - opCode = OpImageSampleProjExplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleExplicitLod; - else - opCode = OpImageSampleExplicitLod; - } - } 
else { - if (parameters.Dref) { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjDrefImplicitLod; - else - opCode = OpImageSampleProjDrefImplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleDrefImplicitLod; - else - opCode = OpImageSampleDrefImplicitLod; - } else { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjImplicitLod; - else - opCode = OpImageSampleProjImplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleImplicitLod; - else - opCode = OpImageSampleImplicitLod; - } - } - - // See if the result type is expecting a smeared result. - // This happens when a legacy shadow*() call is made, which - // gets a vec4 back instead of a float. - Id smearedType = resultType; - if (! isScalarType(resultType)) { - switch (opCode) { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - resultType = getScalarTypeId(resultType); - break; - default: - break; - } - } - - Id typeId0 = 0; - Id typeId1 = 0; - - if (sparse) { - typeId0 = resultType; - typeId1 = getDerefTypeId(parameters.texelOut); - resultType = makeStructResultType(typeId0, typeId1); - } - - // Build the SPIR-V instruction - Instruction* textureInst = new Instruction(getUniqueId(), resultType, opCode); - for (int op = 0; op < optArgNum; ++op) - textureInst->addIdOperand(texArgs[op]); - if (optArgNum < numArgs) - textureInst->addImmediateOperand(texArgs[optArgNum]); - for (int op = optArgNum + 1; op < numArgs; ++op) - textureInst->addIdOperand(texArgs[op]); - setPrecision(textureInst->getResultId(), precision); - buildPoint->addInstruction(std::unique_ptr(textureInst)); - - Id resultId = textureInst->getResultId(); - - if (sparse) { - // set capability - addCapability(CapabilitySparseResidency); - - // Decode the return type that was a special structure - createStore(createCompositeExtract(resultId, typeId1, 1), parameters.texelOut); - resultId = createCompositeExtract(resultId, typeId0, 0); - setPrecision(resultId, precision); - } else { - // When a smear is needed, do it, as per what was computed - // above when resultType was changed to a scalar type. - if (resultType != smearedType) - resultId = smearScalar(precision, resultId, smearedType); - } - - return resultId; -} - -// Comments in header -Id Builder::createTextureQueryCall(Op opCode, const TextureParameters& parameters, bool isUnsignedResult) -{ - // All these need a capability - addCapability(CapabilityImageQuery); - - // Figure out the result type - Id resultType = 0; - switch (opCode) { - case OpImageQuerySize: - case OpImageQuerySizeLod: - { - int numComponents = 0; - switch (getTypeDimensionality(getImageType(parameters.sampler))) { - case Dim1D: - case DimBuffer: - numComponents = 1; - break; - case Dim2D: - case DimCube: - case DimRect: - case DimSubpassData: - numComponents = 2; - break; - case Dim3D: - numComponents = 3; - break; - - default: - assert(0); - break; - } - if (isArrayedImageType(getImageType(parameters.sampler))) - ++numComponents; - - Id intType = isUnsignedResult ? makeUintType(32) : makeIntType(32); - if (numComponents == 1) - resultType = intType; - else - resultType = makeVectorType(intType, numComponents); - - break; - } - case OpImageQueryLod: - resultType = makeVectorType(makeFloatType(32), 2); - break; - case OpImageQueryLevels: - case OpImageQuerySamples: - resultType = isUnsignedResult ? 
makeUintType(32) : makeIntType(32); - break; - default: - assert(0); - break; - } - - Instruction* query = new Instruction(getUniqueId(), resultType, opCode); - query->addIdOperand(parameters.sampler); - if (parameters.coords) - query->addIdOperand(parameters.coords); - if (parameters.lod) - query->addIdOperand(parameters.lod); - buildPoint->addInstruction(std::unique_ptr(query)); - - return query->getResultId(); -} - -// External comments in header. -// Operates recursively to visit the composite's hierarchy. -Id Builder::createCompositeCompare(Decoration precision, Id value1, Id value2, bool equal) -{ - Id boolType = makeBoolType(); - Id valueType = getTypeId(value1); - - Id resultId = NoResult; - - int numConstituents = getNumTypeConstituents(valueType); - - // Scalars and Vectors - - if (isScalarType(valueType) || isVectorType(valueType)) { - assert(valueType == getTypeId(value2)); - // These just need a single comparison, just have - // to figure out what it is. - Op op; - switch (getMostBasicTypeClass(valueType)) { - case OpTypeFloat: - op = equal ? OpFOrdEqual : OpFOrdNotEqual; - break; - case OpTypeInt: - default: - op = equal ? OpIEqual : OpINotEqual; - break; - case OpTypeBool: - op = equal ? OpLogicalEqual : OpLogicalNotEqual; - precision = NoPrecision; - break; - } - - if (isScalarType(valueType)) { - // scalar - resultId = createBinOp(op, boolType, value1, value2); - } else { - // vector - resultId = createBinOp(op, makeVectorType(boolType, numConstituents), value1, value2); - setPrecision(resultId, precision); - // reduce vector compares... - resultId = createUnaryOp(equal ? OpAll : OpAny, boolType, resultId); - } - - return setPrecision(resultId, precision); - } - - // Only structs, arrays, and matrices should be left. - // They share in common the reduction operation across their constituents. - assert(isAggregateType(valueType) || isMatrixType(valueType)); - - // Compare each pair of constituents - for (int constituent = 0; constituent < numConstituents; ++constituent) { - std::vector indexes(1, constituent); - Id constituentType1 = getContainedTypeId(getTypeId(value1), constituent); - Id constituentType2 = getContainedTypeId(getTypeId(value2), constituent); - Id constituent1 = createCompositeExtract(value1, constituentType1, indexes); - Id constituent2 = createCompositeExtract(value2, constituentType2, indexes); - - Id subResultId = createCompositeCompare(precision, constituent1, constituent2, equal); - - if (constituent == 0) - resultId = subResultId; - else - resultId = setPrecision(createBinOp(equal ? OpLogicalAnd : OpLogicalOr, boolType, resultId, subResultId), precision); - } - - return resultId; -} - -// OpCompositeConstruct -Id Builder::createCompositeConstruct(Id typeId, const std::vector& constituents) -{ - assert(isAggregateType(typeId) || (getNumTypeConstituents(typeId) > 1 && getNumTypeConstituents(typeId) == (int)constituents.size())); - - if (generatingOpCodeForSpecConst) { - // Sometime, even in spec-constant-op mode, the constant composite to be - // constructed may not be a specialization constant. - // e.g.: - // const mat2 m2 = mat2(a_spec_const, a_front_end_const, another_front_end_const, third_front_end_const); - // The first column vector should be a spec constant one, as a_spec_const is a spec constant. - // The second column vector should NOT be spec constant, as it does not contain any spec constants. 
- // To handle such cases, we check the constituents of the constant vector to determine whether this - // vector should be created as a spec constant. - return makeCompositeConstant(typeId, constituents, - std::any_of(constituents.begin(), constituents.end(), - [&](spv::Id id) { return isSpecConstant(id); })); - } - - Instruction* op = new Instruction(getUniqueId(), typeId, OpCompositeConstruct); - for (int c = 0; c < (int)constituents.size(); ++c) - op->addIdOperand(constituents[c]); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -// Vector or scalar constructor -Id Builder::createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) -{ - Id result = NoResult; - unsigned int numTargetComponents = getNumTypeComponents(resultTypeId); - unsigned int targetComponent = 0; - - // Special case: when calling a vector constructor with a single scalar - // argument, smear the scalar - if (sources.size() == 1 && isScalar(sources[0]) && numTargetComponents > 1) - return smearScalar(precision, sources[0], resultTypeId); - - // accumulate the arguments for OpCompositeConstruct - std::vector constituents; - Id scalarTypeId = getScalarTypeId(resultTypeId); - - // lambda to store the result of visiting an argument component - const auto latchResult = [&](Id comp) { - if (numTargetComponents > 1) - constituents.push_back(comp); - else - result = comp; - ++targetComponent; - }; - - // lambda to visit a vector argument's components - const auto accumulateVectorConstituents = [&](Id sourceArg) { - unsigned int sourceSize = getNumComponents(sourceArg); - unsigned int sourcesToUse = sourceSize; - if (sourcesToUse + targetComponent > numTargetComponents) - sourcesToUse = numTargetComponents - targetComponent; - - for (unsigned int s = 0; s < sourcesToUse; ++s) { - std::vector swiz; - swiz.push_back(s); - latchResult(createRvalueSwizzle(precision, scalarTypeId, sourceArg, swiz)); - } - }; - - // lambda to visit a matrix argument's components - const auto accumulateMatrixConstituents = [&](Id sourceArg) { - unsigned int sourceSize = getNumColumns(sourceArg) * getNumRows(sourceArg); - unsigned int sourcesToUse = sourceSize; - if (sourcesToUse + targetComponent > numTargetComponents) - sourcesToUse = numTargetComponents - targetComponent; - - int col = 0; - int row = 0; - for (unsigned int s = 0; s < sourcesToUse; ++s) { - if (row >= getNumRows(sourceArg)) { - row = 0; - col++; - } - std::vector indexes; - indexes.push_back(col); - indexes.push_back(row); - latchResult(createCompositeExtract(sourceArg, scalarTypeId, indexes)); - row++; - } - }; - - // Go through the source arguments, each one could have either - // a single or multiple components to contribute. - for (unsigned int i = 0; i < sources.size(); ++i) { - if (isScalar(sources[i])) - latchResult(sources[i]); - else if (isVector(sources[i])) - accumulateVectorConstituents(sources[i]); - else if (isMatrix(sources[i])) - accumulateMatrixConstituents(sources[i]); - else - assert(0); - - if (targetComponent >= numTargetComponents) - break; - } - - // If the result is a vector, make it from the gathered constituents. 
- if (constituents.size() > 0) - result = createCompositeConstruct(resultTypeId, constituents); - - return setPrecision(result, precision); -} - -// Comments in header -Id Builder::createMatrixConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) -{ - Id componentTypeId = getScalarTypeId(resultTypeId); - int numCols = getTypeNumColumns(resultTypeId); - int numRows = getTypeNumRows(resultTypeId); - - Instruction* instr = module.getInstruction(componentTypeId); - Id bitCount = instr->getIdOperand(0); - - // Will use a two step process - // 1. make a compile-time 2D array of values - // 2. construct a matrix from that array - - // Step 1. - - // initialize the array to the identity matrix - Id ids[maxMatrixSize][maxMatrixSize]; - Id one = (bitCount == 64 ? makeDoubleConstant(1.0) : makeFloatConstant(1.0)); - Id zero = (bitCount == 64 ? makeDoubleConstant(0.0) : makeFloatConstant(0.0)); - for (int col = 0; col < 4; ++col) { - for (int row = 0; row < 4; ++row) { - if (col == row) - ids[col][row] = one; - else - ids[col][row] = zero; - } - } - - // modify components as dictated by the arguments - if (sources.size() == 1 && isScalar(sources[0])) { - // a single scalar; resets the diagonals - for (int col = 0; col < 4; ++col) - ids[col][col] = sources[0]; - } else if (isMatrix(sources[0])) { - // constructing from another matrix; copy over the parts that exist in both the argument and constructee - Id matrix = sources[0]; - int minCols = std::min(numCols, getNumColumns(matrix)); - int minRows = std::min(numRows, getNumRows(matrix)); - for (int col = 0; col < minCols; ++col) { - std::vector indexes; - indexes.push_back(col); - for (int row = 0; row < minRows; ++row) { - indexes.push_back(row); - ids[col][row] = createCompositeExtract(matrix, componentTypeId, indexes); - indexes.pop_back(); - setPrecision(ids[col][row], precision); - } - } - } else { - // fill in the matrix in column-major order with whatever argument components are available - int row = 0; - int col = 0; - - for (int arg = 0; arg < (int)sources.size(); ++arg) { - Id argComp = sources[arg]; - for (int comp = 0; comp < getNumComponents(sources[arg]); ++comp) { - if (getNumComponents(sources[arg]) > 1) { - argComp = createCompositeExtract(sources[arg], componentTypeId, comp); - setPrecision(argComp, precision); - } - ids[col][row++] = argComp; - if (row == numRows) { - row = 0; - col++; - } - } - } - } - - // Step 2: Construct a matrix from that array. - // First make the column vectors, then make the matrix. 
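// (To make the two-step scheme concrete: for a GLSL-style mat4(m3), step 1
// above leaves ids[col][row] holding, written out as a matrix:
//     m00 m01 m02 0
//     m10 m11 m12 0
//     m20 m21 m22 0
//     0   0   0   1
// and step 2 below builds one OpCompositeConstruct vector per column, then
// constructs the matrix from those four columns. Worked example inferred from
// the code, not taken from the source comments.)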
- - // make the column vectors - Id columnTypeId = getContainedTypeId(resultTypeId); - std::vector matrixColumns; - for (int col = 0; col < numCols; ++col) { - std::vector vectorComponents; - for (int row = 0; row < numRows; ++row) - vectorComponents.push_back(ids[col][row]); - Id column = createCompositeConstruct(columnTypeId, vectorComponents); - setPrecision(column, precision); - matrixColumns.push_back(column); - } - - // make the matrix - return setPrecision(createCompositeConstruct(resultTypeId, matrixColumns), precision); -} - -// Comments in header -Builder::If::If(Id cond, unsigned int ctrl, Builder& gb) : - builder(gb), - condition(cond), - control(ctrl), - elseBlock(0) -{ - function = &builder.getBuildPoint()->getParent(); - - // make the blocks, but only put the then-block into the function, - // the else-block and merge-block will be added later, in order, after - // earlier code is emitted - thenBlock = new Block(builder.getUniqueId(), *function); - mergeBlock = new Block(builder.getUniqueId(), *function); - - // Save the current block, so that we can add in the flow control split when - // makeEndIf is called. - headerBlock = builder.getBuildPoint(); - - function->addBlock(thenBlock); - builder.setBuildPoint(thenBlock); -} - -// Comments in header -void Builder::If::makeBeginElse() -{ - // Close out the "then" by having it jump to the mergeBlock - builder.createBranch(mergeBlock); - - // Make the first else block and add it to the function - elseBlock = new Block(builder.getUniqueId(), *function); - function->addBlock(elseBlock); - - // Start building the else block - builder.setBuildPoint(elseBlock); -} - -// Comments in header -void Builder::If::makeEndIf() -{ - // jump to the merge block - builder.createBranch(mergeBlock); - - // Go back to the headerBlock and make the flow control split - builder.setBuildPoint(headerBlock); - builder.createSelectionMerge(mergeBlock, control); - if (elseBlock) - builder.createConditionalBranch(condition, thenBlock, elseBlock); - else - builder.createConditionalBranch(condition, thenBlock, mergeBlock); - - // add the merge block to the function - function->addBlock(mergeBlock); - builder.setBuildPoint(mergeBlock); -} - -// Comments in header -void Builder::makeSwitch(Id selector, unsigned int control, int numSegments, const std::vector& caseValues, - const std::vector& valueIndexToSegment, int defaultSegment, - std::vector& segmentBlocks) -{ - Function& function = buildPoint->getParent(); - - // make all the blocks - for (int s = 0; s < numSegments; ++s) - segmentBlocks.push_back(new Block(getUniqueId(), function)); - - Block* mergeBlock = new Block(getUniqueId(), function); - - // make and insert the switch's selection-merge instruction - createSelectionMerge(mergeBlock, control); - - // make the switch instruction - Instruction* switchInst = new Instruction(NoResult, NoType, OpSwitch); - switchInst->addIdOperand(selector); - auto defaultOrMerge = (defaultSegment >= 0) ? 
segmentBlocks[defaultSegment] : mergeBlock; - switchInst->addIdOperand(defaultOrMerge->getId()); - defaultOrMerge->addPredecessor(buildPoint); - for (int i = 0; i < (int)caseValues.size(); ++i) { - switchInst->addImmediateOperand(caseValues[i]); - switchInst->addIdOperand(segmentBlocks[valueIndexToSegment[i]]->getId()); - segmentBlocks[valueIndexToSegment[i]]->addPredecessor(buildPoint); - } - buildPoint->addInstruction(std::unique_ptr(switchInst)); - - // push the merge block - switchMerges.push(mergeBlock); -} - -// Comments in header -void Builder::addSwitchBreak() -{ - // branch to the top of the merge block stack - createBranch(switchMerges.top()); - createAndSetNoPredecessorBlock("post-switch-break"); -} - -// Comments in header -void Builder::nextSwitchSegment(std::vector& segmentBlock, int nextSegment) -{ - int lastSegment = nextSegment - 1; - if (lastSegment >= 0) { - // Close out previous segment by jumping, if necessary, to next segment - if (! buildPoint->isTerminated()) - createBranch(segmentBlock[nextSegment]); - } - Block* block = segmentBlock[nextSegment]; - block->getParent().addBlock(block); - setBuildPoint(block); -} - -// Comments in header -void Builder::endSwitch(std::vector& /*segmentBlock*/) -{ - // Close out previous segment by jumping, if necessary, to next segment - if (! buildPoint->isTerminated()) - addSwitchBreak(); - - switchMerges.top()->getParent().addBlock(switchMerges.top()); - setBuildPoint(switchMerges.top()); - - switchMerges.pop(); -} - -Block& Builder::makeNewBlock() -{ - Function& function = buildPoint->getParent(); - auto block = new Block(getUniqueId(), function); - function.addBlock(block); - return *block; -} - -Builder::LoopBlocks& Builder::makeNewLoop() -{ - // This verbosity is needed to simultaneously get the same behavior - // everywhere (id's in the same order), have a syntax that works - // across lots of versions of C++, have no warnings from pedantic - // compilation modes, and leave the rest of the code alone. - Block& head = makeNewBlock(); - Block& body = makeNewBlock(); - Block& merge = makeNewBlock(); - Block& continue_target = makeNewBlock(); - LoopBlocks blocks(head, body, merge, continue_target); - loops.push(blocks); - return loops.top(); -} - -void Builder::createLoopContinue() -{ - createBranch(&loops.top().continue_target); - // Set up a block for dead code. - createAndSetNoPredecessorBlock("post-loop-continue"); -} - -void Builder::createLoopExit() -{ - createBranch(&loops.top().merge); - // Set up a block for dead code. 
- createAndSetNoPredecessorBlock("post-loop-break"); -} - -void Builder::closeLoop() -{ - loops.pop(); -} - -void Builder::clearAccessChain() -{ - accessChain.base = NoResult; - accessChain.indexChain.clear(); - accessChain.instr = NoResult; - accessChain.swizzle.clear(); - accessChain.component = NoResult; - accessChain.preSwizzleBaseType = NoType; - accessChain.isRValue = false; -} - -// Comments in header -void Builder::accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType) -{ - // swizzles can be stacked in GLSL, but simplified to a single - // one here; the base type doesn't change - if (accessChain.preSwizzleBaseType == NoType) - accessChain.preSwizzleBaseType = preSwizzleBaseType; - - // if needed, propagate the swizzle for the current access chain - if (accessChain.swizzle.size() > 0) { - std::vector oldSwizzle = accessChain.swizzle; - accessChain.swizzle.resize(0); - for (unsigned int i = 0; i < swizzle.size(); ++i) { - assert(swizzle[i] < oldSwizzle.size()); - accessChain.swizzle.push_back(oldSwizzle[swizzle[i]]); - } - } else - accessChain.swizzle = swizzle; - - // determine if we need to track this swizzle anymore - simplifyAccessChainSwizzle(); -} - -// Comments in header -void Builder::accessChainStore(Id rvalue) -{ - assert(accessChain.isRValue == false); - - transferAccessChainSwizzle(true); - Id base = collapseAccessChain(); - Id source = rvalue; - - // dynamic component should be gone - assert(accessChain.component == NoResult); - - // If swizzle still exists, it is out-of-order or not full, we must load the target vector, - // extract and insert elements to perform writeMask and/or swizzle. - if (accessChain.swizzle.size() > 0) { - Id tempBaseId = createLoad(base); - source = createLvalueSwizzle(getTypeId(tempBaseId), tempBaseId, source, accessChain.swizzle); - } - - createStore(source, base); -} - -// Comments in header -Id Builder::accessChainLoad(Decoration precision, Id resultType) -{ - Id id; - - if (accessChain.isRValue) { - // transfer access chain, but try to stay in registers - transferAccessChainSwizzle(false); - if (accessChain.indexChain.size() > 0) { - Id swizzleBase = accessChain.preSwizzleBaseType != NoType ? 
accessChain.preSwizzleBaseType : resultType; - - // if all the accesses are constants, we can use OpCompositeExtract - std::vector indexes; - bool constant = true; - for (int i = 0; i < (int)accessChain.indexChain.size(); ++i) { - if (isConstantScalar(accessChain.indexChain[i])) - indexes.push_back(getConstantScalar(accessChain.indexChain[i])); - else { - constant = false; - break; - } - } - - if (constant) - id = createCompositeExtract(accessChain.base, swizzleBase, indexes); - else { - // make a new function variable for this r-value - Id lValue = createVariable(StorageClassFunction, getTypeId(accessChain.base), "indexable"); - - // store into it - createStore(accessChain.base, lValue); - - // move base to the new variable - accessChain.base = lValue; - accessChain.isRValue = false; - - // load through the access chain - id = createLoad(collapseAccessChain()); - } - setPrecision(id, precision); - } else - id = accessChain.base; // no precision, it was set when this was defined - } else { - transferAccessChainSwizzle(true); - // load through the access chain - id = createLoad(collapseAccessChain()); - setPrecision(id, precision); - } - - // Done, unless there are swizzles to do - if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult) - return id; - - // Do remaining swizzling - - // Do the basic swizzle - if (accessChain.swizzle.size() > 0) { - Id swizzledType = getScalarTypeId(getTypeId(id)); - if (accessChain.swizzle.size() > 1) - swizzledType = makeVectorType(swizzledType, (int)accessChain.swizzle.size()); - id = createRvalueSwizzle(precision, swizzledType, id, accessChain.swizzle); - } - - // Do the dynamic component - if (accessChain.component != NoResult) - id = setPrecision(createVectorExtractDynamic(id, resultType, accessChain.component), precision); - - return id; -} - -Id Builder::accessChainGetLValue() -{ - assert(accessChain.isRValue == false); - - transferAccessChainSwizzle(true); - Id lvalue = collapseAccessChain(); - - // If swizzle exists, it is out-of-order or not full, we must load the target vector, - // extract and insert elements to perform writeMask and/or swizzle. This does not - // go with getting a direct l-value pointer. - assert(accessChain.swizzle.size() == 0); - assert(accessChain.component == NoResult); - - return lvalue; -} - -// comment in header -Id Builder::accessChainGetInferredType() -{ - // anything to operate on? - if (accessChain.base == NoResult) - return NoType; - Id type = getTypeId(accessChain.base); - - // do initial dereference - if (! accessChain.isRValue) - type = getContainedTypeId(type); - - // dereference each index - for (auto it = accessChain.indexChain.cbegin(); it != accessChain.indexChain.cend(); ++it) { - if (isStructType(type)) - type = getContainedTypeId(type, getConstantScalar(*it)); - else - type = getContainedTypeId(type); - } - - // dereference swizzle - if (accessChain.swizzle.size() == 1) - type = getContainedTypeId(type); - else if (accessChain.swizzle.size() > 1) - type = makeVectorType(getContainedTypeId(type), (int)accessChain.swizzle.size()); - - // dereference component selection - if (accessChain.component) - type = getContainedTypeId(type); - - return type; -} - -// comment in header -void Builder::eliminateDeadDecorations() { - std::unordered_set reachable_blocks; - std::unordered_set unreachable_definitions; - // Collect IDs defined in unreachable blocks. For each function, label the - // reachable blocks first. 
Then for each unreachable block, collect the - // result IDs of the instructions in it. - for (std::vector::const_iterator fi = module.getFunctions().cbegin(); - fi != module.getFunctions().cend(); fi++) { - Function* f = *fi; - Block* entry = f->getEntryBlock(); - inReadableOrder(entry, [&reachable_blocks](const Block* b) { - reachable_blocks.insert(b); - }); - for (std::vector::const_iterator bi = f->getBlocks().cbegin(); - bi != f->getBlocks().cend(); bi++) { - Block* b = *bi; - if (!reachable_blocks.count(b)) { - for (std::vector >::const_iterator - ii = b->getInstructions().cbegin(); - ii != b->getInstructions().cend(); ii++) { - Instruction* i = ii->get(); - unreachable_definitions.insert(i->getResultId()); - } - } - } - } - decorations.erase(std::remove_if(decorations.begin(), decorations.end(), - [&unreachable_definitions](std::unique_ptr& I) -> bool { - Instruction* inst = I.get(); - Id decoration_id = inst->getIdOperand(0); - return unreachable_definitions.count(decoration_id) != 0; - }), - decorations.end()); -} - -void Builder::dump(std::vector& out) const -{ - // Header, before first instructions: - out.push_back(MagicNumber); - out.push_back(spvVersion); - out.push_back(builderNumber); - out.push_back(uniqueId + 1); - out.push_back(0); - - // Capabilities - for (auto it = capabilities.cbegin(); it != capabilities.cend(); ++it) { - Instruction capInst(0, 0, OpCapability); - capInst.addImmediateOperand(*it); - capInst.dump(out); - } - - for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) { - Instruction extInst(0, 0, OpExtension); - extInst.addStringOperand(it->c_str()); - extInst.dump(out); - } - - dumpInstructions(out, imports); - Instruction memInst(0, 0, OpMemoryModel); - memInst.addImmediateOperand(addressModel); - memInst.addImmediateOperand(memoryModel); - memInst.dump(out); - - // Instructions saved up while building: - dumpInstructions(out, entryPoints); - dumpInstructions(out, executionModes); - - // Debug instructions - dumpInstructions(out, strings); - dumpModuleProcesses(out); - dumpSourceInstructions(out); - for (int e = 0; e < (int)sourceExtensions.size(); ++e) { - Instruction sourceExtInst(0, 0, OpSourceExtension); - sourceExtInst.addStringOperand(sourceExtensions[e]); - sourceExtInst.dump(out); - } - dumpInstructions(out, names); - dumpInstructions(out, lines); - - // Annotation instructions - dumpInstructions(out, decorations); - - dumpInstructions(out, constantsTypesGlobals); - dumpInstructions(out, externals); - - // The functions - module.dump(out); -} - -// -// Protected methods. -// - -// Turn the described access chain in 'accessChain' into an instruction(s) -// computing its address. This *cannot* include complex swizzles, which must -// be handled after this is called. -// -// Can generate code. -Id Builder::collapseAccessChain() -{ - assert(accessChain.isRValue == false); - - // did we already emit an access chain for this? - if (accessChain.instr != NoResult) - return accessChain.instr; - - // If we have a dynamic component, we can still transfer - // that into a final operand to the access chain. We need to remap the - // dynamic component through the swizzle to get a new dynamic component to - // update. - // - // This was not done in transferAccessChainSwizzle() because it might - // generate code. 
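// (Worked example, not from the source: for an access like 'v.zyx[i]' the
// pending swizzle is (2,1,0); remapDynamicSwizzle() below materializes it as
// a constant uvec3(2,1,0) and replaces the dynamic component 'i' with that
// vector indexed by 'i', so the final access chain carries exactly one
// dynamic index and no leftover swizzle.)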
- remapDynamicSwizzle(); - if (accessChain.component != NoResult) { - // transfer the dynamic component to the access chain - accessChain.indexChain.push_back(accessChain.component); - accessChain.component = NoResult; - } - - // note that non-trivial swizzling is left pending - - // do we have an access chain? - if (accessChain.indexChain.size() == 0) - return accessChain.base; - - // emit the access chain - StorageClass storageClass = (StorageClass)module.getStorageClass(getTypeId(accessChain.base)); - accessChain.instr = createAccessChain(storageClass, accessChain.base, accessChain.indexChain); - - return accessChain.instr; -} - -// For a dynamic component selection of a swizzle. -// -// Turn the swizzle and dynamic component into just a dynamic component. -// -// Generates code. -void Builder::remapDynamicSwizzle() -{ - // do we have a swizzle to remap a dynamic component through? - if (accessChain.component != NoResult && accessChain.swizzle.size() > 1) { - // build a vector of the swizzle for the component to map into - std::vector components; - for (int c = 0; c < accessChain.swizzle.size(); ++c) - components.push_back(makeUintConstant(accessChain.swizzle[c])); - Id mapType = makeVectorType(makeUintType(32), (int)accessChain.swizzle.size()); - Id map = makeCompositeConstant(mapType, components); - - // use it - accessChain.component = createVectorExtractDynamic(map, makeUintType(32), accessChain.component); - accessChain.swizzle.clear(); - } -} - -// clear out swizzle if it is redundant, that is reselecting the same components -// that would be present without the swizzle. -void Builder::simplifyAccessChainSwizzle() -{ - // If the swizzle has fewer components than the vector, it is subsetting, and must stay - // to preserve that fact. - if (getNumTypeComponents(accessChain.preSwizzleBaseType) > (int)accessChain.swizzle.size()) - return; - - // if components are out of order, it is a swizzle - for (unsigned int i = 0; i < accessChain.swizzle.size(); ++i) { - if (i != accessChain.swizzle[i]) - return; - } - - // otherwise, there is no need to track this swizzle - accessChain.swizzle.clear(); - if (accessChain.component == NoResult) - accessChain.preSwizzleBaseType = NoType; -} - -// To the extent any swizzling can become part of the chain -// of accesses instead of a post operation, make it so. -// If 'dynamic' is true, include transferring the dynamic component, -// otherwise, leave it pending. -// -// Does not generate code. just updates the access chain. -void Builder::transferAccessChainSwizzle(bool dynamic) -{ - // non existent? - if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult) - return; - - // too complex? - // (this requires either a swizzle, or generating code for a dynamic component) - if (accessChain.swizzle.size() > 1) - return; - - // single component, either in the swizzle and/or dynamic component - if (accessChain.swizzle.size() == 1) { - assert(accessChain.component == NoResult); - // handle static component selection - accessChain.indexChain.push_back(makeUintConstant(accessChain.swizzle.front())); - accessChain.swizzle.clear(); - accessChain.preSwizzleBaseType = NoType; - } else if (dynamic && accessChain.component != NoResult) { - assert(accessChain.swizzle.size() == 0); - // handle dynamic component - accessChain.indexChain.push_back(accessChain.component); - accessChain.preSwizzleBaseType = NoType; - accessChain.component = NoResult; - } -} - -// Utility method for creating a new block and setting the insert point to -// be in it. 
This is useful for flow-control operations that need a "dummy"
-// block following them (e.g. instructions after a discard, etc).
-void Builder::createAndSetNoPredecessorBlock(const char* /*name*/)
-{
-    Block* block = new Block(getUniqueId(), buildPoint->getParent());
-    block->setUnreachable();
-    buildPoint->getParent().addBlock(block);
-    setBuildPoint(block);
-
-    // if (name)
-        // addName(block->getId(), name);
-}
-
-// Comments in header
-void Builder::createBranch(Block* block)
-{
-    Instruction* branch = new Instruction(OpBranch);
-    branch->addIdOperand(block->getId());
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(branch));
-    block->addPredecessor(buildPoint);
-}
-
-void Builder::createSelectionMerge(Block* mergeBlock, unsigned int control)
-{
-    Instruction* merge = new Instruction(OpSelectionMerge);
-    merge->addIdOperand(mergeBlock->getId());
-    merge->addImmediateOperand(control);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(merge));
-}
-
-void Builder::createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control,
-                              unsigned int dependencyLength)
-{
-    Instruction* merge = new Instruction(OpLoopMerge);
-    merge->addIdOperand(mergeBlock->getId());
-    merge->addIdOperand(continueBlock->getId());
-    merge->addImmediateOperand(control);
-    if ((control & LoopControlDependencyLengthMask) != 0)
-        merge->addImmediateOperand(dependencyLength);
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(merge));
-}
-
-void Builder::createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock)
-{
-    Instruction* branch = new Instruction(OpBranchConditional);
-    branch->addIdOperand(condition);
-    branch->addIdOperand(thenBlock->getId());
-    branch->addIdOperand(elseBlock->getId());
-    buildPoint->addInstruction(std::unique_ptr<Instruction>(branch));
-    thenBlock->addPredecessor(buildPoint);
-    elseBlock->addPredecessor(buildPoint);
-}
-
-// OpSource
-// [OpSourceContinued]
-// ...
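// (For a long source string the emitted stream therefore looks roughly like,
// in disassembled form:
//     OpSource GLSL 450 %file_str "first chunk of the source text"
//     OpSourceContinued "next chunk"
//     OpSourceContinued "final chunk"
// with the chunk size capped by nonNullBytesPerInstruction below, since a
// single SPIR-V instruction holds at most 0xFFFF words. Illustrative
// disassembly, not verbatim output.)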
-void Builder::dumpSourceInstructions(std::vector& out) const -{ - const int maxWordCount = 0xFFFF; - const int opSourceWordCount = 4; - const int nonNullBytesPerInstruction = 4 * (maxWordCount - opSourceWordCount) - 1; - - if (source != SourceLanguageUnknown) { - // OpSource Language Version File Source - Instruction sourceInst(NoResult, NoType, OpSource); - sourceInst.addImmediateOperand(source); - sourceInst.addImmediateOperand(sourceVersion); - // File operand - if (sourceFileStringId != NoResult) { - sourceInst.addIdOperand(sourceFileStringId); - // Source operand - if (sourceText.size() > 0) { - int nextByte = 0; - std::string subString; - while ((int)sourceText.size() - nextByte > 0) { - subString = sourceText.substr(nextByte, nonNullBytesPerInstruction); - if (nextByte == 0) { - // OpSource - sourceInst.addStringOperand(subString.c_str()); - sourceInst.dump(out); - } else { - // OpSourcContinued - Instruction sourceContinuedInst(OpSourceContinued); - sourceContinuedInst.addStringOperand(subString.c_str()); - sourceContinuedInst.dump(out); - } - nextByte += nonNullBytesPerInstruction; - } - } else - sourceInst.dump(out); - } else - sourceInst.dump(out); - } -} - -void Builder::dumpInstructions(std::vector& out, const std::vector >& instructions) const -{ - for (int i = 0; i < (int)instructions.size(); ++i) { - instructions[i]->dump(out); - } -} - -void Builder::dumpModuleProcesses(std::vector& out) const -{ - for (int i = 0; i < (int)moduleProcesses.size(); ++i) { - Instruction moduleProcessed(OpModuleProcessed); - moduleProcessed.addStringOperand(moduleProcesses[i]); - moduleProcessed.dump(out); - } -} - -}; // end spv namespace diff --git a/third_party/glslang-spirv/SpvBuilder.h b/third_party/glslang-spirv/SpvBuilder.h deleted file mode 100644 index 173d10ef2..000000000 --- a/third_party/glslang-spirv/SpvBuilder.h +++ /dev/null @@ -1,641 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-
-//
-// "Builder" is an interface to fully build SPIR-V IR.  Allocate one of
-// these to build (a thread safe) internal SPIR-V representation (IR),
-// and then dump it as a binary stream according to the SPIR-V specification.
-//
-// A Builder has a 1:1 relationship with a SPIR-V module.
-//
-
-#pragma once
-#ifndef SpvBuilder_H
-#define SpvBuilder_H
-
-#include "Logger.h"
-#include "spirv.hpp"
-#include "spvIR.h"
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <set>
-#include <sstream>
-#include <stack>
-
-namespace spv {
-
-class Builder {
-public:
-    Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger);
-    virtual ~Builder();
-
-    static const int maxMatrixSize = 4;
-
-    void setSource(spv::SourceLanguage lang, int version)
-    {
-        source = lang;
-        sourceVersion = version;
-    }
-    void setSourceFile(const std::string& file)
-    {
-        Instruction* fileString = new Instruction(getUniqueId(), NoType, OpString);
-        fileString->addStringOperand(file.c_str());
-        sourceFileStringId = fileString->getResultId();
-        strings.push_back(std::unique_ptr<Instruction>(fileString));
-    }
-    void setSourceText(const std::string& text) { sourceText = text; }
-    void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); }
-    void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); }
-    void setEmitOpLines() { emitOpLines = true; }
-    void addExtension(const char* ext) { extensions.insert(ext); }
-    Id import(const char*);
-    void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem)
-    {
-        addressModel = addr;
-        memoryModel = mem;
-    }
-
-    void addCapability(spv::Capability cap) { capabilities.insert(cap); }
-
-    // To get a new <id> for anything needing a new one.
-    Id getUniqueId() { return ++uniqueId; }
-
-    // To get a set of new <id>s, e.g., for a set of function parameters
-    Id getUniqueIds(int numIds)
-    {
-        Id id = uniqueId + 1;
-        uniqueId += numIds;
-        return id;
-    }
-
-    // Log the current line, and if different than the last one,
-    // issue a new OpLine, using the current file name.
-    void setLine(int line);
-    // Low-level OpLine. See setLine() for a layered helper.
-    void addLine(Id fileName, int line, int column);
-
-    // For creating new types (will return old type if the requested one was already made).
-    Id makeVoidType();
-    Id makeBoolType();
-    Id makePointer(StorageClass, Id type);
-    Id makeIntegerType(int width, bool hasSign);  // generic
-    Id makeIntType(int width) { return makeIntegerType(width, true); }
-    Id makeUintType(int width) { return makeIntegerType(width, false); }
-    Id makeFloatType(int width);
-    Id makeStructType(const std::vector<Id>& members, const char*);
-    Id makeStructResultType(Id type0, Id type1);
-    Id makeVectorType(Id component, int size);
-    Id makeMatrixType(Id component, int cols, int rows);
-    Id makeArrayType(Id element, Id sizeId, int stride);  // 0 stride means no stride decoration
-    Id makeRuntimeArray(Id element);
-    Id makeFunctionType(Id returnType, const std::vector<Id>& paramTypes);
-    Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format);
-    Id makeSamplerType();
-    Id makeSampledImageType(Id imageType);
-
-    // For querying about types.
- Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } - Id getDerefTypeId(Id resultId) const; - Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } - Op getTypeClass(Id typeId) const { return getOpCode(typeId); } - Op getMostBasicTypeClass(Id typeId) const; - int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } - int getNumTypeConstituents(Id typeId) const; - int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } - Id getScalarTypeId(Id typeId) const; - Id getContainedTypeId(Id typeId) const; - Id getContainedTypeId(Id typeId, int) const; - StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } - ImageFormat getImageTypeFormat(Id typeId) const { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } - - bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } - bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } - bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } - bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } - bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } - bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } - - bool isBoolType(Id typeId) const { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } - bool isIntType(Id typeId) const { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } - bool isUintType(Id typeId) const { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } - bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } - bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } - bool isScalarType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || getTypeClass(typeId) == OpTypeBool; } - bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } - bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } - bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } - bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } - bool isAggregateType(Id typeId) const { return isArrayType(typeId) || isStructType(typeId); } - bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } - bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } - bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } - - bool isConstantOpCode(Op opcode) const; - bool isSpecConstantOpCode(Op opcode) const; - bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } - bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } - bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); } - unsigned int getConstantScalar(Id resultId) const { return module.getInstruction(resultId)->getImmediateOperand(0); } - StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } - - int getScalarTypeWidth(Id typeId) const - { - Id scalarTypeId = getScalarTypeId(typeId); - 
assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat); - return module.getInstruction(scalarTypeId)->getImmediateOperand(0); - } - - int getTypeNumColumns(Id typeId) const - { - assert(isMatrixType(typeId)); - return getNumTypeConstituents(typeId); - } - int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } - int getTypeNumRows(Id typeId) const - { - assert(isMatrixType(typeId)); - return getNumTypeComponents(getContainedTypeId(typeId)); - } - int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } - - Dim getTypeDimensionality(Id typeId) const - { - assert(isImageType(typeId)); - return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); - } - Id getImageType(Id resultId) const - { - Id typeId = getTypeId(resultId); - assert(isImageType(typeId) || isSampledImageType(typeId)); - return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId; - } - bool isArrayedImageType(Id typeId) const - { - assert(isImageType(typeId)); - return module.getInstruction(typeId)->getImmediateOperand(3) != 0; - } - - // For making new constants (will return old constant if the requested one was already made). - Id makeBoolConstant(bool b, bool specConstant = false); - Id makeIntConstant(int i, bool specConstant = false) { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } - Id makeUintConstant(unsigned u, bool specConstant = false) { return makeIntConstant(makeUintType(32), u, specConstant); } - Id makeInt64Constant(long long i, bool specConstant = false) { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); } - Id makeUint64Constant(unsigned long long u, bool specConstant = false) { return makeInt64Constant(makeUintType(64), u, specConstant); } -#ifdef AMD_EXTENSIONS - Id makeInt16Constant(short i, bool specConstant = false) { return makeIntConstant(makeIntType(16), (unsigned)((unsigned short)i), specConstant); } - Id makeUint16Constant(unsigned short u, bool specConstant = false) { return makeIntConstant(makeUintType(16), (unsigned)u, specConstant); } -#endif - Id makeFloatConstant(float f, bool specConstant = false); - Id makeDoubleConstant(double d, bool specConstant = false); -#ifdef AMD_EXTENSIONS - Id makeFloat16Constant(float f16, bool specConstant = false); -#endif - - // Turn the array of constants into a proper spv constant of the requested type. - Id makeCompositeConstant(Id type, const std::vector& comps, bool specConst = false); - - // Methods for adding information outside the CFG. - Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); - void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); - void addName(Id, const char* name); - void addMemberName(Id, int member, const char* name); - void addDecoration(Id, Decoration, int num = -1); - void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); - - // At the end of what block do the next create*() instructions go? - void setBuildPoint(Block* bp) { buildPoint = bp; } - Block* getBuildPoint() const { return buildPoint; } - - // Make the entry-point function. The returned pointer is only valid - // for the lifetime of this builder. - Function* makeEntryPoint(const char*); - - // Make a shader-style function, and create its entry block if entry is non-zero. - // Return the function, pass back the entry. - // The returned pointer is only valid for the lifetime of this builder. 
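The type probes in the preceding hunk (isIntType(), isUintType(), getScalarTypeWidth()) all read immediate operands of the defining type instruction: for OpTypeInt, operand 0 after the result ID is the bit width and operand 1 is the signedness flag. A self-contained decode of one such instruction (the opcode value 21 is from the SPIR-V spec; the result ID and values are made up for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // %3 = OpTypeInt 32 1 encodes as: [wordCount << 16 | opcode, 3, 32, 1]
  const uint32_t kOpTypeInt = 21;
  std::vector<uint32_t> inst = {(4u << 16) | kOpTypeInt, 3, 32, 1};
  uint32_t width = inst[2];       // getImmediateOperand(0): bit width
  uint32_t signedness = inst[3];  // getImmediateOperand(1): 0 = unsigned
  std::cout << (signedness ? "int" : "uint") << width << "\n";  // "int32"
}
```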
- Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, const std::vector& paramTypes, - const std::vector>& precisions, Block **entry = 0); - - // Create a return. An 'implicit' return is one not appearing in the source - // code. In the case of an implicit return, no post-return block is inserted. - void makeReturn(bool implicit, Id retVal = 0); - - // Generate all the code needed to finish up a function. - void leaveFunction(); - - // Create a discard. - void makeDiscard(); - - // Create a global or function local or IO variable. - Id createVariable(StorageClass, Id type, const char* name = 0); - - // Create an intermediate with an undefined value. - Id createUndefined(Id type); - - // Store into an Id and return the l-value - void createStore(Id rValue, Id lValue); - - // Load from an Id and return it - Id createLoad(Id lValue); - - // Create an OpAccessChain instruction - Id createAccessChain(StorageClass, Id base, const std::vector& offsets); - - // Create an OpArrayLength instruction - Id createArrayLength(Id base, unsigned int member); - - // Create an OpCompositeExtract instruction - Id createCompositeExtract(Id composite, Id typeId, unsigned index); - Id createCompositeExtract(Id composite, Id typeId, const std::vector& indexes); - Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); - Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector& indexes); - - Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); - Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); - - void createNoResultOp(Op); - void createNoResultOp(Op, Id operand); - void createNoResultOp(Op, const std::vector& operands); - void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); - void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); - Id createUnaryOp(Op, Id typeId, Id operand); - Id createBinOp(Op, Id typeId, Id operand1, Id operand2); - Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); - Id createOp(Op, Id typeId, const std::vector& operands); - Id createFunctionCall(spv::Function*, const std::vector&); - Id createSpecConstantOp(Op, Id typeId, const std::vector& operands, const std::vector& literals); - - // Take an rvalue (source) and a set of channels to extract from it to - // make a new rvalue, which is returned. - Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels); - - // Take a copy of an lvalue (target) and a source of components, and set the - // source components into the lvalue where the 'channels' say to put them. - // An updated version of the target is returned. - // (No true lvalue or stores are used.) - Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels); - - // If both the id and precision are valid, the id - // gets tagged with the requested precision. - // The passed in id is always the returned id, to simplify use patterns. 
- Id setPrecision(Id id, Decoration precision) - { - if (precision != NoPrecision && id != NoResult) - addDecoration(id, precision); - - return id; - } - - // Can smear a scalar to a vector for the following forms: - // - promoteScalar(scalar, vector) // smear scalar to width of vector - // - promoteScalar(vector, scalar) // smear scalar to width of vector - // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to - // - promoteScalar(scalar, scalar) // do nothing - // Other forms are not allowed. - // - // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. - // The type of the created vector is a vector of components of the same type as the scalar. - // - // Note: One of the arguments will change, with the result coming back that way rather than - // through the return value. - void promoteScalar(Decoration precision, Id& left, Id& right); - - // Make a value by smearing the scalar to fill the type. - // vectorType should be the correct type for making a vector of scalarVal. - // (No conversions are done.) - Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); - - // Create a call to a built-in function. - Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector& args); - - // List of parameters used to create a texture operation - struct TextureParameters { - Id sampler; - Id coords; - Id bias; - Id lod; - Id Dref; - Id offset; - Id offsets; - Id gradX; - Id gradY; - Id sample; - Id component; - Id texelOut; - Id lodClamp; - }; - - // Select the correct texture operation based on all inputs, and emit the correct instruction - Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicit, const TextureParameters&); - - // Emit the OpTextureQuery* instruction that was passed in. - // Figure out the right return value and type, and return it. - Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult); - - Id createSamplePositionCall(Decoration precision, Id, Id); - - Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); - Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); - - // Reduction comparison for composites: For equal and not-equal resulting in a scalar. - Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); - - // OpCompositeConstruct - Id createCompositeConstruct(Id typeId, const std::vector& constituents); - - // vector or scalar constructor - Id createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId); - - // matrix constructor - Id createMatrixConstructor(Decoration precision, const std::vector& sources, Id constructee); - - // Helper to use for building nested control flow with if-then-else. - class If { - public: - If(Id condition, unsigned int ctrl, Builder& builder); - ~If() {} - - void makeBeginElse(); - void makeEndIf(); - - private: - If(const If&); - If& operator=(If&); - - Builder& builder; - Id condition; - unsigned int control; - Function* function; - Block* headerBlock; - Block* thenBlock; - Block* elseBlock; - Block* mergeBlock; - }; - - // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing - // any case/default labels, all separated by one or more case/default labels. Each possible - // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this - // number space. 
How to compute the value is given by 'condition', as in switch(condition). - // - // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. - // - // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). - // - // Returns the right set of basic blocks to start each code segment with, so that the caller's - // recursion stack can hold the memory for it. - // - void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector& caseValues, - const std::vector& valueToSegment, int defaultSegment, std::vector& segmentBB); // return argument - - // Add a branch to the innermost switch's merge block. - void addSwitchBreak(); - - // Move to the next code segment, passing in the return argument in makeSwitch() - void nextSwitchSegment(std::vector& segmentBB, int segment); - - // Finish off the innermost switch. - void endSwitch(std::vector& segmentBB); - - struct LoopBlocks { - LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) : - head(head), body(body), merge(merge), continue_target(continue_target) { } - Block &head, &body, &merge, &continue_target; - private: - LoopBlocks(); - LoopBlocks& operator=(const LoopBlocks&); - }; - - // Start a new loop and prepare the builder to generate code for it. Until - // closeLoop() is called for this loop, createLoopContinue() and - // createLoopExit() will target its corresponding blocks. - LoopBlocks& makeNewLoop(); - - // Create a new block in the function containing the build point. Memory is - // owned by the function object. - Block& makeNewBlock(); - - // Add a branch to the continue_target of the current (innermost) loop. - void createLoopContinue(); - - // Add an exit (e.g. "break") from the innermost loop that we're currently - // in. - void createLoopExit(); - - // Close the innermost loop that you're in - void closeLoop(); - - // - // Access chain design for an R-Value vs. L-Value: - // - // There is a single access chain the builder is building at - // any particular time. Such a chain can be used to either to a load or - // a store, when desired. - // - // Expressions can be r-values, l-values, or both, or only r-values: - // a[b.c].d = .... // l-value - // ... = a[b.c].d; // r-value, that also looks like an l-value - // ++a[b.c].d; // r-value and l-value - // (x + y)[2]; // r-value only, can't possibly be l-value - // - // Computing an r-value means generating code. Hence, - // r-values should only be computed when they are needed, not speculatively. - // - // Computing an l-value means saving away information for later use in the compiler, - // no code is generated until the l-value is later dereferenced. It is okay - // to speculatively generate an l-value, just not okay to speculatively dereference it. - // - // The base of the access chain (the left-most variable or expression - // from which everything is based) can be set either as an l-value - // or as an r-value. Most efficient would be to set an l-value if one - // is available. If an expression was evaluated, the resulting r-value - // can be set as the chain base. - // - // The users of this single access chain can save and restore if they - // want to nest or manage multiple chains. 
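The r-value/l-value discussion above is the crux of the deleted builder, so a compact model may help: while an l-value chain is being built, the builder only accumulates a base and a list of indices, and no code is emitted until the chain is finally dereferenced, at which point it collapses into a single OpAccessChain. A self-contained sketch of that behavior (AccessChainModel and all IDs are illustrative, not the glslang types):

```cpp
#include <cstdio>
#include <vector>

using Id = unsigned int;

// Illustrative model of the single active chain the builder maintains.
struct AccessChainModel {
  Id base = 0;              // l-value: pointer to the base object
  std::vector<Id> indexChain;
  bool isRValue = false;    // r-value bases are handled differently
};

int main() {
  AccessChainModel chain;
  chain.base = 5;             // setAccessChainLValue(%5): pointer base
  chain.indexChain = {6, 7};  // two accessChainPush() calls: a[b.c] style
  // Only the final load emits code, collapsing to one OpAccessChain:
  std::printf("%%8 = OpAccessChain %%%u", chain.base);
  for (Id i : chain.indexChain) std::printf(" %%%u", i);
  std::printf("\n%%9 = OpLoad %%8\n");
}
```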
- // - - struct AccessChain { - Id base; // for l-values, pointer to the base object, for r-values, the base object - std::vector indexChain; - Id instr; // cache the instruction that generates this access chain - std::vector swizzle; // each std::vector element selects the next GLSL component number - Id component; // a dynamic component index, can coexist with a swizzle, done after the swizzle, NoResult if not present - Id preSwizzleBaseType; // dereferenced type, before swizzle or component is applied; NoType unless a swizzle or component is present - bool isRValue; // true if 'base' is an r-value, otherwise, base is an l-value - }; - - // - // the SPIR-V builder maintains a single active chain that - // the following methods operate on - // - - // for external save and restore - AccessChain getAccessChain() { return accessChain; } - void setAccessChain(AccessChain newChain) { accessChain = newChain; } - - // clear accessChain - void clearAccessChain(); - - // set new base as an l-value base - void setAccessChainLValue(Id lValue) - { - assert(isPointer(lValue)); - accessChain.base = lValue; - } - - // set new base value as an r-value - void setAccessChainRValue(Id rValue) - { - accessChain.isRValue = true; - accessChain.base = rValue; - } - - // push offset onto the end of the chain - void accessChainPush(Id offset) - { - accessChain.indexChain.push_back(offset); - } - - // push new swizzle onto the end of any existing swizzle, merging into a single swizzle - void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType); - - // push a dynamic component selection onto the access chain, only applicable with a - // non-trivial swizzle or no swizzle - void accessChainPushComponent(Id component, Id preSwizzleBaseType) - { - if (accessChain.swizzle.size() != 1) { - accessChain.component = component; - if (accessChain.preSwizzleBaseType == NoType) - accessChain.preSwizzleBaseType = preSwizzleBaseType; - } - } - - // use accessChain and swizzle to store value - void accessChainStore(Id rvalue); - - // use accessChain and swizzle to load an r-value - Id accessChainLoad(Decoration precision, Id ResultType); - - // get the direct pointer for an l-value - Id accessChainGetLValue(); - - // Get the inferred SPIR-V type of the result of the current access chain, - // based on the type of the base and the chain of dereferences. - Id accessChainGetInferredType(); - - // Remove OpDecorate instructions whose operands are defined in unreachable - // blocks. - void eliminateDeadDecorations(); - void dump(std::vector&) const; - - void createBranch(Block* block); - void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); - void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, unsigned int dependencyLength); - void createSelectionMerge(Block* mergeBlock, unsigned int control); - - // Sets to generate opcode for specialization constants. - void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; } - // Sets to generate opcode for non-specialization constants (normal mode). - void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; } - // Check if the builder is generating code for spec constants. 
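createSelectionMerge() and createConditionalBranch() above are the primitives behind the If helper declared earlier: SPIR-V structured control flow requires the header block to name its merge block via OpSelectionMerge immediately before the conditional branch, and both arms to rejoin at that merge block. A sketch of the emitted skeleton (block names are illustrative; this prints the canonical shape rather than building real IR):

```cpp
#include <cstdio>

int main() {
  // What the builder emits for "if (%cond) { then } else { else }":
  std::puts("          OpSelectionMerge %merge None   ; createSelectionMerge");
  std::puts("          OpBranchConditional %cond %then %else");
  std::puts("%then   = OpLabel");
  std::puts("          OpBranch %merge");
  std::puts("%else   = OpLabel");
  std::puts("          OpBranch %merge");
  std::puts("%merge  = OpLabel                        ; build point resumes");
}
```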
- bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; } - - protected: - Id makeIntConstant(Id typeId, unsigned value, bool specConstant); - Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant); - Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const; - Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const; - Id findCompositeConstant(Op typeClass, const std::vector& comps) const; - Id collapseAccessChain(); - void remapDynamicSwizzle(); - void transferAccessChainSwizzle(bool dynamic); - void simplifyAccessChainSwizzle(); - void createAndSetNoPredecessorBlock(const char*); - void dumpSourceInstructions(std::vector&) const; - void dumpInstructions(std::vector&, const std::vector >&) const; - void dumpModuleProcesses(std::vector&) const; - - unsigned int spvVersion; // the version of SPIR-V to emit in the header - SourceLanguage source; - int sourceVersion; - spv::Id sourceFileStringId; - std::string sourceText; - int currentLine; - bool emitOpLines; - std::set extensions; - std::vector sourceExtensions; - std::vector moduleProcesses; - AddressingModel addressModel; - MemoryModel memoryModel; - std::set capabilities; - int builderNumber; - Module module; - Block* buildPoint; - Id uniqueId; - Function* entryPointFunction; - bool generatingOpCodeForSpecConst; - AccessChain accessChain; - - // special blocks of instructions for output - std::vector > strings; - std::vector > imports; - std::vector > entryPoints; - std::vector > executionModes; - std::vector > names; - std::vector > lines; - std::vector > decorations; - std::vector > constantsTypesGlobals; - std::vector > externals; - std::vector > functions; - - // not output, internally used for quick & dirty canonical (unique) creation - std::vector groupedConstants[OpConstant]; // all types appear before OpConstant - std::vector groupedTypes[OpConstant]; - - // stack of switches - std::stack switchMerges; - - // Our loop stack. - std::stack loops; - - // The stream for outputting warnings and errors. - SpvBuildLogger* logger; -}; // end Builder class - -}; // end spv namespace - -#endif // SpvBuilder_H diff --git a/third_party/glslang-spirv/bitutils.h b/third_party/glslang-spirv/bitutils.h deleted file mode 100644 index 31288ab69..000000000 --- a/third_party/glslang-spirv/bitutils.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2015-2016 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef LIBSPIRV_UTIL_BITUTILS_H_ -#define LIBSPIRV_UTIL_BITUTILS_H_ - -#include -#include - -namespace spvutils { - -// Performs a bitwise copy of source to the destination type Dest. 
-template <typename Dest, typename Src>
-Dest BitwiseCast(Src source) {
-  Dest dest;
-  static_assert(sizeof(source) == sizeof(dest),
-                "BitwiseCast: Source and destination must have the same size");
-  std::memcpy(&dest, &source, sizeof(dest));
-  return dest;
-}
-
-// SetBits<T, First, Num> returns an integer of type <T> with bits set
-// for position <First> through <First + Num - 1>, counting from the least
-// significant bit. In particular when Num == 0, no positions are set to 1.
-// A static assert will be triggered if First + Num > sizeof(T) * 8, that is,
-// a bit that will not fit in the underlying type is set.
-template <typename T, size_t First = 0, size_t Num = 0>
-struct SetBits {
-  static_assert(First < sizeof(T) * 8,
-                "Tried to set a bit that is shifted too far.");
-  const static T get = (T(1) << First) | SetBits<T, First + 1, Num - 1>::get;
-};
-
-template <typename T, size_t Last>
-struct SetBits<T, Last, 0> {
-  const static T get = T(0);
-};
-
-// This is all compile-time so we can put our tests right here.
-static_assert(SetBits<uint32_t, 0, 0>::get == uint32_t(0x00000000),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 0, 1>::get == uint32_t(0x00000001),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 31, 1>::get == uint32_t(0x80000000),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 1, 2>::get == uint32_t(0x00000006),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 30, 2>::get == uint32_t(0xc0000000),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 0, 31>::get == uint32_t(0x7FFFFFFF),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 0, 32>::get == uint32_t(0xFFFFFFFF),
-              "SetBits failed");
-static_assert(SetBits<uint32_t, 16, 16>::get == uint32_t(0xFFFF0000),
-              "SetBits failed");
-
-static_assert(SetBits<uint64_t, 0, 1>::get == uint64_t(0x0000000000000001LL),
-              "SetBits failed");
-static_assert(SetBits<uint64_t, 63, 1>::get == uint64_t(0x8000000000000000LL),
-              "SetBits failed");
-static_assert(SetBits<uint64_t, 62, 2>::get == uint64_t(0xc000000000000000LL),
-              "SetBits failed");
-static_assert(SetBits<uint64_t, 31, 1>::get == uint64_t(0x0000000080000000LL),
-              "SetBits failed");
-static_assert(SetBits<uint64_t, 16, 16>::get == uint64_t(0x00000000FFFF0000LL),
-              "SetBits failed");
-
-}  // namespace spvutils
-
-#endif  // LIBSPIRV_UTIL_BITUTILS_H_
diff --git a/third_party/glslang-spirv/disassemble.cpp b/third_party/glslang-spirv/disassemble.cpp
deleted file mode 100644
index c950a66a0..000000000
--- a/third_party/glslang-spirv/disassemble.cpp
+++ /dev/null
@@ -1,695 +0,0 @@
-//
-// Copyright (C) 2014-2015 LunarG, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-//    Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//    Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Disassembler for SPIR-V. -// - -#include -#include -#include -#include -#include -#include -#include - -#include "disassemble.h" -#include "doc.h" - -namespace spv { - extern "C" { - // Include C-based headers that don't have a namespace - #include "GLSL.std.450.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif - } -} -const char* GlslStd450DebugNames[spv::GLSLstd450Count]; - -namespace spv { - -#ifdef AMD_EXTENSIONS -static const char* GLSLextAMDGetDebugNames(const char*, unsigned); -#endif - -#ifdef NV_EXTENSIONS -static const char* GLSLextNVGetDebugNames(const char*, unsigned); -#endif - -static void Kill(std::ostream& out, const char* message) -{ - out << std::endl << "Disassembly failed: " << message << std::endl; - exit(1); -} - -// used to identify the extended instruction library imported when printing -enum ExtInstSet { - GLSL450Inst, -#ifdef AMD_EXTENSIONS - GLSLextAMDInst, -#endif -#ifdef NV_EXTENSIONS - GLSLextNVInst, -#endif - OpenCLExtInst, -}; - -// Container class for a single instance of a SPIR-V stream, with methods for disassembly. -class SpirvStream { -public: - SpirvStream(std::ostream& out, const std::vector& stream) : out(out), stream(stream), word(0), nextNestedControl(0) { } - virtual ~SpirvStream() { } - - void validate(); - void processInstructions(); - -protected: - SpirvStream(const SpirvStream&); - SpirvStream& operator=(const SpirvStream&); - Op getOpCode(int id) const { return idInstruction[id] ? 
(Op)(stream[idInstruction[id]] & OpCodeMask) : OpNop; } - - // Output methods - void outputIndent(); - void formatId(Id id, std::stringstream&); - void outputResultId(Id id); - void outputTypeId(Id id); - void outputId(Id id); - void outputMask(OperandClass operandClass, unsigned mask); - void disassembleImmediates(int numOperands); - void disassembleIds(int numOperands); - int disassembleString(); - void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands); - - // Data - std::ostream& out; // where to write the disassembly - const std::vector& stream; // the actual word stream - int size; // the size of the word stream - int word; // the next word of the stream to read - - // map each to the instruction that created it - Id bound; - std::vector idInstruction; // the word offset into the stream where the instruction for result [id] starts; 0 if not yet seen (forward reference or function parameter) - - std::vector idDescriptor; // the best text string known for explaining the - - // schema - unsigned int schema; - - // stack of structured-merge points - std::stack nestedControl; - Id nextNestedControl; // need a slight delay for when we are nested -}; - -void SpirvStream::validate() -{ - size = (int)stream.size(); - if (size < 4) - Kill(out, "stream is too short"); - - // Magic number - if (stream[word++] != MagicNumber) { - out << "Bad magic number"; - return; - } - - // Version - out << "// Module Version " << std::hex << stream[word++] << std::endl; - - // Generator's magic number - out << "// Generated by (magic number): " << std::hex << stream[word++] << std::dec << std::endl; - - // Result bound - bound = stream[word++]; - idInstruction.resize(bound); - idDescriptor.resize(bound); - out << "// Id's are bound by " << bound << std::endl; - out << std::endl; - - // Reserved schema, must be 0 for now - schema = stream[word++]; - if (schema != 0) - Kill(out, "bad schema, must be 0"); -} - -// Loop over all the instructions, in order, processing each. -// Boiler plate for each is handled here directly, the rest is dispatched. -void SpirvStream::processInstructions() -{ - // Instructions - while (word < size) { - int instructionStart = word; - - // Instruction wordCount and opcode - unsigned int firstWord = stream[word]; - unsigned wordCount = firstWord >> WordCountShift; - Op opCode = (Op)(firstWord & OpCodeMask); - int nextInst = word + wordCount; - ++word; - - // Presence of full instruction - if (nextInst > size) - Kill(out, "stream instruction terminated too early"); - - // Base for computing number of operands; will be updated as more is learned - unsigned numOperands = wordCount - 1; - - // Type - Id typeId = 0; - if (InstructionDesc[opCode].hasType()) { - typeId = stream[word++]; - --numOperands; - } - - // Result - Id resultId = 0; - if (InstructionDesc[opCode].hasResult()) { - resultId = stream[word++]; - --numOperands; - - // save instruction for future reference - idInstruction[resultId] = instructionStart; - } - - outputResultId(resultId); - outputTypeId(typeId); - outputIndent(); - - // Hand off the Op and all its operands - disassembleInstruction(resultId, typeId, opCode, numOperands); - if (word != nextInst) { - out << " ERROR, incorrect number of operands consumed. 
At " << word << " instead of " << nextInst << " instruction start was " << instructionStart; - word = nextInst; - } - out << std::endl; - } -} - -void SpirvStream::outputIndent() -{ - for (int i = 0; i < (int)nestedControl.size(); ++i) - out << " "; -} - -void SpirvStream::formatId(Id id, std::stringstream& idStream) -{ - if (id != 0) { - // On instructions with no IDs, this is called with "0", which does not - // have to be within ID bounds on null shaders. - if (id >= bound) - Kill(out, "Bad "); - - idStream << id; - if (idDescriptor[id].size() > 0) - idStream << "(" << idDescriptor[id] << ")"; - } -} - -void SpirvStream::outputResultId(Id id) -{ - const int width = 16; - std::stringstream idStream; - formatId(id, idStream); - out << std::setw(width) << std::right << idStream.str(); - if (id != 0) - out << ":"; - else - out << " "; - - if (nestedControl.size() && id == nestedControl.top()) - nestedControl.pop(); -} - -void SpirvStream::outputTypeId(Id id) -{ - const int width = 12; - std::stringstream idStream; - formatId(id, idStream); - out << std::setw(width) << std::right << idStream.str() << " "; -} - -void SpirvStream::outputId(Id id) -{ - if (id >= bound) - Kill(out, "Bad "); - - out << id; - if (idDescriptor[id].size() > 0) - out << "(" << idDescriptor[id] << ")"; -} - -void SpirvStream::outputMask(OperandClass operandClass, unsigned mask) -{ - if (mask == 0) - out << "None"; - else { - for (int m = 0; m < OperandClassParams[operandClass].ceiling; ++m) { - if (mask & (1 << m)) - out << OperandClassParams[operandClass].getName(m) << " "; - } - } -} - -void SpirvStream::disassembleImmediates(int numOperands) -{ - for (int i = 0; i < numOperands; ++i) { - out << stream[word++]; - if (i < numOperands - 1) - out << " "; - } -} - -void SpirvStream::disassembleIds(int numOperands) -{ - for (int i = 0; i < numOperands; ++i) { - outputId(stream[word++]); - if (i < numOperands - 1) - out << " "; - } -} - -// return the number of operands consumed by the string -int SpirvStream::disassembleString() -{ - int startWord = word; - - out << " \""; - - const char* wordString; - bool done = false; - do { - unsigned int content = stream[word]; - wordString = (const char*)&content; - for (int charCount = 0; charCount < 4; ++charCount) { - if (*wordString == 0) { - done = true; - break; - } - out << *(wordString++); - } - ++word; - } while (! 
done); - - out << "\""; - - return word - startWord; -} - -void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands) -{ - // Process the opcode - - out << (OpcodeString(opCode) + 2); // leave out the "Op" - - if (opCode == OpLoopMerge || opCode == OpSelectionMerge) - nextNestedControl = stream[word]; - else if (opCode == OpBranchConditional || opCode == OpSwitch) { - if (nextNestedControl) { - nestedControl.push(nextNestedControl); - nextNestedControl = 0; - } - } else if (opCode == OpExtInstImport) { - idDescriptor[resultId] = (const char*)(&stream[word]); - } - else { - if (resultId != 0 && idDescriptor[resultId].size() == 0) { - switch (opCode) { - case OpTypeInt: - idDescriptor[resultId] = "int"; - break; - case OpTypeFloat: - idDescriptor[resultId] = "float"; - break; - case OpTypeBool: - idDescriptor[resultId] = "bool"; - break; - case OpTypeStruct: - idDescriptor[resultId] = "struct"; - break; - case OpTypePointer: - idDescriptor[resultId] = "ptr"; - break; - case OpTypeVector: - if (idDescriptor[stream[word]].size() > 0) - idDescriptor[resultId].append(idDescriptor[stream[word]].begin(), idDescriptor[stream[word]].begin() + 1); - idDescriptor[resultId].append("vec"); - switch (stream[word + 1]) { - case 2: idDescriptor[resultId].append("2"); break; - case 3: idDescriptor[resultId].append("3"); break; - case 4: idDescriptor[resultId].append("4"); break; - case 8: idDescriptor[resultId].append("8"); break; - case 16: idDescriptor[resultId].append("16"); break; - case 32: idDescriptor[resultId].append("32"); break; - default: break; - } - break; - default: - break; - } - } - } - - // Process the operands. Note, a new context-dependent set could be - // swapped in mid-traversal. - - // Handle images specially, so can put out helpful strings. - if (opCode == OpTypeImage) { - out << " "; - disassembleIds(1); - out << " " << DimensionString((Dim)stream[word++]); - out << (stream[word++] != 0 ? " depth" : ""); - out << (stream[word++] != 0 ? " array" : ""); - out << (stream[word++] != 0 ? 
" multi-sampled" : ""); - switch (stream[word++]) { - case 0: out << " runtime"; break; - case 1: out << " sampled"; break; - case 2: out << " nonsampled"; break; - } - out << " format:" << ImageFormatString((ImageFormat)stream[word++]); - - if (numOperands == 8) { - out << " " << AccessQualifierString(stream[word++]); - } - return; - } - - // Handle all the parameterized operands - for (int op = 0; op < InstructionDesc[opCode].operands.getNum() && numOperands > 0; ++op) { - out << " "; - OperandClass operandClass = InstructionDesc[opCode].operands.getClass(op); - switch (operandClass) { - case OperandId: - case OperandScope: - case OperandMemorySemantics: - disassembleIds(1); - --numOperands; - // Get names for printing "(XXX)" for readability, *after* this id - if (opCode == OpName) - idDescriptor[stream[word - 1]] = (const char*)(&stream[word]); - break; - case OperandVariableIds: - disassembleIds(numOperands); - return; - case OperandImageOperands: - outputMask(OperandImageOperands, stream[word++]); - --numOperands; - disassembleIds(numOperands); - return; - case OperandOptionalLiteral: - case OperandVariableLiterals: - if ((opCode == OpDecorate && stream[word - 1] == DecorationBuiltIn) || - (opCode == OpMemberDecorate && stream[word - 1] == DecorationBuiltIn)) { - out << BuiltInString(stream[word++]); - --numOperands; - ++op; - } - disassembleImmediates(numOperands); - return; - case OperandVariableIdLiteral: - while (numOperands > 0) { - out << std::endl; - outputResultId(0); - outputTypeId(0); - outputIndent(); - out << " Type "; - disassembleIds(1); - out << ", member "; - disassembleImmediates(1); - numOperands -= 2; - } - return; - case OperandVariableLiteralId: - while (numOperands > 0) { - out << std::endl; - outputResultId(0); - outputTypeId(0); - outputIndent(); - out << " case "; - disassembleImmediates(1); - out << ": "; - disassembleIds(1); - numOperands -= 2; - } - return; - case OperandLiteralNumber: - disassembleImmediates(1); - --numOperands; - if (opCode == OpExtInst) { - ExtInstSet extInstSet = GLSL450Inst; - const char* name = idDescriptor[stream[word - 2]].c_str(); - if (0 == memcmp("OpenCL", name, 6)) { - extInstSet = OpenCLExtInst; -#ifdef AMD_EXTENSIONS - } else if (strcmp(spv::E_SPV_AMD_shader_ballot, name) == 0 || - strcmp(spv::E_SPV_AMD_shader_trinary_minmax, name) == 0 || - strcmp(spv::E_SPV_AMD_shader_explicit_vertex_parameter, name) == 0 || - strcmp(spv::E_SPV_AMD_gcn_shader, name) == 0) { - extInstSet = GLSLextAMDInst; -#endif -#ifdef NV_EXTENSIONS - }else if (strcmp(spv::E_SPV_NV_sample_mask_override_coverage, name) == 0 || - strcmp(spv::E_SPV_NV_geometry_shader_passthrough, name) == 0 || - strcmp(spv::E_SPV_NV_viewport_array2, name) == 0 || - strcmp(spv::E_SPV_NVX_multiview_per_view_attributes, name) == 0) { - extInstSet = GLSLextNVInst; -#endif - } - unsigned entrypoint = stream[word - 1]; - if (extInstSet == GLSL450Inst) { - if (entrypoint < GLSLstd450Count) { - out << "(" << GlslStd450DebugNames[entrypoint] << ")"; - } -#ifdef AMD_EXTENSIONS - } else if (extInstSet == GLSLextAMDInst) { - out << "(" << GLSLextAMDGetDebugNames(name, entrypoint) << ")"; -#endif -#ifdef NV_EXTENSIONS - } - else if (extInstSet == GLSLextNVInst) { - out << "(" << GLSLextNVGetDebugNames(name, entrypoint) << ")"; -#endif - } - } - break; - case OperandOptionalLiteralString: - case OperandLiteralString: - numOperands -= disassembleString(); - break; - default: - assert(operandClass >= OperandSource && operandClass < OperandOpcode); - - if 
(OperandClassParams[operandClass].bitmask) - outputMask(operandClass, stream[word++]); - else - out << OperandClassParams[operandClass].getName(stream[word++]); - --numOperands; - - break; - } - } - - return; -} - -static void GLSLstd450GetDebugNames(const char** names) -{ - for (int i = 0; i < GLSLstd450Count; ++i) - names[i] = "Unknown"; - - names[GLSLstd450Round] = "Round"; - names[GLSLstd450RoundEven] = "RoundEven"; - names[GLSLstd450Trunc] = "Trunc"; - names[GLSLstd450FAbs] = "FAbs"; - names[GLSLstd450SAbs] = "SAbs"; - names[GLSLstd450FSign] = "FSign"; - names[GLSLstd450SSign] = "SSign"; - names[GLSLstd450Floor] = "Floor"; - names[GLSLstd450Ceil] = "Ceil"; - names[GLSLstd450Fract] = "Fract"; - names[GLSLstd450Radians] = "Radians"; - names[GLSLstd450Degrees] = "Degrees"; - names[GLSLstd450Sin] = "Sin"; - names[GLSLstd450Cos] = "Cos"; - names[GLSLstd450Tan] = "Tan"; - names[GLSLstd450Asin] = "Asin"; - names[GLSLstd450Acos] = "Acos"; - names[GLSLstd450Atan] = "Atan"; - names[GLSLstd450Sinh] = "Sinh"; - names[GLSLstd450Cosh] = "Cosh"; - names[GLSLstd450Tanh] = "Tanh"; - names[GLSLstd450Asinh] = "Asinh"; - names[GLSLstd450Acosh] = "Acosh"; - names[GLSLstd450Atanh] = "Atanh"; - names[GLSLstd450Atan2] = "Atan2"; - names[GLSLstd450Pow] = "Pow"; - names[GLSLstd450Exp] = "Exp"; - names[GLSLstd450Log] = "Log"; - names[GLSLstd450Exp2] = "Exp2"; - names[GLSLstd450Log2] = "Log2"; - names[GLSLstd450Sqrt] = "Sqrt"; - names[GLSLstd450InverseSqrt] = "InverseSqrt"; - names[GLSLstd450Determinant] = "Determinant"; - names[GLSLstd450MatrixInverse] = "MatrixInverse"; - names[GLSLstd450Modf] = "Modf"; - names[GLSLstd450ModfStruct] = "ModfStruct"; - names[GLSLstd450FMin] = "FMin"; - names[GLSLstd450SMin] = "SMin"; - names[GLSLstd450UMin] = "UMin"; - names[GLSLstd450FMax] = "FMax"; - names[GLSLstd450SMax] = "SMax"; - names[GLSLstd450UMax] = "UMax"; - names[GLSLstd450FClamp] = "FClamp"; - names[GLSLstd450SClamp] = "SClamp"; - names[GLSLstd450UClamp] = "UClamp"; - names[GLSLstd450FMix] = "FMix"; - names[GLSLstd450Step] = "Step"; - names[GLSLstd450SmoothStep] = "SmoothStep"; - names[GLSLstd450Fma] = "Fma"; - names[GLSLstd450Frexp] = "Frexp"; - names[GLSLstd450FrexpStruct] = "FrexpStruct"; - names[GLSLstd450Ldexp] = "Ldexp"; - names[GLSLstd450PackSnorm4x8] = "PackSnorm4x8"; - names[GLSLstd450PackUnorm4x8] = "PackUnorm4x8"; - names[GLSLstd450PackSnorm2x16] = "PackSnorm2x16"; - names[GLSLstd450PackUnorm2x16] = "PackUnorm2x16"; - names[GLSLstd450PackHalf2x16] = "PackHalf2x16"; - names[GLSLstd450PackDouble2x32] = "PackDouble2x32"; - names[GLSLstd450UnpackSnorm2x16] = "UnpackSnorm2x16"; - names[GLSLstd450UnpackUnorm2x16] = "UnpackUnorm2x16"; - names[GLSLstd450UnpackHalf2x16] = "UnpackHalf2x16"; - names[GLSLstd450UnpackSnorm4x8] = "UnpackSnorm4x8"; - names[GLSLstd450UnpackUnorm4x8] = "UnpackUnorm4x8"; - names[GLSLstd450UnpackDouble2x32] = "UnpackDouble2x32"; - names[GLSLstd450Length] = "Length"; - names[GLSLstd450Distance] = "Distance"; - names[GLSLstd450Cross] = "Cross"; - names[GLSLstd450Normalize] = "Normalize"; - names[GLSLstd450FaceForward] = "FaceForward"; - names[GLSLstd450Reflect] = "Reflect"; - names[GLSLstd450Refract] = "Refract"; - names[GLSLstd450FindILsb] = "FindILsb"; - names[GLSLstd450FindSMsb] = "FindSMsb"; - names[GLSLstd450FindUMsb] = "FindUMsb"; - names[GLSLstd450InterpolateAtCentroid] = "InterpolateAtCentroid"; - names[GLSLstd450InterpolateAtSample] = "InterpolateAtSample"; - names[GLSLstd450InterpolateAtOffset] = "InterpolateAtOffset"; -} - -#ifdef AMD_EXTENSIONS -static const char* 
GLSLextAMDGetDebugNames(const char* name, unsigned entrypoint) -{ - if (strcmp(name, spv::E_SPV_AMD_shader_ballot) == 0) { - switch (entrypoint) { - case SwizzleInvocationsAMD: return "SwizzleInvocationsAMD"; - case SwizzleInvocationsMaskedAMD: return "SwizzleInvocationsMaskedAMD"; - case WriteInvocationAMD: return "WriteInvocationAMD"; - case MbcntAMD: return "MbcntAMD"; - default: return "Bad"; - } - } else if (strcmp(name, spv::E_SPV_AMD_shader_trinary_minmax) == 0) { - switch (entrypoint) { - case FMin3AMD: return "FMin3AMD"; - case UMin3AMD: return "UMin3AMD"; - case SMin3AMD: return "SMin3AMD"; - case FMax3AMD: return "FMax3AMD"; - case UMax3AMD: return "UMax3AMD"; - case SMax3AMD: return "SMax3AMD"; - case FMid3AMD: return "FMid3AMD"; - case UMid3AMD: return "UMid3AMD"; - case SMid3AMD: return "SMid3AMD"; - default: return "Bad"; - } - } else if (strcmp(name, spv::E_SPV_AMD_shader_explicit_vertex_parameter) == 0) { - switch (entrypoint) { - case InterpolateAtVertexAMD: return "InterpolateAtVertexAMD"; - default: return "Bad"; - } - } - else if (strcmp(name, spv::E_SPV_AMD_gcn_shader) == 0) { - switch (entrypoint) { - case CubeFaceIndexAMD: return "CubeFaceIndexAMD"; - case CubeFaceCoordAMD: return "CubeFaceCoordAMD"; - case TimeAMD: return "TimeAMD"; - default: - break; - } - } - - return "Bad"; -} -#endif - - -#ifdef NV_EXTENSIONS -static const char* GLSLextNVGetDebugNames(const char* name, unsigned entrypoint) -{ - if (strcmp(name, spv::E_SPV_NV_sample_mask_override_coverage) == 0 || - strcmp(name, spv::E_SPV_NV_geometry_shader_passthrough) == 0 || - strcmp(name, spv::E_ARB_shader_viewport_layer_array) == 0 || - strcmp(name, spv::E_SPV_NV_viewport_array2) == 0 || - strcmp(spv::E_SPV_NVX_multiview_per_view_attributes, name) == 0) { - switch (entrypoint) { - case DecorationOverrideCoverageNV: return "OverrideCoverageNV"; - case DecorationPassthroughNV: return "PassthroughNV"; - case CapabilityGeometryShaderPassthroughNV: return "GeometryShaderPassthroughNV"; - case DecorationViewportRelativeNV: return "ViewportRelativeNV"; - case BuiltInViewportMaskNV: return "ViewportMaskNV"; - case CapabilityShaderViewportMaskNV: return "ShaderViewportMaskNV"; - case DecorationSecondaryViewportRelativeNV: return "SecondaryViewportRelativeNV"; - case BuiltInSecondaryPositionNV: return "SecondaryPositionNV"; - case BuiltInSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; - case CapabilityShaderStereoViewNV: return "ShaderStereoViewNV"; - case BuiltInPositionPerViewNV: return "PositionPerViewNV"; - case BuiltInViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; - case CapabilityPerViewAttributesNV: return "PerViewAttributesNV"; - default: return "Bad"; - } - } - return "Bad"; -} -#endif - -void Disassemble(std::ostream& out, const std::vector& stream) -{ - SpirvStream SpirvStream(out, stream); - spv::Parameterize(); - GLSLstd450GetDebugNames(GlslStd450DebugNames); - SpirvStream.validate(); - SpirvStream.processInstructions(); -} - -}; // end namespace spv diff --git a/third_party/glslang-spirv/disassemble.h b/third_party/glslang-spirv/disassemble.h deleted file mode 100644 index 47cef65a5..000000000 --- a/third_party/glslang-spirv/disassemble.h +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Disassembler for SPIR-V. -// - -#pragma once -#ifndef disassembler_H -#define disassembler_H - -#include -#include - -namespace spv { - - void Disassemble(std::ostream& out, const std::vector&); - -}; // end namespace spv - -#endif // disassembler_H diff --git a/third_party/glslang-spirv/doc.cpp b/third_party/glslang-spirv/doc.cpp deleted file mode 100644 index 809af4c1c..000000000 --- a/third_party/glslang-spirv/doc.cpp +++ /dev/null @@ -1,2894 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// 1) Programmatically fill in instruction/operand information. -// This can be used for disassembly, printing documentation, etc. -// -// 2) Print documentation from this parameterization. -// - -#include "doc.h" - -#include -#include -#include - -namespace spv { - extern "C" { - // Include C-based headers that don't have a namespace - #include "GLSL.ext.KHR.h" - #include "GLSL.ext.EXT.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif - } -} - -namespace spv { - -// -// Whole set of functions that translate enumerants to their text strings for -// the specification (or their sanitized versions for auto-generating the -// spirv headers. -// -// Also, the ceilings are declared next to these, to help keep them in sync. -// Ceilings should be -// - one more than the maximum value an enumerant takes on, for non-mask enumerants -// (for non-sparse enums, this is the number of enumerants) -// - the number of bits consumed by the set of masks -// (for non-sparse mask enums, this is the number of enumerants) -// - -const int SourceLanguageCeiling = 6; // HLSL todo: need official enumerant - -const char* SourceString(int source) -{ - switch (source) { - case 0: return "Unknown"; - case 1: return "ESSL"; - case 2: return "GLSL"; - case 3: return "OpenCL_C"; - case 4: return "OpenCL_CPP"; - case 5: return "HLSL"; - - case SourceLanguageCeiling: - default: return "Bad"; - } -} - -const int ExecutionModelCeiling = 7; - -const char* ExecutionModelString(int model) -{ - switch (model) { - case 0: return "Vertex"; - case 1: return "TessellationControl"; - case 2: return "TessellationEvaluation"; - case 3: return "Geometry"; - case 4: return "Fragment"; - case 5: return "GLCompute"; - case 6: return "Kernel"; - - case ExecutionModelCeiling: - default: return "Bad"; - } -} - -const int AddressingModelCeiling = 3; - -const char* AddressingString(int addr) -{ - switch (addr) { - case 0: return "Logical"; - case 1: return "Physical32"; - case 2: return "Physical64"; - - case AddressingModelCeiling: - default: return "Bad"; - } -} - -const int MemoryModelCeiling = 3; - -const char* MemoryString(int mem) -{ - switch (mem) { - case 0: return "Simple"; - case 1: return "GLSL450"; - case 2: return "OpenCL"; - - case MemoryModelCeiling: - default: return "Bad"; - } -} - -const int ExecutionModeCeiling = 33; - -const char* ExecutionModeString(int mode) -{ - switch (mode) { - case 0: return "Invocations"; - case 1: return "SpacingEqual"; - case 2: return "SpacingFractionalEven"; - case 3: return "SpacingFractionalOdd"; - case 4: return "VertexOrderCw"; - case 5: return "VertexOrderCcw"; - case 6: return "PixelCenterInteger"; - case 7: return "OriginUpperLeft"; - case 8: return "OriginLowerLeft"; - case 9: return "EarlyFragmentTests"; - case 10: return "PointMode"; - case 11: return "Xfb"; - case 12: return "DepthReplacing"; - case 13: return "Bad"; - case 14: return "DepthGreater"; - case 15: return 
"DepthLess"; - case 16: return "DepthUnchanged"; - case 17: return "LocalSize"; - case 18: return "LocalSizeHint"; - case 19: return "InputPoints"; - case 20: return "InputLines"; - case 21: return "InputLinesAdjacency"; - case 22: return "Triangles"; - case 23: return "InputTrianglesAdjacency"; - case 24: return "Quads"; - case 25: return "Isolines"; - case 26: return "OutputVertices"; - case 27: return "OutputPoints"; - case 28: return "OutputLineStrip"; - case 29: return "OutputTriangleStrip"; - case 30: return "VecTypeHint"; - case 31: return "ContractionOff"; - case 32: return "Bad"; - - case 4446: return "PostDepthCoverage"; - case ExecutionModeCeiling: - default: return "Bad"; - } -} - -const int StorageClassCeiling = 13; - -const char* StorageClassString(int StorageClass) -{ - switch (StorageClass) { - case 0: return "UniformConstant"; - case 1: return "Input"; - case 2: return "Uniform"; - case 3: return "Output"; - case 4: return "Workgroup"; - case 5: return "CrossWorkgroup"; - case 6: return "Private"; - case 7: return "Function"; - case 8: return "Generic"; - case 9: return "PushConstant"; - case 10: return "AtomicCounter"; - case 11: return "Image"; - case 12: return "StorageBuffer"; - - case StorageClassCeiling: - default: return "Bad"; - } -} - -const int DecorationCeiling = 45; - -const char* DecorationString(int decoration) -{ - switch (decoration) { - case 0: return "RelaxedPrecision"; - case 1: return "SpecId"; - case 2: return "Block"; - case 3: return "BufferBlock"; - case 4: return "RowMajor"; - case 5: return "ColMajor"; - case 6: return "ArrayStride"; - case 7: return "MatrixStride"; - case 8: return "GLSLShared"; - case 9: return "GLSLPacked"; - case 10: return "CPacked"; - case 11: return "BuiltIn"; - case 12: return "Bad"; - case 13: return "NoPerspective"; - case 14: return "Flat"; - case 15: return "Patch"; - case 16: return "Centroid"; - case 17: return "Sample"; - case 18: return "Invariant"; - case 19: return "Restrict"; - case 20: return "Aliased"; - case 21: return "Volatile"; - case 22: return "Constant"; - case 23: return "Coherent"; - case 24: return "NonWritable"; - case 25: return "NonReadable"; - case 26: return "Uniform"; - case 27: return "Bad"; - case 28: return "SaturatedConversion"; - case 29: return "Stream"; - case 30: return "Location"; - case 31: return "Component"; - case 32: return "Index"; - case 33: return "Binding"; - case 34: return "DescriptorSet"; - case 35: return "Offset"; - case 36: return "XfbBuffer"; - case 37: return "XfbStride"; - case 38: return "FuncParamAttr"; - case 39: return "FP Rounding Mode"; - case 40: return "FP Fast Math Mode"; - case 41: return "Linkage Attributes"; - case 42: return "NoContraction"; - case 43: return "InputAttachmentIndex"; - case 44: return "Alignment"; - - case DecorationCeiling: - default: return "Bad"; - -#ifdef AMD_EXTENSIONS - case 4999: return "ExplicitInterpAMD"; -#endif -#ifdef NV_EXTENSIONS - case 5248: return "OverrideCoverageNV"; - case 5250: return "PassthroughNV"; - case 5252: return "ViewportRelativeNV"; - case 5256: return "SecondaryViewportRelativeNV"; -#endif - } -} - -const int BuiltInCeiling = 44; - -const char* BuiltInString(int builtIn) -{ - switch (builtIn) { - case 0: return "Position"; - case 1: return "PointSize"; - case 2: return "Bad"; - case 3: return "ClipDistance"; - case 4: return "CullDistance"; - case 5: return "VertexId"; - case 6: return "InstanceId"; - case 7: return "PrimitiveId"; - case 8: return "InvocationId"; - case 9: return "Layer"; - case 10: return 
"ViewportIndex"; - case 11: return "TessLevelOuter"; - case 12: return "TessLevelInner"; - case 13: return "TessCoord"; - case 14: return "PatchVertices"; - case 15: return "FragCoord"; - case 16: return "PointCoord"; - case 17: return "FrontFacing"; - case 18: return "SampleId"; - case 19: return "SamplePosition"; - case 20: return "SampleMask"; - case 21: return "Bad"; - case 22: return "FragDepth"; - case 23: return "HelperInvocation"; - case 24: return "NumWorkgroups"; - case 25: return "WorkgroupSize"; - case 26: return "WorkgroupId"; - case 27: return "LocalInvocationId"; - case 28: return "GlobalInvocationId"; - case 29: return "LocalInvocationIndex"; - case 30: return "WorkDim"; - case 31: return "GlobalSize"; - case 32: return "EnqueuedWorkgroupSize"; - case 33: return "GlobalOffset"; - case 34: return "GlobalLinearId"; - case 35: return "Bad"; - case 36: return "SubgroupSize"; - case 37: return "SubgroupMaxSize"; - case 38: return "NumSubgroups"; - case 39: return "NumEnqueuedSubgroups"; - case 40: return "SubgroupId"; - case 41: return "SubgroupLocalInvocationId"; - case 42: return "VertexIndex"; // TBD: put next to VertexId? - case 43: return "InstanceIndex"; // TBD: put next to InstanceId? - - case 4416: return "SubgroupEqMaskKHR"; - case 4417: return "SubgroupGeMaskKHR"; - case 4418: return "SubgroupGtMaskKHR"; - case 4419: return "SubgroupLeMaskKHR"; - case 4420: return "SubgroupLtMaskKHR"; - case 4438: return "DeviceIndex"; - case 4440: return "ViewIndex"; - case 4424: return "BaseVertex"; - case 4425: return "BaseInstance"; - case 4426: return "DrawIndex"; - case 5014: return "FragStencilRefEXT"; - -#ifdef AMD_EXTENSIONS - case 4992: return "BaryCoordNoPerspAMD"; - case 4993: return "BaryCoordNoPerspCentroidAMD"; - case 4994: return "BaryCoordNoPerspSampleAMD"; - case 4995: return "BaryCoordSmoothAMD"; - case 4996: return "BaryCoordSmoothCentroidAMD"; - case 4997: return "BaryCoordSmoothSampleAMD"; - case 4998: return "BaryCoordPullModelAMD"; -#endif - -#ifdef NV_EXTENSIONS - case 5253: return "ViewportMaskNV"; - case 5257: return "SecondaryPositionNV"; - case 5258: return "SecondaryViewportMaskNV"; - case 5261: return "PositionPerViewNV"; - case 5262: return "ViewportMaskPerViewNV"; -#endif - - case 5264: return "FullyCoveredEXT"; - - case BuiltInCeiling: - default: return "Bad"; - } -} - -const int DimensionCeiling = 7; - -const char* DimensionString(int dim) -{ - switch (dim) { - case 0: return "1D"; - case 1: return "2D"; - case 2: return "3D"; - case 3: return "Cube"; - case 4: return "Rect"; - case 5: return "Buffer"; - case 6: return "SubpassData"; - - case DimensionCeiling: - default: return "Bad"; - } -} - -const int SamplerAddressingModeCeiling = 5; - -const char* SamplerAddressingModeString(int mode) -{ - switch (mode) { - case 0: return "None"; - case 1: return "ClampToEdge"; - case 2: return "Clamp"; - case 3: return "Repeat"; - case 4: return "RepeatMirrored"; - - case SamplerAddressingModeCeiling: - default: return "Bad"; - } -} - -const int SamplerFilterModeCeiling = 2; - -const char* SamplerFilterModeString(int mode) -{ - switch (mode) { - case 0: return "Nearest"; - case 1: return "Linear"; - - case SamplerFilterModeCeiling: - default: return "Bad"; - } -} - -const int ImageFormatCeiling = 40; - -const char* ImageFormatString(int format) -{ - switch (format) { - case 0: return "Unknown"; - - // ES/Desktop float - case 1: return "Rgba32f"; - case 2: return "Rgba16f"; - case 3: return "R32f"; - case 4: return "Rgba8"; - case 5: return "Rgba8Snorm"; - - // 
Desktop float - case 6: return "Rg32f"; - case 7: return "Rg16f"; - case 8: return "R11fG11fB10f"; - case 9: return "R16f"; - case 10: return "Rgba16"; - case 11: return "Rgb10A2"; - case 12: return "Rg16"; - case 13: return "Rg8"; - case 14: return "R16"; - case 15: return "R8"; - case 16: return "Rgba16Snorm"; - case 17: return "Rg16Snorm"; - case 18: return "Rg8Snorm"; - case 19: return "R16Snorm"; - case 20: return "R8Snorm"; - - // ES/Desktop int - case 21: return "Rgba32i"; - case 22: return "Rgba16i"; - case 23: return "Rgba8i"; - case 24: return "R32i"; - - // Desktop int - case 25: return "Rg32i"; - case 26: return "Rg16i"; - case 27: return "Rg8i"; - case 28: return "R16i"; - case 29: return "R8i"; - - // ES/Desktop uint - case 30: return "Rgba32ui"; - case 31: return "Rgba16ui"; - case 32: return "Rgba8ui"; - case 33: return "R32ui"; - - // Desktop uint - case 34: return "Rgb10a2ui"; - case 35: return "Rg32ui"; - case 36: return "Rg16ui"; - case 37: return "Rg8ui"; - case 38: return "R16ui"; - case 39: return "R8ui"; - - case ImageFormatCeiling: - default: - return "Bad"; - } -} - -const int ImageChannelOrderCeiling = 19; - -const char* ImageChannelOrderString(int format) -{ - switch (format) { - case 0: return "R"; - case 1: return "A"; - case 2: return "RG"; - case 3: return "RA"; - case 4: return "RGB"; - case 5: return "RGBA"; - case 6: return "BGRA"; - case 7: return "ARGB"; - case 8: return "Intensity"; - case 9: return "Luminance"; - case 10: return "Rx"; - case 11: return "RGx"; - case 12: return "RGBx"; - case 13: return "Depth"; - case 14: return "DepthStencil"; - case 15: return "sRGB"; - case 16: return "sRGBx"; - case 17: return "sRGBA"; - case 18: return "sBGRA"; - - case ImageChannelOrderCeiling: - default: - return "Bad"; - } -} - -const int ImageChannelDataTypeCeiling = 17; - -const char* ImageChannelDataTypeString(int type) -{ - switch (type) - { - case 0: return "SnormInt8"; - case 1: return "SnormInt16"; - case 2: return "UnormInt8"; - case 3: return "UnormInt16"; - case 4: return "UnormShort565"; - case 5: return "UnormShort555"; - case 6: return "UnormInt101010"; - case 7: return "SignedInt8"; - case 8: return "SignedInt16"; - case 9: return "SignedInt32"; - case 10: return "UnsignedInt8"; - case 11: return "UnsignedInt16"; - case 12: return "UnsignedInt32"; - case 13: return "HalfFloat"; - case 14: return "Float"; - case 15: return "UnormInt24"; - case 16: return "UnormInt101010_2"; - - case ImageChannelDataTypeCeiling: - default: - return "Bad"; - } -} - -const int ImageOperandsCeiling = 8; - -const char* ImageOperandsString(int format) -{ - switch (format) { - case 0: return "Bias"; - case 1: return "Lod"; - case 2: return "Grad"; - case 3: return "ConstOffset"; - case 4: return "Offset"; - case 5: return "ConstOffsets"; - case 6: return "Sample"; - case 7: return "MinLod"; - - case ImageOperandsCeiling: - default: - return "Bad"; - } -} - -const int FPFastMathCeiling = 5; - -const char* FPFastMathString(int mode) -{ - switch (mode) { - case 0: return "NotNaN"; - case 1: return "NotInf"; - case 2: return "NSZ"; - case 3: return "AllowRecip"; - case 4: return "Fast"; - - case FPFastMathCeiling: - default: return "Bad"; - } -} - -const int FPRoundingModeCeiling = 4; - -const char* FPRoundingModeString(int mode) -{ - switch (mode) { - case 0: return "RTE"; - case 1: return "RTZ"; - case 2: return "RTP"; - case 3: return "RTN"; - - case FPRoundingModeCeiling: - default: return "Bad"; - } -} - -const int LinkageTypeCeiling = 2; - -const char* 
LinkageTypeString(int type) -{ - switch (type) { - case 0: return "Export"; - case 1: return "Import"; - - case LinkageTypeCeiling: - default: return "Bad"; - } -} - -const int FuncParamAttrCeiling = 8; - -const char* FuncParamAttrString(int attr) -{ - switch (attr) { - case 0: return "Zext"; - case 1: return "Sext"; - case 2: return "ByVal"; - case 3: return "Sret"; - case 4: return "NoAlias"; - case 5: return "NoCapture"; - case 6: return "NoWrite"; - case 7: return "NoReadWrite"; - - case FuncParamAttrCeiling: - default: return "Bad"; - } -} - -const int AccessQualifierCeiling = 3; - -const char* AccessQualifierString(int attr) -{ - switch (attr) { - case 0: return "ReadOnly"; - case 1: return "WriteOnly"; - case 2: return "ReadWrite"; - - case AccessQualifierCeiling: - default: return "Bad"; - } -} - -const int SelectControlCeiling = 2; - -const char* SelectControlString(int cont) -{ - switch (cont) { - case 0: return "Flatten"; - case 1: return "DontFlatten"; - - case SelectControlCeiling: - default: return "Bad"; - } -} - -const int LoopControlCeiling = 4; - -const char* LoopControlString(int cont) -{ - switch (cont) { - case 0: return "Unroll"; - case 1: return "DontUnroll"; - case 2: return "DependencyInfinite"; - case 3: return "DependencyLength"; - - case LoopControlCeiling: - default: return "Bad"; - } -} - -const int FunctionControlCeiling = 4; - -const char* FunctionControlString(int cont) -{ - switch (cont) { - case 0: return "Inline"; - case 1: return "DontInline"; - case 2: return "Pure"; - case 3: return "Const"; - - case FunctionControlCeiling: - default: return "Bad"; - } -} - -const int MemorySemanticsCeiling = 12; - -const char* MemorySemanticsString(int mem) -{ - // Note: No bits set (None) means "Relaxed" - switch (mem) { - case 0: return "Bad"; // Note: this is a placeholder for 'Consume' - case 1: return "Acquire"; - case 2: return "Release"; - case 3: return "AcquireRelease"; - case 4: return "SequentiallyConsistent"; - case 5: return "Bad"; // Note: reserved for future expansion - case 6: return "UniformMemory"; - case 7: return "SubgroupMemory"; - case 8: return "WorkgroupMemory"; - case 9: return "CrossWorkgroupMemory"; - case 10: return "AtomicCounterMemory"; - case 11: return "ImageMemory"; - - case MemorySemanticsCeiling: - default: return "Bad"; - } -} - -const int MemoryAccessCeiling = 3; - -const char* MemoryAccessString(int mem) -{ - switch (mem) { - case 0: return "Volatile"; - case 1: return "Aligned"; - case 2: return "Nontemporal"; - - case MemoryAccessCeiling: - default: return "Bad"; - } -} - -const int ScopeCeiling = 5; - -const char* ScopeString(int mem) -{ - switch (mem) { - case 0: return "CrossDevice"; - case 1: return "Device"; - case 2: return "Workgroup"; - case 3: return "Subgroup"; - case 4: return "Invocation"; - - case ScopeCeiling: - default: return "Bad"; - } -} - -const int GroupOperationCeiling = 3; - -const char* GroupOperationString(int gop) -{ - - switch (gop) - { - case 0: return "Reduce"; - case 1: return "InclusiveScan"; - case 2: return "ExclusiveScan"; - - case GroupOperationCeiling: - default: return "Bad"; - } -} - -const int KernelEnqueueFlagsCeiling = 3; - -const char* KernelEnqueueFlagsString(int flag) -{ - switch (flag) - { - case 0: return "NoWait"; - case 1: return "WaitKernel"; - case 2: return "WaitWorkGroup"; - - case KernelEnqueueFlagsCeiling: - default: return "Bad"; - } -} - -const int KernelProfilingInfoCeiling = 1; - -const char* KernelProfilingInfoString(int info) -{ - switch (info) - { - case 0: return 
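MemorySemanticsString above decodes one bit position at a time, while an actual memory-semantics operand is a mask, so a printer has to walk the bits; an empty mask (None) means "Relaxed", per the note in the table. A hedged sketch of that decoding loop, assuming it is linked against the deleted doc.cpp (DecodeMemorySemantics and kMemorySemanticsBits are illustrative names, not from the file):

    #include <string>

    namespace spv {
        // Declared in the deleted doc.h/doc.cpp pair.
        const char* MemorySemanticsString(int mem);
    }

    // Mirrors MemorySemanticsCeiling: the number of mask bits decoded above.
    enum { kMemorySemanticsBits = 12 };

    // Render a memory-semantics mask as "Acquire|UniformMemory"-style text.
    std::string DecodeMemorySemantics(unsigned mask)
    {
        if (mask == 0)
            return "Relaxed";
        std::string out;
        for (int bit = 0; bit < kMemorySemanticsBits; ++bit) {
            if (mask & (1u << bit)) {
                if (!out.empty())
                    out += "|";
                out += spv::MemorySemanticsString(bit);
            }
        }
        return out;
    }
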
"CmdExecTime"; - - case KernelProfilingInfoCeiling: - default: return "Bad"; - } -} - -const int CapabilityCeiling = 58; - -const char* CapabilityString(int info) -{ - switch (info) - { - case 0: return "Matrix"; - case 1: return "Shader"; - case 2: return "Geometry"; - case 3: return "Tessellation"; - case 4: return "Addresses"; - case 5: return "Linkage"; - case 6: return "Kernel"; - case 7: return "Vector16"; - case 8: return "Float16Buffer"; - case 9: return "Float16"; - case 10: return "Float64"; - case 11: return "Int64"; - case 12: return "Int64Atomics"; - case 13: return "ImageBasic"; - case 14: return "ImageReadWrite"; - case 15: return "ImageMipmap"; - case 16: return "Bad"; - case 17: return "Pipes"; - case 18: return "Groups"; - case 19: return "DeviceEnqueue"; - case 20: return "LiteralSampler"; - case 21: return "AtomicStorage"; - case 22: return "Int16"; - case 23: return "TessellationPointSize"; - case 24: return "GeometryPointSize"; - case 25: return "ImageGatherExtended"; - case 26: return "Bad"; - case 27: return "StorageImageMultisample"; - case 28: return "UniformBufferArrayDynamicIndexing"; - case 29: return "SampledImageArrayDynamicIndexing"; - case 30: return "StorageBufferArrayDynamicIndexing"; - case 31: return "StorageImageArrayDynamicIndexing"; - case 32: return "ClipDistance"; - case 33: return "CullDistance"; - case 34: return "ImageCubeArray"; - case 35: return "SampleRateShading"; - case 36: return "ImageRect"; - case 37: return "SampledRect"; - case 38: return "GenericPointer"; - case 39: return "Int8"; - case 40: return "InputAttachment"; - case 41: return "SparseResidency"; - case 42: return "MinLod"; - case 43: return "Sampled1D"; - case 44: return "Image1D"; - case 45: return "SampledCubeArray"; - case 46: return "SampledBuffer"; - case 47: return "ImageBuffer"; - case 48: return "ImageMSArray"; - case 49: return "StorageImageExtendedFormats"; - case 50: return "ImageQuery"; - case 51: return "DerivativeControl"; - case 52: return "InterpolationFunction"; - case 53: return "TransformFeedback"; - case 54: return "GeometryStreams"; - case 55: return "StorageImageReadWithoutFormat"; - case 56: return "StorageImageWriteWithoutFormat"; - case 57: return "MultiViewport"; - - case 4423: return "SubgroupBallotKHR"; - case 4427: return "DrawParameters"; - case 4431: return "SubgroupVoteKHR"; - - case 4433: return "StorageUniformBufferBlock16"; - case 4434: return "StorageUniform16"; - case 4435: return "StoragePushConstant16"; - case 4436: return "StorageInputOutput16"; - - case 4437: return "DeviceGroup"; - case 4439: return "MultiView"; - - case 5013: return "StencilExportEXT"; - -#ifdef AMD_EXTENSIONS - case 5009: return "ImageGatherBiasLodAMD"; - case 5010: return "FragmentMaskAMD"; - case 5015: return "ImageReadWriteLodAMD"; -#endif - - case 4445: return "AtomicStorageOps"; - - case 4447: return "SampleMaskPostDepthCoverage"; -#ifdef NV_EXTENSIONS - case 5251: return "GeometryShaderPassthroughNV"; - case 5254: return "ShaderViewportIndexLayerNV"; - case 5255: return "ShaderViewportMaskNV"; - case 5259: return "ShaderStereoViewNV"; - case 5260: return "PerViewAttributesNV"; -#endif - - case 5265: return "FragmentFullyCoveredEXT"; - - case CapabilityCeiling: - default: return "Bad"; - } -} - -const char* OpcodeString(int op) -{ - switch (op) { - case 0: return "OpNop"; - case 1: return "OpUndef"; - case 2: return "OpSourceContinued"; - case 3: return "OpSource"; - case 4: return "OpSourceExtension"; - case 5: return "OpName"; - case 6: return "OpMemberName"; 
- case 7: return "OpString"; - case 8: return "OpLine"; - case 9: return "Bad"; - case 10: return "OpExtension"; - case 11: return "OpExtInstImport"; - case 12: return "OpExtInst"; - case 13: return "Bad"; - case 14: return "OpMemoryModel"; - case 15: return "OpEntryPoint"; - case 16: return "OpExecutionMode"; - case 17: return "OpCapability"; - case 18: return "Bad"; - case 19: return "OpTypeVoid"; - case 20: return "OpTypeBool"; - case 21: return "OpTypeInt"; - case 22: return "OpTypeFloat"; - case 23: return "OpTypeVector"; - case 24: return "OpTypeMatrix"; - case 25: return "OpTypeImage"; - case 26: return "OpTypeSampler"; - case 27: return "OpTypeSampledImage"; - case 28: return "OpTypeArray"; - case 29: return "OpTypeRuntimeArray"; - case 30: return "OpTypeStruct"; - case 31: return "OpTypeOpaque"; - case 32: return "OpTypePointer"; - case 33: return "OpTypeFunction"; - case 34: return "OpTypeEvent"; - case 35: return "OpTypeDeviceEvent"; - case 36: return "OpTypeReserveId"; - case 37: return "OpTypeQueue"; - case 38: return "OpTypePipe"; - case 39: return "OpTypeForwardPointer"; - case 40: return "Bad"; - case 41: return "OpConstantTrue"; - case 42: return "OpConstantFalse"; - case 43: return "OpConstant"; - case 44: return "OpConstantComposite"; - case 45: return "OpConstantSampler"; - case 46: return "OpConstantNull"; - case 47: return "Bad"; - case 48: return "OpSpecConstantTrue"; - case 49: return "OpSpecConstantFalse"; - case 50: return "OpSpecConstant"; - case 51: return "OpSpecConstantComposite"; - case 52: return "OpSpecConstantOp"; - case 53: return "Bad"; - case 54: return "OpFunction"; - case 55: return "OpFunctionParameter"; - case 56: return "OpFunctionEnd"; - case 57: return "OpFunctionCall"; - case 58: return "Bad"; - case 59: return "OpVariable"; - case 60: return "OpImageTexelPointer"; - case 61: return "OpLoad"; - case 62: return "OpStore"; - case 63: return "OpCopyMemory"; - case 64: return "OpCopyMemorySized"; - case 65: return "OpAccessChain"; - case 66: return "OpInBoundsAccessChain"; - case 67: return "OpPtrAccessChain"; - case 68: return "OpArrayLength"; - case 69: return "OpGenericPtrMemSemantics"; - case 70: return "OpInBoundsPtrAccessChain"; - case 71: return "OpDecorate"; - case 72: return "OpMemberDecorate"; - case 73: return "OpDecorationGroup"; - case 74: return "OpGroupDecorate"; - case 75: return "OpGroupMemberDecorate"; - case 76: return "Bad"; - case 77: return "OpVectorExtractDynamic"; - case 78: return "OpVectorInsertDynamic"; - case 79: return "OpVectorShuffle"; - case 80: return "OpCompositeConstruct"; - case 81: return "OpCompositeExtract"; - case 82: return "OpCompositeInsert"; - case 83: return "OpCopyObject"; - case 84: return "OpTranspose"; - case 85: return "Bad"; - case 86: return "OpSampledImage"; - case 87: return "OpImageSampleImplicitLod"; - case 88: return "OpImageSampleExplicitLod"; - case 89: return "OpImageSampleDrefImplicitLod"; - case 90: return "OpImageSampleDrefExplicitLod"; - case 91: return "OpImageSampleProjImplicitLod"; - case 92: return "OpImageSampleProjExplicitLod"; - case 93: return "OpImageSampleProjDrefImplicitLod"; - case 94: return "OpImageSampleProjDrefExplicitLod"; - case 95: return "OpImageFetch"; - case 96: return "OpImageGather"; - case 97: return "OpImageDrefGather"; - case 98: return "OpImageRead"; - case 99: return "OpImageWrite"; - case 100: return "OpImage"; - case 101: return "OpImageQueryFormat"; - case 102: return "OpImageQueryOrder"; - case 103: return "OpImageQuerySizeLod"; - case 104: return 
"OpImageQuerySize"; - case 105: return "OpImageQueryLod"; - case 106: return "OpImageQueryLevels"; - case 107: return "OpImageQuerySamples"; - case 108: return "Bad"; - case 109: return "OpConvertFToU"; - case 110: return "OpConvertFToS"; - case 111: return "OpConvertSToF"; - case 112: return "OpConvertUToF"; - case 113: return "OpUConvert"; - case 114: return "OpSConvert"; - case 115: return "OpFConvert"; - case 116: return "OpQuantizeToF16"; - case 117: return "OpConvertPtrToU"; - case 118: return "OpSatConvertSToU"; - case 119: return "OpSatConvertUToS"; - case 120: return "OpConvertUToPtr"; - case 121: return "OpPtrCastToGeneric"; - case 122: return "OpGenericCastToPtr"; - case 123: return "OpGenericCastToPtrExplicit"; - case 124: return "OpBitcast"; - case 125: return "Bad"; - case 126: return "OpSNegate"; - case 127: return "OpFNegate"; - case 128: return "OpIAdd"; - case 129: return "OpFAdd"; - case 130: return "OpISub"; - case 131: return "OpFSub"; - case 132: return "OpIMul"; - case 133: return "OpFMul"; - case 134: return "OpUDiv"; - case 135: return "OpSDiv"; - case 136: return "OpFDiv"; - case 137: return "OpUMod"; - case 138: return "OpSRem"; - case 139: return "OpSMod"; - case 140: return "OpFRem"; - case 141: return "OpFMod"; - case 142: return "OpVectorTimesScalar"; - case 143: return "OpMatrixTimesScalar"; - case 144: return "OpVectorTimesMatrix"; - case 145: return "OpMatrixTimesVector"; - case 146: return "OpMatrixTimesMatrix"; - case 147: return "OpOuterProduct"; - case 148: return "OpDot"; - case 149: return "OpIAddCarry"; - case 150: return "OpISubBorrow"; - case 151: return "OpUMulExtended"; - case 152: return "OpSMulExtended"; - case 153: return "Bad"; - case 154: return "OpAny"; - case 155: return "OpAll"; - case 156: return "OpIsNan"; - case 157: return "OpIsInf"; - case 158: return "OpIsFinite"; - case 159: return "OpIsNormal"; - case 160: return "OpSignBitSet"; - case 161: return "OpLessOrGreater"; - case 162: return "OpOrdered"; - case 163: return "OpUnordered"; - case 164: return "OpLogicalEqual"; - case 165: return "OpLogicalNotEqual"; - case 166: return "OpLogicalOr"; - case 167: return "OpLogicalAnd"; - case 168: return "OpLogicalNot"; - case 169: return "OpSelect"; - case 170: return "OpIEqual"; - case 171: return "OpINotEqual"; - case 172: return "OpUGreaterThan"; - case 173: return "OpSGreaterThan"; - case 174: return "OpUGreaterThanEqual"; - case 175: return "OpSGreaterThanEqual"; - case 176: return "OpULessThan"; - case 177: return "OpSLessThan"; - case 178: return "OpULessThanEqual"; - case 179: return "OpSLessThanEqual"; - case 180: return "OpFOrdEqual"; - case 181: return "OpFUnordEqual"; - case 182: return "OpFOrdNotEqual"; - case 183: return "OpFUnordNotEqual"; - case 184: return "OpFOrdLessThan"; - case 185: return "OpFUnordLessThan"; - case 186: return "OpFOrdGreaterThan"; - case 187: return "OpFUnordGreaterThan"; - case 188: return "OpFOrdLessThanEqual"; - case 189: return "OpFUnordLessThanEqual"; - case 190: return "OpFOrdGreaterThanEqual"; - case 191: return "OpFUnordGreaterThanEqual"; - case 192: return "Bad"; - case 193: return "Bad"; - case 194: return "OpShiftRightLogical"; - case 195: return "OpShiftRightArithmetic"; - case 196: return "OpShiftLeftLogical"; - case 197: return "OpBitwiseOr"; - case 198: return "OpBitwiseXor"; - case 199: return "OpBitwiseAnd"; - case 200: return "OpNot"; - case 201: return "OpBitFieldInsert"; - case 202: return "OpBitFieldSExtract"; - case 203: return "OpBitFieldUExtract"; - case 204: return 
"OpBitReverse"; - case 205: return "OpBitCount"; - case 206: return "Bad"; - case 207: return "OpDPdx"; - case 208: return "OpDPdy"; - case 209: return "OpFwidth"; - case 210: return "OpDPdxFine"; - case 211: return "OpDPdyFine"; - case 212: return "OpFwidthFine"; - case 213: return "OpDPdxCoarse"; - case 214: return "OpDPdyCoarse"; - case 215: return "OpFwidthCoarse"; - case 216: return "Bad"; - case 217: return "Bad"; - case 218: return "OpEmitVertex"; - case 219: return "OpEndPrimitive"; - case 220: return "OpEmitStreamVertex"; - case 221: return "OpEndStreamPrimitive"; - case 222: return "Bad"; - case 223: return "Bad"; - case 224: return "OpControlBarrier"; - case 225: return "OpMemoryBarrier"; - case 226: return "Bad"; - case 227: return "OpAtomicLoad"; - case 228: return "OpAtomicStore"; - case 229: return "OpAtomicExchange"; - case 230: return "OpAtomicCompareExchange"; - case 231: return "OpAtomicCompareExchangeWeak"; - case 232: return "OpAtomicIIncrement"; - case 233: return "OpAtomicIDecrement"; - case 234: return "OpAtomicIAdd"; - case 235: return "OpAtomicISub"; - case 236: return "OpAtomicSMin"; - case 237: return "OpAtomicUMin"; - case 238: return "OpAtomicSMax"; - case 239: return "OpAtomicUMax"; - case 240: return "OpAtomicAnd"; - case 241: return "OpAtomicOr"; - case 242: return "OpAtomicXor"; - case 243: return "Bad"; - case 244: return "Bad"; - case 245: return "OpPhi"; - case 246: return "OpLoopMerge"; - case 247: return "OpSelectionMerge"; - case 248: return "OpLabel"; - case 249: return "OpBranch"; - case 250: return "OpBranchConditional"; - case 251: return "OpSwitch"; - case 252: return "OpKill"; - case 253: return "OpReturn"; - case 254: return "OpReturnValue"; - case 255: return "OpUnreachable"; - case 256: return "OpLifetimeStart"; - case 257: return "OpLifetimeStop"; - case 258: return "Bad"; - case 259: return "OpGroupAsyncCopy"; - case 260: return "OpGroupWaitEvents"; - case 261: return "OpGroupAll"; - case 262: return "OpGroupAny"; - case 263: return "OpGroupBroadcast"; - case 264: return "OpGroupIAdd"; - case 265: return "OpGroupFAdd"; - case 266: return "OpGroupFMin"; - case 267: return "OpGroupUMin"; - case 268: return "OpGroupSMin"; - case 269: return "OpGroupFMax"; - case 270: return "OpGroupUMax"; - case 271: return "OpGroupSMax"; - case 272: return "Bad"; - case 273: return "Bad"; - case 274: return "OpReadPipe"; - case 275: return "OpWritePipe"; - case 276: return "OpReservedReadPipe"; - case 277: return "OpReservedWritePipe"; - case 278: return "OpReserveReadPipePackets"; - case 279: return "OpReserveWritePipePackets"; - case 280: return "OpCommitReadPipe"; - case 281: return "OpCommitWritePipe"; - case 282: return "OpIsValidReserveId"; - case 283: return "OpGetNumPipePackets"; - case 284: return "OpGetMaxPipePackets"; - case 285: return "OpGroupReserveReadPipePackets"; - case 286: return "OpGroupReserveWritePipePackets"; - case 287: return "OpGroupCommitReadPipe"; - case 288: return "OpGroupCommitWritePipe"; - case 289: return "Bad"; - case 290: return "Bad"; - case 291: return "OpEnqueueMarker"; - case 292: return "OpEnqueueKernel"; - case 293: return "OpGetKernelNDrangeSubGroupCount"; - case 294: return "OpGetKernelNDrangeMaxSubGroupSize"; - case 295: return "OpGetKernelWorkGroupSize"; - case 296: return "OpGetKernelPreferredWorkGroupSizeMultiple"; - case 297: return "OpRetainEvent"; - case 298: return "OpReleaseEvent"; - case 299: return "OpCreateUserEvent"; - case 300: return "OpIsValidEvent"; - case 301: return "OpSetUserEventStatus"; - case 
302: return "OpCaptureEventProfilingInfo"; - case 303: return "OpGetDefaultQueue"; - case 304: return "OpBuildNDRange"; - case 305: return "OpImageSparseSampleImplicitLod"; - case 306: return "OpImageSparseSampleExplicitLod"; - case 307: return "OpImageSparseSampleDrefImplicitLod"; - case 308: return "OpImageSparseSampleDrefExplicitLod"; - case 309: return "OpImageSparseSampleProjImplicitLod"; - case 310: return "OpImageSparseSampleProjExplicitLod"; - case 311: return "OpImageSparseSampleProjDrefImplicitLod"; - case 312: return "OpImageSparseSampleProjDrefExplicitLod"; - case 313: return "OpImageSparseFetch"; - case 314: return "OpImageSparseGather"; - case 315: return "OpImageSparseDrefGather"; - case 316: return "OpImageSparseTexelsResident"; - case 317: return "OpNoLine"; - case 318: return "OpAtomicFlagTestAndSet"; - case 319: return "OpAtomicFlagClear"; - case 320: return "OpImageSparseRead"; - - case 4421: return "OpSubgroupBallotKHR"; - case 4422: return "OpSubgroupFirstInvocationKHR"; - case 4428: return "OpSubgroupAllKHR"; - case 4429: return "OpSubgroupAnyKHR"; - case 4430: return "OpSubgroupAllEqualKHR"; - case 4432: return "OpSubgroupReadInvocationKHR"; - -#ifdef AMD_EXTENSIONS - case 5000: return "OpGroupIAddNonUniformAMD"; - case 5001: return "OpGroupFAddNonUniformAMD"; - case 5002: return "OpGroupFMinNonUniformAMD"; - case 5003: return "OpGroupUMinNonUniformAMD"; - case 5004: return "OpGroupSMinNonUniformAMD"; - case 5005: return "OpGroupFMaxNonUniformAMD"; - case 5006: return "OpGroupUMaxNonUniformAMD"; - case 5007: return "OpGroupSMaxNonUniformAMD"; - - case 5011: return "OpFragmentMaskFetchAMD"; - case 5012: return "OpFragmentFetchAMD"; -#endif - - case OpcodeCeiling: - default: - return "Bad"; - } -} - -// The set of objects that hold all the instruction/operand -// parameterization information. 
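OpcodeString is what a disassembler leans on to turn the first word of each instruction into its "Op*" mnemonic: SPIR-V packs the opcode into the low 16 bits of that word and the instruction's total word count into the high 16 bits. A sketch of decoding one instruction header (PrintInstruction is an illustrative name; the mask and shift follow the SPIR-V binary layout, matching spv::OpCodeMask and spv::WordCountShift from the generated headers):

    #include <cstdint>
    #include <cstdio>

    namespace spv {
        // Declared in the deleted doc.h/doc.cpp pair.
        const char* OpcodeString(int op);
    }

    // Print the mnemonic and size of one instruction; return its word count
    // so the caller can advance to the next instruction in the stream.
    unsigned PrintInstruction(const std::uint32_t* words)
    {
        unsigned opcode    = words[0] & 0xffffu; // low 16 bits: the Op* value
        unsigned wordCount = words[0] >> 16;     // high 16 bits: instruction length
        std::printf("%s (%u words)\n", spv::OpcodeString((int)opcode), wordCount);
        return wordCount;
    }
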
-InstructionParameters InstructionDesc[OpCodeMask + 1]; -OperandParameters ExecutionModeOperands[ExecutionModeCeiling]; -OperandParameters DecorationOperands[DecorationCeiling]; - -EnumDefinition OperandClassParams[OperandCount]; -EnumParameters ExecutionModelParams[ExecutionModelCeiling]; -EnumParameters AddressingParams[AddressingModelCeiling]; -EnumParameters MemoryParams[MemoryModelCeiling]; -EnumParameters ExecutionModeParams[ExecutionModeCeiling]; -EnumParameters StorageParams[StorageClassCeiling]; -EnumParameters SamplerAddressingModeParams[SamplerAddressingModeCeiling]; -EnumParameters SamplerFilterModeParams[SamplerFilterModeCeiling]; -EnumParameters ImageFormatParams[ImageFormatCeiling]; -EnumParameters ImageChannelOrderParams[ImageChannelOrderCeiling]; -EnumParameters ImageChannelDataTypeParams[ImageChannelDataTypeCeiling]; -EnumParameters ImageOperandsParams[ImageOperandsCeiling]; -EnumParameters FPFastMathParams[FPFastMathCeiling]; -EnumParameters FPRoundingModeParams[FPRoundingModeCeiling]; -EnumParameters LinkageTypeParams[LinkageTypeCeiling]; -EnumParameters DecorationParams[DecorationCeiling]; -EnumParameters BuiltInParams[BuiltInCeiling]; -EnumParameters DimensionalityParams[DimensionCeiling]; -EnumParameters FuncParamAttrParams[FuncParamAttrCeiling]; -EnumParameters AccessQualifierParams[AccessQualifierCeiling]; -EnumParameters GroupOperationParams[GroupOperationCeiling]; -EnumParameters LoopControlParams[FunctionControlCeiling]; -EnumParameters SelectionControlParams[SelectControlCeiling]; -EnumParameters FunctionControlParams[FunctionControlCeiling]; -EnumParameters MemorySemanticsParams[MemorySemanticsCeiling]; -EnumParameters MemoryAccessParams[MemoryAccessCeiling]; -EnumParameters ScopeParams[ScopeCeiling]; -EnumParameters KernelEnqueueFlagsParams[KernelEnqueueFlagsCeiling]; -EnumParameters KernelProfilingInfoParams[KernelProfilingInfoCeiling]; -EnumParameters CapabilityParams[CapabilityCeiling]; - -// Set up all the parameterizing descriptions of the opcodes, operands, etc. -void Parameterize() -{ - // only do this once. - static bool initialized = false; - if (initialized) - return; - initialized = true; - - // Exceptions to having a result and a resulting type . - // (Everything is initialized to have both). 
- - InstructionDesc[OpNop].setResultAndType(false, false); - InstructionDesc[OpSource].setResultAndType(false, false); - InstructionDesc[OpSourceContinued].setResultAndType(false, false); - InstructionDesc[OpSourceExtension].setResultAndType(false, false); - InstructionDesc[OpExtension].setResultAndType(false, false); - InstructionDesc[OpExtInstImport].setResultAndType(true, false); - InstructionDesc[OpCapability].setResultAndType(false, false); - InstructionDesc[OpMemoryModel].setResultAndType(false, false); - InstructionDesc[OpEntryPoint].setResultAndType(false, false); - InstructionDesc[OpExecutionMode].setResultAndType(false, false); - InstructionDesc[OpTypeVoid].setResultAndType(true, false); - InstructionDesc[OpTypeBool].setResultAndType(true, false); - InstructionDesc[OpTypeInt].setResultAndType(true, false); - InstructionDesc[OpTypeFloat].setResultAndType(true, false); - InstructionDesc[OpTypeVector].setResultAndType(true, false); - InstructionDesc[OpTypeMatrix].setResultAndType(true, false); - InstructionDesc[OpTypeImage].setResultAndType(true, false); - InstructionDesc[OpTypeSampler].setResultAndType(true, false); - InstructionDesc[OpTypeSampledImage].setResultAndType(true, false); - InstructionDesc[OpTypeArray].setResultAndType(true, false); - InstructionDesc[OpTypeRuntimeArray].setResultAndType(true, false); - InstructionDesc[OpTypeStruct].setResultAndType(true, false); - InstructionDesc[OpTypeOpaque].setResultAndType(true, false); - InstructionDesc[OpTypePointer].setResultAndType(true, false); - InstructionDesc[OpTypeForwardPointer].setResultAndType(false, false); - InstructionDesc[OpTypeFunction].setResultAndType(true, false); - InstructionDesc[OpTypeEvent].setResultAndType(true, false); - InstructionDesc[OpTypeDeviceEvent].setResultAndType(true, false); - InstructionDesc[OpTypeReserveId].setResultAndType(true, false); - InstructionDesc[OpTypeQueue].setResultAndType(true, false); - InstructionDesc[OpTypePipe].setResultAndType(true, false); - InstructionDesc[OpFunctionEnd].setResultAndType(false, false); - InstructionDesc[OpStore].setResultAndType(false, false); - InstructionDesc[OpImageWrite].setResultAndType(false, false); - InstructionDesc[OpDecorationGroup].setResultAndType(true, false); - InstructionDesc[OpDecorate].setResultAndType(false, false); - InstructionDesc[OpMemberDecorate].setResultAndType(false, false); - InstructionDesc[OpGroupDecorate].setResultAndType(false, false); - InstructionDesc[OpGroupMemberDecorate].setResultAndType(false, false); - InstructionDesc[OpName].setResultAndType(false, false); - InstructionDesc[OpMemberName].setResultAndType(false, false); - InstructionDesc[OpString].setResultAndType(true, false); - InstructionDesc[OpLine].setResultAndType(false, false); - InstructionDesc[OpNoLine].setResultAndType(false, false); - InstructionDesc[OpCopyMemory].setResultAndType(false, false); - InstructionDesc[OpCopyMemorySized].setResultAndType(false, false); - InstructionDesc[OpEmitVertex].setResultAndType(false, false); - InstructionDesc[OpEndPrimitive].setResultAndType(false, false); - InstructionDesc[OpEmitStreamVertex].setResultAndType(false, false); - InstructionDesc[OpEndStreamPrimitive].setResultAndType(false, false); - InstructionDesc[OpControlBarrier].setResultAndType(false, false); - InstructionDesc[OpMemoryBarrier].setResultAndType(false, false); - InstructionDesc[OpAtomicStore].setResultAndType(false, false); - InstructionDesc[OpLoopMerge].setResultAndType(false, false); - InstructionDesc[OpSelectionMerge].setResultAndType(false, false); - 
InstructionDesc[OpLabel].setResultAndType(true, false); - InstructionDesc[OpBranch].setResultAndType(false, false); - InstructionDesc[OpBranchConditional].setResultAndType(false, false); - InstructionDesc[OpSwitch].setResultAndType(false, false); - InstructionDesc[OpKill].setResultAndType(false, false); - InstructionDesc[OpReturn].setResultAndType(false, false); - InstructionDesc[OpReturnValue].setResultAndType(false, false); - InstructionDesc[OpUnreachable].setResultAndType(false, false); - InstructionDesc[OpLifetimeStart].setResultAndType(false, false); - InstructionDesc[OpLifetimeStop].setResultAndType(false, false); - InstructionDesc[OpCommitReadPipe].setResultAndType(false, false); - InstructionDesc[OpCommitWritePipe].setResultAndType(false, false); - InstructionDesc[OpGroupCommitWritePipe].setResultAndType(false, false); - InstructionDesc[OpGroupCommitReadPipe].setResultAndType(false, false); - InstructionDesc[OpCaptureEventProfilingInfo].setResultAndType(false, false); - InstructionDesc[OpSetUserEventStatus].setResultAndType(false, false); - InstructionDesc[OpRetainEvent].setResultAndType(false, false); - InstructionDesc[OpReleaseEvent].setResultAndType(false, false); - InstructionDesc[OpGroupWaitEvents].setResultAndType(false, false); - InstructionDesc[OpAtomicFlagClear].setResultAndType(false, false); - - // Specific additional context-dependent operands - - ExecutionModeOperands[ExecutionModeInvocations].push(OperandLiteralNumber, "'Number of <>'"); - - ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'x size'"); - ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'y size'"); - ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'z size'"); - - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'x size'"); - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'y size'"); - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'z size'"); - - ExecutionModeOperands[ExecutionModeOutputVertices].push(OperandLiteralNumber, "'Vertex count'"); - ExecutionModeOperands[ExecutionModeVecTypeHint].push(OperandLiteralNumber, "'Vector type'"); - - DecorationOperands[DecorationStream].push(OperandLiteralNumber, "'Stream Number'"); - DecorationOperands[DecorationLocation].push(OperandLiteralNumber, "'Location'"); - DecorationOperands[DecorationComponent].push(OperandLiteralNumber, "'Component'"); - DecorationOperands[DecorationIndex].push(OperandLiteralNumber, "'Index'"); - DecorationOperands[DecorationBinding].push(OperandLiteralNumber, "'Binding Point'"); - DecorationOperands[DecorationDescriptorSet].push(OperandLiteralNumber, "'Descriptor Set'"); - DecorationOperands[DecorationOffset].push(OperandLiteralNumber, "'Byte Offset'"); - DecorationOperands[DecorationXfbBuffer].push(OperandLiteralNumber, "'XFB Buffer Number'"); - DecorationOperands[DecorationXfbStride].push(OperandLiteralNumber, "'XFB Stride'"); - DecorationOperands[DecorationArrayStride].push(OperandLiteralNumber, "'Array Stride'"); - DecorationOperands[DecorationMatrixStride].push(OperandLiteralNumber, "'Matrix Stride'"); - DecorationOperands[DecorationBuiltIn].push(OperandLiteralNumber, "See <>"); - DecorationOperands[DecorationFPRoundingMode].push(OperandFPRoundingMode, "'Floating-Point Rounding Mode'"); - DecorationOperands[DecorationFPFastMathMode].push(OperandFPFastMath, "'Fast-Math Mode'"); - DecorationOperands[DecorationLinkageAttributes].push(OperandLiteralString, "'Name'"); - 
DecorationOperands[DecorationLinkageAttributes].push(OperandLinkageType, "'Linkage Type'"); - DecorationOperands[DecorationFuncParamAttr].push(OperandFuncParamAttr, "'Function Parameter Attribute'"); - DecorationOperands[DecorationSpecId].push(OperandLiteralNumber, "'Specialization Constant ID'"); - DecorationOperands[DecorationInputAttachmentIndex].push(OperandLiteralNumber, "'Attachment Index'"); - DecorationOperands[DecorationAlignment].push(OperandLiteralNumber, "'Alignment'"); - - OperandClassParams[OperandSource].set(SourceLanguageCeiling, SourceString, 0); - OperandClassParams[OperandExecutionModel].set(ExecutionModelCeiling, ExecutionModelString, ExecutionModelParams); - OperandClassParams[OperandAddressing].set(AddressingModelCeiling, AddressingString, AddressingParams); - OperandClassParams[OperandMemory].set(MemoryModelCeiling, MemoryString, MemoryParams); - OperandClassParams[OperandExecutionMode].set(ExecutionModeCeiling, ExecutionModeString, ExecutionModeParams); - OperandClassParams[OperandExecutionMode].setOperands(ExecutionModeOperands); - OperandClassParams[OperandStorage].set(StorageClassCeiling, StorageClassString, StorageParams); - OperandClassParams[OperandDimensionality].set(DimensionCeiling, DimensionString, DimensionalityParams); - OperandClassParams[OperandSamplerAddressingMode].set(SamplerAddressingModeCeiling, SamplerAddressingModeString, SamplerAddressingModeParams); - OperandClassParams[OperandSamplerFilterMode].set(SamplerFilterModeCeiling, SamplerFilterModeString, SamplerFilterModeParams); - OperandClassParams[OperandSamplerImageFormat].set(ImageFormatCeiling, ImageFormatString, ImageFormatParams); - OperandClassParams[OperandImageChannelOrder].set(ImageChannelOrderCeiling, ImageChannelOrderString, ImageChannelOrderParams); - OperandClassParams[OperandImageChannelDataType].set(ImageChannelDataTypeCeiling, ImageChannelDataTypeString, ImageChannelDataTypeParams); - OperandClassParams[OperandImageOperands].set(ImageOperandsCeiling, ImageOperandsString, ImageOperandsParams, true); - OperandClassParams[OperandFPFastMath].set(FPFastMathCeiling, FPFastMathString, FPFastMathParams, true); - OperandClassParams[OperandFPRoundingMode].set(FPRoundingModeCeiling, FPRoundingModeString, FPRoundingModeParams); - OperandClassParams[OperandLinkageType].set(LinkageTypeCeiling, LinkageTypeString, LinkageTypeParams); - OperandClassParams[OperandFuncParamAttr].set(FuncParamAttrCeiling, FuncParamAttrString, FuncParamAttrParams); - OperandClassParams[OperandAccessQualifier].set(AccessQualifierCeiling, AccessQualifierString, AccessQualifierParams); - OperandClassParams[OperandDecoration].set(DecorationCeiling, DecorationString, DecorationParams); - OperandClassParams[OperandDecoration].setOperands(DecorationOperands); - OperandClassParams[OperandBuiltIn].set(BuiltInCeiling, BuiltInString, BuiltInParams); - OperandClassParams[OperandSelect].set(SelectControlCeiling, SelectControlString, SelectionControlParams, true); - OperandClassParams[OperandLoop].set(LoopControlCeiling, LoopControlString, LoopControlParams, true); - OperandClassParams[OperandFunction].set(FunctionControlCeiling, FunctionControlString, FunctionControlParams, true); - OperandClassParams[OperandMemorySemantics].set(MemorySemanticsCeiling, MemorySemanticsString, MemorySemanticsParams, true); - OperandClassParams[OperandMemoryAccess].set(MemoryAccessCeiling, MemoryAccessString, MemoryAccessParams, true); - OperandClassParams[OperandScope].set(ScopeCeiling, ScopeString, ScopeParams); - 
OperandClassParams[OperandGroupOperation].set(GroupOperationCeiling, GroupOperationString, GroupOperationParams); - OperandClassParams[OperandKernelEnqueueFlags].set(KernelEnqueueFlagsCeiling, KernelEnqueueFlagsString, KernelEnqueueFlagsParams); - OperandClassParams[OperandKernelProfilingInfo].set(KernelProfilingInfoCeiling, KernelProfilingInfoString, KernelProfilingInfoParams, true); - OperandClassParams[OperandCapability].set(CapabilityCeiling, CapabilityString, CapabilityParams); - OperandClassParams[OperandOpcode].set(OpcodeCeiling, OpcodeString, 0); - - CapabilityParams[CapabilityShader].caps.push_back(CapabilityMatrix); - CapabilityParams[CapabilityGeometry].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTessellation].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityVector16].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityFloat16Buffer].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityInt64Atomics].caps.push_back(CapabilityInt64); - CapabilityParams[CapabilityImageBasic].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityImageReadWrite].caps.push_back(CapabilityImageBasic); - CapabilityParams[CapabilityImageMipmap].caps.push_back(CapabilityImageBasic); - CapabilityParams[CapabilityPipes].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityDeviceEnqueue].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityLiteralSampler].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityAtomicStorage].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampleRateShading].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTessellationPointSize].caps.push_back(CapabilityTessellation); - CapabilityParams[CapabilityGeometryPointSize].caps.push_back(CapabilityGeometry); - CapabilityParams[CapabilityImageGatherExtended].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageExtendedFormats].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageMultisample].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityUniformBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledImageArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityClipDistance].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityCullDistance].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityGenericPointer].caps.push_back(CapabilityAddresses); - CapabilityParams[CapabilityInt8].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityInputAttachment].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityMinLod].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySparseResidency].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampled1D].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledRect].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledBuffer].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledCubeArray].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityImageMSArray].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityImage1D].caps.push_back(CapabilitySampled1D); - CapabilityParams[CapabilityImageRect].caps.push_back(CapabilitySampledRect); - 
CapabilityParams[CapabilityImageBuffer].caps.push_back(CapabilitySampledBuffer); - CapabilityParams[CapabilityImageCubeArray].caps.push_back(CapabilitySampledCubeArray); - CapabilityParams[CapabilityImageQuery].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityDerivativeControl].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityInterpolationFunction].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTransformFeedback].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityGeometryStreams].caps.push_back(CapabilityGeometry); - CapabilityParams[CapabilityStorageImageReadWithoutFormat].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageWriteWithoutFormat].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityMultiViewport].caps.push_back(CapabilityGeometry); - - AddressingParams[AddressingModelPhysical32].caps.push_back(CapabilityAddresses); - AddressingParams[AddressingModelPhysical64].caps.push_back(CapabilityAddresses); - - MemoryParams[MemoryModelSimple].caps.push_back(CapabilityShader); - MemoryParams[MemoryModelGLSL450].caps.push_back(CapabilityShader); - MemoryParams[MemoryModelOpenCL].caps.push_back(CapabilityKernel); - - MemorySemanticsParams[MemorySemanticsUniformMemoryShift].caps.push_back(CapabilityShader); - MemorySemanticsParams[MemorySemanticsAtomicCounterMemoryShift].caps.push_back(CapabilityAtomicStorage); - - ExecutionModelParams[ExecutionModelVertex].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelTessellationControl].caps.push_back(CapabilityTessellation); - ExecutionModelParams[ExecutionModelTessellationEvaluation].caps.push_back(CapabilityTessellation); - ExecutionModelParams[ExecutionModelGeometry].caps.push_back(CapabilityGeometry); - ExecutionModelParams[ExecutionModelFragment].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelGLCompute].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelKernel].caps.push_back(CapabilityKernel); - - // Storage capabilites - StorageParams[StorageClassInput].caps.push_back(CapabilityShader); - StorageParams[StorageClassUniform].caps.push_back(CapabilityShader); - StorageParams[StorageClassOutput].caps.push_back(CapabilityShader); - StorageParams[StorageClassPrivate].caps.push_back(CapabilityShader); - StorageParams[StorageClassGeneric].caps.push_back(CapabilityKernel); - StorageParams[StorageClassAtomicCounter].caps.push_back(CapabilityAtomicStorage); - StorageParams[StorageClassPushConstant].caps.push_back(CapabilityShader); - - // Sampler Filter & Addressing mode capabilities - SamplerAddressingModeParams[SamplerAddressingModeNone].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeClampToEdge].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeClamp].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeRepeat].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeRepeatMirrored].caps.push_back(CapabilityKernel); - - SamplerFilterModeParams[SamplerFilterModeNearest].caps.push_back(CapabilityKernel); - SamplerFilterModeParams[SamplerFilterModeLinear].caps.push_back(CapabilityKernel); - - // image format capabilities - - // ES/Desktop float - ImageFormatParams[ImageFormatRgba32f].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16f].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32f].caps.push_back(CapabilityShader); - 
ImageFormatParams[ImageFormatRgba8].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8Snorm].caps.push_back(CapabilityShader); - - // Desktop float - ImageFormatParams[ImageFormatRg32f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR11fG11fB10f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgba16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgb10A2].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgba16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - - // ES/Desktop int - ImageFormatParams[ImageFormatRgba32i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32i].caps.push_back(CapabilityShader); - - // Desktop int - ImageFormatParams[ImageFormatRg32i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8i].caps.push_back(CapabilityStorageImageExtendedFormats); - - // ES/Desktop uint - ImageFormatParams[ImageFormatRgba32ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32ui].caps.push_back(CapabilityShader); - - // Desktop uint - ImageFormatParams[ImageFormatRgb10a2ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg32ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8ui].caps.push_back(CapabilityStorageImageExtendedFormats); - - // image channel order capabilities - for (int i = 0; i < ImageChannelOrderCeiling; ++i) { - ImageChannelOrderParams[i].caps.push_back(CapabilityKernel); - } - - // image channel type capabilities - for (int i = 0; i < ImageChannelDataTypeCeiling; ++i) { - 
ImageChannelDataTypeParams[i].caps.push_back(CapabilityKernel); - } - - // image lookup operands - ImageOperandsParams[ImageOperandsBiasShift].caps.push_back(CapabilityShader); - ImageOperandsParams[ImageOperandsOffsetShift].caps.push_back(CapabilityImageGatherExtended); - ImageOperandsParams[ImageOperandsMinLodShift].caps.push_back(CapabilityMinLod); - - // fast math flags capabilities - for (int i = 0; i < FPFastMathCeiling; ++i) { - FPFastMathParams[i].caps.push_back(CapabilityKernel); - } - - // fp rounding mode capabilities - for (int i = 0; i < FPRoundingModeCeiling; ++i) { - FPRoundingModeParams[i].caps.push_back(CapabilityKernel); - } - - // linkage types - for (int i = 0; i < LinkageTypeCeiling; ++i) { - LinkageTypeParams[i].caps.push_back(CapabilityLinkage); - } - - // function argument types - for (int i = 0; i < FuncParamAttrCeiling; ++i) { - FuncParamAttrParams[i].caps.push_back(CapabilityKernel); - } - - // function argument types - for (int i = 0; i < AccessQualifierCeiling; ++i) { - AccessQualifierParams[i].caps.push_back(CapabilityKernel); - } - - ExecutionModeParams[ExecutionModeInvocations].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeSpacingEqual].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeSpacingFractionalEven].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeSpacingFractionalOdd].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeVertexOrderCw].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeVertexOrderCcw].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModePixelCenterInteger].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeOriginUpperLeft].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeOriginLowerLeft].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeEarlyFragmentTests].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModePointMode].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeXfb].caps.push_back(CapabilityTransformFeedback); - ExecutionModeParams[ExecutionModeDepthReplacing].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthGreater].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthLess].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthUnchanged].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeLocalSizeHint].caps.push_back(CapabilityKernel); - ExecutionModeParams[ExecutionModeInputPoints].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeInputLines].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeInputLinesAdjacency].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeInputTrianglesAdjacency].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeQuads].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeIsolines].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeOutputPoints].caps.push_back(CapabilityGeometry); - 
- ExecutionModeParams[ExecutionModeOutputLineStrip].caps.push_back(CapabilityGeometry);
- ExecutionModeParams[ExecutionModeOutputTriangleStrip].caps.push_back(CapabilityGeometry);
- ExecutionModeParams[ExecutionModeVecTypeHint].caps.push_back(CapabilityKernel);
- ExecutionModeParams[ExecutionModeContractionOff].caps.push_back(CapabilityKernel);
-
- DecorationParams[DecorationRelaxedPrecision].caps.push_back(CapabilityShader);
- DecorationParams[DecorationBlock].caps.push_back(CapabilityShader);
- DecorationParams[DecorationBufferBlock].caps.push_back(CapabilityShader);
- DecorationParams[DecorationRowMajor].caps.push_back(CapabilityMatrix);
- DecorationParams[DecorationColMajor].caps.push_back(CapabilityMatrix);
- DecorationParams[DecorationGLSLShared].caps.push_back(CapabilityShader);
- DecorationParams[DecorationGLSLPacked].caps.push_back(CapabilityShader);
- DecorationParams[DecorationNoPerspective].caps.push_back(CapabilityShader);
- DecorationParams[DecorationFlat].caps.push_back(CapabilityShader);
- DecorationParams[DecorationPatch].caps.push_back(CapabilityTessellation);
- DecorationParams[DecorationCentroid].caps.push_back(CapabilityShader);
- DecorationParams[DecorationSample].caps.push_back(CapabilitySampleRateShading);
- DecorationParams[DecorationInvariant].caps.push_back(CapabilityShader);
- DecorationParams[DecorationConstant].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationUniform].caps.push_back(CapabilityShader);
- DecorationParams[DecorationCPacked].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationSaturatedConversion].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationStream].caps.push_back(CapabilityGeometryStreams);
- DecorationParams[DecorationLocation].caps.push_back(CapabilityShader);
- DecorationParams[DecorationComponent].caps.push_back(CapabilityShader);
- DecorationParams[DecorationOffset].caps.push_back(CapabilityShader);
- DecorationParams[DecorationIndex].caps.push_back(CapabilityShader);
- DecorationParams[DecorationBinding].caps.push_back(CapabilityShader);
- DecorationParams[DecorationDescriptorSet].caps.push_back(CapabilityShader);
- DecorationParams[DecorationXfbBuffer].caps.push_back(CapabilityTransformFeedback);
- DecorationParams[DecorationXfbStride].caps.push_back(CapabilityTransformFeedback);
- DecorationParams[DecorationArrayStride].caps.push_back(CapabilityShader);
- DecorationParams[DecorationMatrixStride].caps.push_back(CapabilityMatrix);
- DecorationParams[DecorationFuncParamAttr].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationFPRoundingMode].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationFPFastMathMode].caps.push_back(CapabilityKernel);
- DecorationParams[DecorationLinkageAttributes].caps.push_back(CapabilityLinkage);
- DecorationParams[DecorationSpecId].caps.push_back(CapabilityShader);
- DecorationParams[DecorationNoContraction].caps.push_back(CapabilityShader);
- DecorationParams[DecorationInputAttachmentIndex].caps.push_back(CapabilityInputAttachment);
- DecorationParams[DecorationAlignment].caps.push_back(CapabilityKernel);
-
- BuiltInParams[BuiltInPosition].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInPointSize].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInClipDistance].caps.push_back(CapabilityClipDistance);
- BuiltInParams[BuiltInCullDistance].caps.push_back(CapabilityCullDistance);
-
- BuiltInParams[BuiltInVertexId].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInVertexId].desc = "Vertex ID, which takes on values 0, 1, 2, . . . .";
-
- BuiltInParams[BuiltInInstanceId].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInInstanceId].desc = "Instance ID, which takes on values 0, 1, 2, . . . .";
-
- BuiltInParams[BuiltInVertexIndex].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInVertexIndex].desc = "Vertex index, which takes on values base, base+1, base+2, . . . .";
-
- BuiltInParams[BuiltInInstanceIndex].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInInstanceIndex].desc = "Instance index, which takes on values base, base+1, base+2, . . . .";
-
- BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityGeometry);
- BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityGeometry);
- BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInLayer].caps.push_back(CapabilityGeometry);
- BuiltInParams[BuiltInViewportIndex].caps.push_back(CapabilityMultiViewport);
- BuiltInParams[BuiltInTessLevelOuter].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInTessLevelInner].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInTessCoord].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInPatchVertices].caps.push_back(CapabilityTessellation);
- BuiltInParams[BuiltInFragCoord].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInPointCoord].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInFrontFacing].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInSampleId].caps.push_back(CapabilitySampleRateShading);
- BuiltInParams[BuiltInSamplePosition].caps.push_back(CapabilitySampleRateShading);
- BuiltInParams[BuiltInSampleMask].caps.push_back(CapabilitySampleRateShading);
- BuiltInParams[BuiltInFragDepth].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInHelperInvocation].caps.push_back(CapabilityShader);
- BuiltInParams[BuiltInWorkDim].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInGlobalSize].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInEnqueuedWorkgroupSize].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInGlobalOffset].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInGlobalLinearId].caps.push_back(CapabilityKernel);
-
- BuiltInParams[BuiltInSubgroupSize].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInSubgroupMaxSize].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInNumSubgroups].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInNumEnqueuedSubgroups].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInSubgroupId].caps.push_back(CapabilityKernel);
- BuiltInParams[BuiltInSubgroupLocalInvocationId].caps.push_back(CapabilityKernel);
-
- DimensionalityParams[Dim1D].caps.push_back(CapabilitySampled1D);
- DimensionalityParams[DimCube].caps.push_back(CapabilityShader);
- DimensionalityParams[DimRect].caps.push_back(CapabilitySampledRect);
- DimensionalityParams[DimBuffer].caps.push_back(CapabilitySampledBuffer);
- DimensionalityParams[DimSubpassData].caps.push_back(CapabilityInputAttachment);
-
- // Group Operations
- for (int i = 0; i < GroupOperationCeiling; ++i) {
-     GroupOperationParams[i].caps.push_back(CapabilityKernel);
- }
-
- // Enqueue flags
- for (int i = 0; i < KernelEnqueueFlagsCeiling; ++i) {
-     KernelEnqueueFlagsParams[i].caps.push_back(CapabilityKernel);
- }
-
- // Profiling info
- KernelProfilingInfoParams[0].caps.push_back(CapabilityKernel);
-
- // set name of operator, an initial set of style operands, and the description
-
- InstructionDesc[OpSource].operands.push(OperandSource, "");
- InstructionDesc[OpSource].operands.push(OperandLiteralNumber, "'Version'");
- InstructionDesc[OpSource].operands.push(OperandId, "'File'", true);
- InstructionDesc[OpSource].operands.push(OperandLiteralString, "'Source'", true);
-
- InstructionDesc[OpSourceContinued].operands.push(OperandLiteralString, "'Continued Source'");
-
- InstructionDesc[OpSourceExtension].operands.push(OperandLiteralString, "'Extension'");
-
- InstructionDesc[OpName].operands.push(OperandId, "'Target'");
- InstructionDesc[OpName].operands.push(OperandLiteralString, "'Name'");
-
- InstructionDesc[OpMemberName].operands.push(OperandId, "'Type'");
- InstructionDesc[OpMemberName].operands.push(OperandLiteralNumber, "'Member'");
- InstructionDesc[OpMemberName].operands.push(OperandLiteralString, "'Name'");
-
- InstructionDesc[OpString].operands.push(OperandLiteralString, "'String'");
-
- InstructionDesc[OpLine].operands.push(OperandId, "'File'");
- InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Line'");
- InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Column'");
-
- InstructionDesc[OpExtension].operands.push(OperandLiteralString, "'Name'");
-
- InstructionDesc[OpExtInstImport].operands.push(OperandLiteralString, "'Name'");
-
- InstructionDesc[OpCapability].operands.push(OperandCapability, "'Capability'");
-
- InstructionDesc[OpMemoryModel].operands.push(OperandAddressing, "");
- InstructionDesc[OpMemoryModel].operands.push(OperandMemory, "");
-
- InstructionDesc[OpEntryPoint].operands.push(OperandExecutionModel, "");
- InstructionDesc[OpEntryPoint].operands.push(OperandId, "'Entry Point'");
- InstructionDesc[OpEntryPoint].operands.push(OperandLiteralString, "'Name'");
- InstructionDesc[OpEntryPoint].operands.push(OperandVariableIds, "'Interface'");
-
- InstructionDesc[OpExecutionMode].operands.push(OperandId, "'Entry Point'");
- InstructionDesc[OpExecutionMode].operands.push(OperandExecutionMode, "'Mode'");
- InstructionDesc[OpExecutionMode].operands.push(OperandOptionalLiteral, "See <>");
-
- InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Width'");
- InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Signedness'");
-
- InstructionDesc[OpTypeFloat].operands.push(OperandLiteralNumber, "'Width'");
-
- InstructionDesc[OpTypeVector].operands.push(OperandId, "'Component Type'");
- InstructionDesc[OpTypeVector].operands.push(OperandLiteralNumber, "'Component Count'");
-
- InstructionDesc[OpTypeMatrix].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpTypeMatrix].operands.push(OperandId, "'Column Type'");
- InstructionDesc[OpTypeMatrix].operands.push(OperandLiteralNumber, "'Column Count'");
-
- InstructionDesc[OpTypeImage].operands.push(OperandId, "'Sampled Type'");
- InstructionDesc[OpTypeImage].operands.push(OperandDimensionality, "");
- InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Depth'");
- InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Arrayed'");
- InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'MS'");
- InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Sampled'");
- InstructionDesc[OpTypeImage].operands.push(OperandSamplerImageFormat, "");
- InstructionDesc[OpTypeImage].operands.push(OperandAccessQualifier, "", true);
-
- InstructionDesc[OpTypeSampledImage].operands.push(OperandId, "'Image Type'");
-
- InstructionDesc[OpTypeArray].operands.push(OperandId, "'Element Type'");
- InstructionDesc[OpTypeArray].operands.push(OperandId, "'Length'");
-
- InstructionDesc[OpTypeRuntimeArray].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpTypeRuntimeArray].operands.push(OperandId, "'Element Type'");
-
- InstructionDesc[OpTypeStruct].operands.push(OperandVariableIds, "'Member 0 type', +\n'member 1 type', +\n...");
-
- InstructionDesc[OpTypeOpaque].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpTypeOpaque].operands.push(OperandLiteralString, "The name of the opaque type.");
-
- InstructionDesc[OpTypePointer].operands.push(OperandStorage, "");
- InstructionDesc[OpTypePointer].operands.push(OperandId, "'Type'");
-
- InstructionDesc[OpTypeForwardPointer].capabilities.push_back(CapabilityAddresses);
- InstructionDesc[OpTypeForwardPointer].operands.push(OperandId, "'Pointer Type'");
- InstructionDesc[OpTypeForwardPointer].operands.push(OperandStorage, "");
-
- InstructionDesc[OpTypeEvent].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpTypeDeviceEvent].capabilities.push_back(CapabilityDeviceEnqueue);
-
- InstructionDesc[OpTypeReserveId].capabilities.push_back(CapabilityPipes);
-
- InstructionDesc[OpTypeQueue].capabilities.push_back(CapabilityDeviceEnqueue);
-
- InstructionDesc[OpTypePipe].operands.push(OperandAccessQualifier, "'Qualifier'");
- InstructionDesc[OpTypePipe].capabilities.push_back(CapabilityPipes);
-
- InstructionDesc[OpTypeFunction].operands.push(OperandId, "'Return Type'");
- InstructionDesc[OpTypeFunction].operands.push(OperandVariableIds, "'Parameter 0 Type', +\n'Parameter 1 Type', +\n...");
-
- InstructionDesc[OpConstant].operands.push(OperandVariableLiterals, "'Value'");
-
- InstructionDesc[OpConstantComposite].operands.push(OperandVariableIds, "'Constituents'");
-
- InstructionDesc[OpConstantSampler].capabilities.push_back(CapabilityLiteralSampler);
- InstructionDesc[OpConstantSampler].operands.push(OperandSamplerAddressingMode, "");
- InstructionDesc[OpConstantSampler].operands.push(OperandLiteralNumber, "'Param'");
- InstructionDesc[OpConstantSampler].operands.push(OperandSamplerFilterMode, "");
-
- InstructionDesc[OpSpecConstant].operands.push(OperandVariableLiterals, "'Value'");
-
- InstructionDesc[OpSpecConstantComposite].operands.push(OperandVariableIds, "'Constituents'");
-
- InstructionDesc[OpSpecConstantOp].operands.push(OperandLiteralNumber, "'Opcode'");
- InstructionDesc[OpSpecConstantOp].operands.push(OperandVariableIds, "'Operands'");
-
- InstructionDesc[OpVariable].operands.push(OperandStorage, "");
- InstructionDesc[OpVariable].operands.push(OperandId, "'Initializer'", true);
-
- InstructionDesc[OpFunction].operands.push(OperandFunction, "");
- InstructionDesc[OpFunction].operands.push(OperandId, "'Function Type'");
-
- InstructionDesc[OpFunctionCall].operands.push(OperandId, "'Function'");
- InstructionDesc[OpFunctionCall].operands.push(OperandVariableIds, "'Argument 0', +\n'Argument 1', +\n...");
-
- InstructionDesc[OpExtInst].operands.push(OperandId, "'Set'");
- InstructionDesc[OpExtInst].operands.push(OperandLiteralNumber, "'Instruction'");
- InstructionDesc[OpExtInst].operands.push(OperandVariableIds, "'Operand 1', +\n'Operand 2', +\n...");
-
- InstructionDesc[OpLoad].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpLoad].operands.push(OperandMemoryAccess, "", true);
-
- InstructionDesc[OpStore].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpStore].operands.push(OperandId, "'Object'");
- InstructionDesc[OpStore].operands.push(OperandMemoryAccess, "", true);
-
- InstructionDesc[OpPhi].operands.push(OperandVariableIds, "'Variable, Parent, ...'");
-
- InstructionDesc[OpDecorate].operands.push(OperandId, "'Target'");
- InstructionDesc[OpDecorate].operands.push(OperandDecoration, "");
- InstructionDesc[OpDecorate].operands.push(OperandVariableLiterals, "See <>.");
-
- InstructionDesc[OpMemberDecorate].operands.push(OperandId, "'Structure Type'");
- InstructionDesc[OpMemberDecorate].operands.push(OperandLiteralNumber, "'Member'");
- InstructionDesc[OpMemberDecorate].operands.push(OperandDecoration, "");
- InstructionDesc[OpMemberDecorate].operands.push(OperandVariableLiterals, "See <>.");
-
- InstructionDesc[OpGroupDecorate].operands.push(OperandId, "'Decoration Group'");
- InstructionDesc[OpGroupDecorate].operands.push(OperandVariableIds, "'Targets'");
-
- InstructionDesc[OpGroupMemberDecorate].operands.push(OperandId, "'Decoration Group'");
- InstructionDesc[OpGroupMemberDecorate].operands.push(OperandVariableIdLiteral, "'Targets'");
-
- InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Vector'");
- InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Index'");
-
- InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Vector'");
- InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Component'");
- InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Index'");
-
- InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 1'");
- InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 2'");
- InstructionDesc[OpVectorShuffle].operands.push(OperandVariableLiterals, "'Components'");
-
- InstructionDesc[OpCompositeConstruct].operands.push(OperandVariableIds, "'Constituents'");
-
- InstructionDesc[OpCompositeExtract].operands.push(OperandId, "'Composite'");
- InstructionDesc[OpCompositeExtract].operands.push(OperandVariableLiterals, "'Indexes'");
-
- InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Object'");
- InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Composite'");
- InstructionDesc[OpCompositeInsert].operands.push(OperandVariableLiterals, "'Indexes'");
-
- InstructionDesc[OpCopyObject].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Target'");
- InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Source'");
- InstructionDesc[OpCopyMemory].operands.push(OperandMemoryAccess, "", true);
-
- InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Target'");
- InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Source'");
- InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Size'");
- InstructionDesc[OpCopyMemorySized].operands.push(OperandMemoryAccess, "", true);
-
- InstructionDesc[OpCopyMemorySized].capabilities.push_back(CapabilityAddresses);
-
- InstructionDesc[OpSampledImage].operands.push(OperandId, "'Image'");
- InstructionDesc[OpSampledImage].operands.push(OperandId, "'Sampler'");
-
- InstructionDesc[OpImage].operands.push(OperandId, "'Sampled Image'");
-
- InstructionDesc[OpImageRead].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageRead].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageRead].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageRead].operands.push(OperandVariableIds, "", true);
-
- InstructionDesc[OpImageWrite].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageWrite].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageWrite].operands.push(OperandId, "'Texel'");
- InstructionDesc[OpImageWrite].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageWrite].operands.push(OperandVariableIds, "", true);
-
- InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleImplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandVariableIds, "", true);
-
- InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleDrefImplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleDrefExplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleProjImplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleProjExplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleProjDrefImplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSampleProjDrefExplicitLod].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageFetch].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageFetch].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageFetch].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageFetch].operands.push(OperandVariableIds, "", true);
-
- InstructionDesc[OpImageGather].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageGather].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageGather].operands.push(OperandId, "'Component'");
- InstructionDesc[OpImageGather].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageGather].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageGather].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageDrefGather].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageDrefGather].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageDrefGather].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleImplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleExplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleProjImplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleProjExplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseFetch].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseFetch].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseFetch].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Component'");
- InstructionDesc[OpImageSparseGather].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseGather].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseGather].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Sampled Image'");
- InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'D~ref~'");
- InstructionDesc[OpImageSparseDrefGather].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseDrefGather].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseDrefGather].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageSparseRead].operands.push(OperandImageOperands, "", true);
- InstructionDesc[OpImageSparseRead].operands.push(OperandVariableIds, "", true);
- InstructionDesc[OpImageSparseRead].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageSparseTexelsResident].operands.push(OperandId, "'Resident Code'");
- InstructionDesc[OpImageSparseTexelsResident].capabilities.push_back(CapabilitySparseResidency);
-
- InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Level of Detail'");
- InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityImageQuery);
-
- InstructionDesc[OpImageQuerySize].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityImageQuery);
-
- InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageQueryLod].capabilities.push_back(CapabilityImageQuery);
-
- InstructionDesc[OpImageQueryLevels].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityImageQuery);
-
- InstructionDesc[OpImageQuerySamples].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityImageQuery);
-
- InstructionDesc[OpImageQueryFormat].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQueryFormat].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpImageQueryOrder].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageQueryOrder].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpAccessChain].operands.push(OperandId, "'Base'");
- InstructionDesc[OpAccessChain].operands.push(OperandVariableIds, "'Indexes'");
-
- InstructionDesc[OpInBoundsAccessChain].operands.push(OperandId, "'Base'");
- InstructionDesc[OpInBoundsAccessChain].operands.push(OperandVariableIds, "'Indexes'");
-
- InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Base'");
- InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Element'");
- InstructionDesc[OpPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'");
- InstructionDesc[OpPtrAccessChain].capabilities.push_back(CapabilityAddresses);
-
- InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Base'");
- InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Element'");
- InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'");
- InstructionDesc[OpInBoundsPtrAccessChain].capabilities.push_back(CapabilityAddresses);
-
- InstructionDesc[OpSNegate].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpFNegate].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpNot].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpAny].operands.push(OperandId, "'Vector'");
-
- InstructionDesc[OpAll].operands.push(OperandId, "'Vector'");
-
- InstructionDesc[OpConvertFToU].operands.push(OperandId, "'Float Value'");
-
- InstructionDesc[OpConvertFToS].operands.push(OperandId, "'Float Value'");
-
- InstructionDesc[OpConvertSToF].operands.push(OperandId, "'Signed Value'");
-
- InstructionDesc[OpConvertUToF].operands.push(OperandId, "'Unsigned Value'");
-
- InstructionDesc[OpUConvert].operands.push(OperandId, "'Unsigned Value'");
-
- InstructionDesc[OpSConvert].operands.push(OperandId, "'Signed Value'");
-
- InstructionDesc[OpFConvert].operands.push(OperandId, "'Float Value'");
-
- InstructionDesc[OpSatConvertSToU].operands.push(OperandId, "'Signed Value'");
- InstructionDesc[OpSatConvertSToU].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpSatConvertUToS].operands.push(OperandId, "'Unsigned Value'");
- InstructionDesc[OpSatConvertUToS].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpConvertPtrToU].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpConvertPtrToU].capabilities.push_back(CapabilityAddresses);
-
- InstructionDesc[OpConvertUToPtr].operands.push(OperandId, "'Integer Value'");
- InstructionDesc[OpConvertUToPtr].capabilities.push_back(CapabilityAddresses);
-
- InstructionDesc[OpPtrCastToGeneric].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpPtrCastToGeneric].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpGenericCastToPtr].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpGenericCastToPtr].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandStorage, "'Storage'");
- InstructionDesc[OpGenericCastToPtrExplicit].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpGenericPtrMemSemantics].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpGenericPtrMemSemantics].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpBitcast].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpQuantizeToF16].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpTranspose].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpTranspose].operands.push(OperandId, "'Matrix'");
-
- InstructionDesc[OpIsNan].operands.push(OperandId, "'x'");
-
- InstructionDesc[OpIsInf].operands.push(OperandId, "'x'");
-
- InstructionDesc[OpIsFinite].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpIsFinite].operands.push(OperandId, "'x'");
-
- InstructionDesc[OpIsNormal].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpIsNormal].operands.push(OperandId, "'x'");
-
- InstructionDesc[OpSignBitSet].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpSignBitSet].operands.push(OperandId, "'x'");
-
- InstructionDesc[OpLessOrGreater].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'x'");
- InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'y'");
-
- InstructionDesc[OpOrdered].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpOrdered].operands.push(OperandId, "'x'");
- InstructionDesc[OpOrdered].operands.push(OperandId, "'y'");
-
- InstructionDesc[OpUnordered].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpUnordered].operands.push(OperandId, "'x'");
- InstructionDesc[OpUnordered].operands.push(OperandId, "'y'");
-
- InstructionDesc[OpArrayLength].operands.push(OperandId, "'Structure'");
- InstructionDesc[OpArrayLength].operands.push(OperandLiteralNumber, "'Array member'");
- InstructionDesc[OpArrayLength].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpISub].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpISub].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFDiv].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFDiv].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Vector'");
- InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Scalar'");
-
- InstructionDesc[OpMatrixTimesScalar].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Matrix'");
- InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Scalar'");
-
- InstructionDesc[OpVectorTimesMatrix].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Vector'");
- InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Matrix'");
-
- InstructionDesc[OpMatrixTimesVector].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Matrix'");
- InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Vector'");
-
- InstructionDesc[OpMatrixTimesMatrix].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'LeftMatrix'");
- InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'RightMatrix'");
-
- InstructionDesc[OpOuterProduct].capabilities.push_back(CapabilityMatrix);
- InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 1'");
- InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 2'");
-
- InstructionDesc[OpDot].operands.push(OperandId, "'Vector 1'");
- InstructionDesc[OpDot].operands.push(OperandId, "'Vector 2'");
-
- InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Base'");
- InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Shift'");
-
- InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Base'");
- InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Shift'");
-
- InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Base'");
- InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Shift'");
-
- InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpLogicalNot].operands.push(OperandId, "'Operand'");
-
- InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpBitFieldInsert].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Base'");
- InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Insert'");
- InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Offset'");
- InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Count'");
-
- InstructionDesc[OpBitFieldSExtract].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Base'");
- InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Offset'");
- InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Count'");
-
- InstructionDesc[OpBitFieldUExtract].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Base'");
- InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Offset'");
- InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Count'");
-
- InstructionDesc[OpBitReverse].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpBitReverse].operands.push(OperandId, "'Base'");
-
- InstructionDesc[OpBitCount].operands.push(OperandId, "'Base'");
-
- InstructionDesc[OpSelect].operands.push(OperandId, "'Condition'");
- InstructionDesc[OpSelect].operands.push(OperandId, "'Object 1'");
- InstructionDesc[OpSelect].operands.push(OperandId, "'Object 2'");
-
- InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 1'");
- InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 2'");
-
- InstructionDesc[OpDPdx].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpDPdx].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpDPdy].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpDPdy].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpFwidth].capabilities.push_back(CapabilityShader);
- InstructionDesc[OpFwidth].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpDPdxFine].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpDPdxFine].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpDPdyFine].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpDPdyFine].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpFwidthFine].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpFwidthFine].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpDPdxCoarse].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpDPdxCoarse].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpDPdyCoarse].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpDPdyCoarse].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpFwidthCoarse].capabilities.push_back(CapabilityDerivativeControl);
- InstructionDesc[OpFwidthCoarse].operands.push(OperandId, "'P'");
-
- InstructionDesc[OpEmitVertex].capabilities.push_back(CapabilityGeometry);
-
- InstructionDesc[OpEndPrimitive].capabilities.push_back(CapabilityGeometry);
-
- InstructionDesc[OpEmitStreamVertex].operands.push(OperandId, "'Stream'");
- InstructionDesc[OpEmitStreamVertex].capabilities.push_back(CapabilityGeometryStreams);
-
- InstructionDesc[OpEndStreamPrimitive].operands.push(OperandId, "'Stream'");
- InstructionDesc[OpEndStreamPrimitive].capabilities.push_back(CapabilityGeometryStreams);
-
- InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Memory'");
- InstructionDesc[OpControlBarrier].operands.push(OperandMemorySemantics, "'Semantics'");
-
- InstructionDesc[OpMemoryBarrier].operands.push(OperandScope, "'Memory'");
- InstructionDesc[OpMemoryBarrier].operands.push(OperandMemorySemantics, "'Semantics'");
-
- InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Image'");
- InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Coordinate'");
- InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Sample'");
-
- InstructionDesc[OpAtomicLoad].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicLoad].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicLoad].operands.push(OperandMemorySemantics, "'Semantics'");
-
- InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicStore].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicStore].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicExchange].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicExchange].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Equal'");
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Unequal'");
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Value'");
- InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Comparator'");
-
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Equal'");
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Unequal'");
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Value'");
- InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Comparator'");
- InstructionDesc[OpAtomicCompareExchangeWeak].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpAtomicIIncrement].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicIIncrement].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicIIncrement].operands.push(OperandMemorySemantics, "'Semantics'");
-
- InstructionDesc[OpAtomicIDecrement].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicIDecrement].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicIDecrement].operands.push(OperandMemorySemantics, "'Semantics'");
-
- InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicIAdd].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicIAdd].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicISub].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicISub].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicUMin].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicUMin].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicUMax].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicUMax].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicSMin].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicSMin].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicSMax].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicOr].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicOr].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicXor].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicXor].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicFlagTestAndSet].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpAtomicFlagClear].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpAtomicFlagClear].operands.push(OperandScope, "'Scope'");
- InstructionDesc[OpAtomicFlagClear].operands.push(OperandMemorySemantics, "'Semantics'");
- InstructionDesc[OpAtomicFlagClear].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Merge Block'");
- InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Continue Target'");
- InstructionDesc[OpLoopMerge].operands.push(OperandLoop, "");
- InstructionDesc[OpLoopMerge].operands.push(OperandOptionalLiteral, "");
-
- InstructionDesc[OpSelectionMerge].operands.push(OperandId, "'Merge Block'");
- InstructionDesc[OpSelectionMerge].operands.push(OperandSelect, "");
-
- InstructionDesc[OpBranch].operands.push(OperandId, "'Target Label'");
-
- InstructionDesc[OpBranchConditional].operands.push(OperandId, "'Condition'");
- InstructionDesc[OpBranchConditional].operands.push(OperandId, "'True Label'");
- InstructionDesc[OpBranchConditional].operands.push(OperandId, "'False Label'");
- InstructionDesc[OpBranchConditional].operands.push(OperandVariableLiterals, "'Branch weights'");
-
- InstructionDesc[OpSwitch].operands.push(OperandId, "'Selector'");
- InstructionDesc[OpSwitch].operands.push(OperandId, "'Default'");
- InstructionDesc[OpSwitch].operands.push(OperandVariableLiteralId, "'Target'");
-
- InstructionDesc[OpKill].capabilities.push_back(CapabilityShader);
-
- InstructionDesc[OpReturnValue].operands.push(OperandId, "'Value'");
-
- InstructionDesc[OpLifetimeStart].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpLifetimeStart].operands.push(OperandLiteralNumber, "'Size'");
- InstructionDesc[OpLifetimeStart].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpLifetimeStop].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpLifetimeStop].operands.push(OperandLiteralNumber, "'Size'");
- InstructionDesc[OpLifetimeStop].capabilities.push_back(CapabilityKernel);
-
- InstructionDesc[OpGroupAsyncCopy].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Destination'");
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Source'");
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Num Elements'");
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Stride'");
- InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Event'");
-
- InstructionDesc[OpGroupWaitEvents].capabilities.push_back(CapabilityKernel);
- InstructionDesc[OpGroupWaitEvents].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Num Events'");
- InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Events List'");
-
- InstructionDesc[OpGroupAll].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupAll].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupAll].operands.push(OperandId, "'Predicate'");
-
- InstructionDesc[OpGroupAny].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupAny].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupAny].operands.push(OperandId, "'Predicate'");
-
- InstructionDesc[OpGroupBroadcast].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupBroadcast].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'Value'");
- InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'LocalId'");
-
- InstructionDesc[OpGroupIAdd].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupIAdd].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupIAdd].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupIAdd].operands.push(OperandId, "'X'");
-
- InstructionDesc[OpGroupFAdd].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupFAdd].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupFAdd].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupFAdd].operands.push(OperandId, "'X'");
-
- InstructionDesc[OpGroupUMin].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupUMin].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupUMin].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupUMin].operands.push(OperandId, "'X'");
-
- InstructionDesc[OpGroupSMin].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupSMin].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupSMin].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupSMin].operands.push(OperandId, "X");
-
- InstructionDesc[OpGroupFMin].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupFMin].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupFMin].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupFMin].operands.push(OperandId, "X");
-
- InstructionDesc[OpGroupUMax].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupUMax].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupUMax].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupUMax].operands.push(OperandId, "X");
-
- InstructionDesc[OpGroupSMax].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupSMax].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupSMax].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupSMax].operands.push(OperandId, "X");
-
- InstructionDesc[OpGroupFMax].capabilities.push_back(CapabilityGroups);
- InstructionDesc[OpGroupFMax].operands.push(OperandScope, "'Execution'");
- InstructionDesc[OpGroupFMax].operands.push(OperandGroupOperation, "'Operation'");
- InstructionDesc[OpGroupFMax].operands.push(OperandId, "X");
-
- InstructionDesc[OpReadPipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpWritePipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpReservedReadPipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Reserve Id'");
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Index'");
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpReservedWritePipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Reserve Id'");
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Index'");
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pointer'");
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpReserveReadPipePackets].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Num Packets'");
- InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpReserveWritePipePackets].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Num Packets'");
- InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpCommitReadPipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Pipe'");
- InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Reserve Id'");
- InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Size'");
- InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Alignment'");
-
- InstructionDesc[OpCommitWritePipe].capabilities.push_back(CapabilityPipes);
- InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Pipe'");
InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpIsValidReserveId].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpIsValidReserveId].operands.push(OperandId, "'Reserve Id'"); - - InstructionDesc[OpGetNumPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGetMaxPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupReserveReadPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupReserveWritePipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupCommitReadPipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupCommitWritePipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpBuildNDRange].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkSize'"); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'LocalWorkSize'"); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkOffset'"); - - 
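Each capabilities.push_back(...) line above records which SPIR-V capability a module must declare before the corresponding instruction may appear; consumers of the table can then check an instruction against the module's declared capabilities. A small sketch of such a check, with hypothetical names (capabilitiesSatisfied is not a glslang function):

    #include <cstdio>
    #include <set>

    // Hypothetical stand-ins for the spv capability enum; not glslang code.
    enum Capability { CapabilityPipes, CapabilityDeviceEnqueue };

    // True if every capability an instruction's table entry requires has been
    // declared by the module, mirroring the checks these lists make possible.
    bool capabilitiesSatisfied(const std::set<Capability>& declared,
                               const std::set<Capability>& required) {
        for (Capability c : required) {
            if (!declared.count(c)) return false;
        }
        return true;
    }

    int main() {
        std::set<Capability> module_caps = {CapabilityDeviceEnqueue};
        std::set<Capability> read_pipe_caps = {CapabilityPipes};  // as pushed above
        std::printf("OpReadPipe allowed: %s\n",
                    capabilitiesSatisfied(module_caps, read_pipe_caps) ? "yes" : "no");
    }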
InstructionDesc[OpGetDefaultQueue].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpCaptureEventProfilingInfo].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Event'"); - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Profiling Info'"); - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpSetUserEventStatus].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Event'"); - InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Status'"); - - InstructionDesc[OpIsValidEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpIsValidEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpCreateUserEvent].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpRetainEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpRetainEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpReleaseEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpReleaseEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpGetKernelWorkGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelNDrangeSubGroupCount].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpEnqueueKernel].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Queue'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Flags'"); - 
InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Num Events'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Wait Events'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Ret Event'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Align'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandVariableIds, "'Local Size'"); - - InstructionDesc[OpEnqueueMarker].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Queue'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Num Events'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Wait Events'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Ret Event'"); - - InstructionDesc[OpSubgroupBallotKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupFirstInvocationKHR].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpSubgroupAnyKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupAllKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAllKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAllKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupAllEqualKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupReadInvocationKHR].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Value'"); - InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Index'"); - -#ifdef AMD_EXTENSIONS - InstructionDesc[OpGroupIAddNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupFAddNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupUMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupSMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - 
InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupUMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupSMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpFragmentMaskFetchAMD].capabilities.push_back(CapabilityFragmentMaskAMD); - InstructionDesc[OpFragmentMaskFetchAMD].operands.push(OperandId, "'Image'"); - InstructionDesc[OpFragmentMaskFetchAMD].operands.push(OperandId, "'Coordinate'"); - - InstructionDesc[OpFragmentFetchAMD].capabilities.push_back(CapabilityFragmentMaskAMD); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Image'"); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Fragment Index'"); -#endif -} - -}; // end spv namespace diff --git a/third_party/glslang-spirv/doc.h b/third_party/glslang-spirv/doc.h deleted file mode 100644 index 710ca1a52..000000000 --- a/third_party/glslang-spirv/doc.h +++ /dev/null @@ -1,262 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
-// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-//
-// Parameterize the SPIR-V enumerants.
-//
-
-#pragma once
-
-#include "spirv.hpp"
-
-#include <vector>
-
-namespace spv {
-
-// Fill in all the parameters
-void Parameterize();
-
-// Return the English names of all the enums.
-const char* SourceString(int);
-const char* AddressingString(int);
-const char* MemoryString(int);
-const char* ExecutionModelString(int);
-const char* ExecutionModeString(int);
-const char* StorageClassString(int);
-const char* DecorationString(int);
-const char* BuiltInString(int);
-const char* DimensionString(int);
-const char* SelectControlString(int);
-const char* LoopControlString(int);
-const char* FunctionControlString(int);
-const char* SamplerAddressingModeString(int);
-const char* SamplerFilterModeString(int);
-const char* ImageFormatString(int);
-const char* ImageChannelOrderString(int);
-const char* ImageChannelTypeString(int);
-const char* ImageChannelDataTypeString(int type);
-const char* ImageOperandsString(int format);
-const char* ImageOperands(int);
-const char* FPFastMathString(int);
-const char* FPRoundingModeString(int);
-const char* LinkageTypeString(int);
-const char* FuncParamAttrString(int);
-const char* AccessQualifierString(int);
-const char* MemorySemanticsString(int);
-const char* MemoryAccessString(int);
-const char* ExecutionScopeString(int);
-const char* GroupOperationString(int);
-const char* KernelEnqueueFlagsString(int);
-const char* KernelProfilingInfoString(int);
-const char* CapabilityString(int);
-const char* OpcodeString(int);
-const char* ScopeString(int mem);
-
-// For grouping opcodes into subsections
-enum OpcodeClass {
-    OpClassMisc,
-    OpClassDebug,
-    OpClassAnnotate,
-    OpClassExtension,
-    OpClassMode,
-    OpClassType,
-    OpClassConstant,
-    OpClassMemory,
-    OpClassFunction,
-    OpClassImage,
-    OpClassConvert,
-    OpClassComposite,
-    OpClassArithmetic,
-    OpClassBit,
-    OpClassRelationalLogical,
-    OpClassDerivative,
-    OpClassFlowControl,
-    OpClassAtomic,
-    OpClassPrimitive,
-    OpClassBarrier,
-    OpClassGroup,
-    OpClassDeviceSideEnqueue,
-    OpClassPipe,
-
-    OpClassCount,
-    OpClassMissing // all instructions start out as missing
-};
-
-// For parameterizing operands.
-enum OperandClass {
-    OperandNone,
-    OperandId,
-    OperandVariableIds,
-    OperandOptionalLiteral,
-    OperandOptionalLiteralString,
-    OperandVariableLiterals,
-    OperandVariableIdLiteral,
-    OperandVariableLiteralId,
-    OperandLiteralNumber,
-    OperandLiteralString,
-    OperandSource,
-    OperandExecutionModel,
-    OperandAddressing,
-    OperandMemory,
-    OperandExecutionMode,
-    OperandStorage,
-    OperandDimensionality,
-    OperandSamplerAddressingMode,
-    OperandSamplerFilterMode,
-    OperandSamplerImageFormat,
-    OperandImageChannelOrder,
-    OperandImageChannelDataType,
-    OperandImageOperands,
-    OperandFPFastMath,
-    OperandFPRoundingMode,
-    OperandLinkageType,
-    OperandAccessQualifier,
-    OperandFuncParamAttr,
-    OperandDecoration,
-    OperandBuiltIn,
-    OperandSelect,
-    OperandLoop,
-    OperandFunction,
-    OperandMemorySemantics,
-    OperandMemoryAccess,
-    OperandScope,
-    OperandGroupOperation,
-    OperandKernelEnqueueFlags,
-    OperandKernelProfilingInfo,
-    OperandCapability,
-
-    OperandOpcode,
-
-    OperandCount
-};
-
-// Any specific enum can have a set of capabilities that allow it:
-typedef std::vector<Capability> EnumCaps;
-
-// Parameterize a set of operands with their OperandClass(es) and descriptions.
-class OperandParameters {
-public:
-    OperandParameters() { }
-    void push(OperandClass oc, const char* d, bool opt = false)
-    {
-        opClass.push_back(oc);
-        desc.push_back(d);
-        optional.push_back(opt);
-    }
-    void setOptional();
-    OperandClass getClass(int op) const { return opClass[op]; }
-    const char* getDesc(int op) const { return desc[op]; }
-    bool isOptional(int op) const { return optional[op]; }
-    int getNum() const { return (int)opClass.size(); }
-
-protected:
-    std::vector<OperandClass> opClass;
-    std::vector<const char*> desc;
-    std::vector<bool> optional;
-};
-
-// Parameterize an enumerant
-class EnumParameters {
-public:
-    EnumParameters() : desc(0) { }
-    EnumCaps caps;
-    const char* desc;
-};
-
-// Parameterize a set of enumerants that form an enum
-class EnumDefinition : public EnumParameters {
-public:
-    EnumDefinition() :
-        ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { }
-    void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false)
-    {
-        ceiling = ceil;
-        getName = name;
-        bitmask = mask;
-        enumParams = ep;
-    }
-    void setOperands(OperandParameters* op) { operandParams = op; }
-    int ceiling;   // ceiling of enumerants
-    bool bitmask;  // true if these enumerants combine into a bitmask
-    const char* (*getName)(int);      // a function that returns the name for each enumerant value (or shift)
-    EnumParameters* enumParams;       // parameters for each individual enumerant
-    OperandParameters* operandParams; // sets of operands
-};
-
-// Parameterize an instruction's logical format, including its known set of operands,
-// per OperandParameters above.
-class InstructionParameters {
-public:
-    InstructionParameters() :
-        opDesc("TBD"),
-        opClass(OpClassMissing),
-        typePresent(true),   // most normal, only exceptions have to be spelled out
-        resultPresent(true)  // most normal, only exceptions have to be spelled out
-    { }
-
-    void setResultAndType(bool r, bool t)
-    {
-        resultPresent = r;
-        typePresent = t;
-    }
-
-    bool hasResult() const { return resultPresent != 0; }
-    bool hasType()   const { return typePresent != 0; }
-
-    const char* opDesc;
-    EnumCaps capabilities;
-    OpcodeClass opClass;
-    OperandParameters operands;
-
-protected:
-    int typePresent   : 1;
-    int resultPresent : 1;
-};
-
-const int OpcodeCeiling = 321;
-
-// The set of objects that hold all the instruction/operand
-// parameterization information.
-extern InstructionParameters InstructionDesc[];
-
-// These hold definitions of the enumerants used for operands
-extern EnumDefinition OperandClassParams[];
-
-const char* GetOperandDesc(OperandClass operand);
-void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false);
-const char* AccessQualifierString(int attr);
-
-void PrintOperands(const OperandParameters& operands, int reservedOperands);
-
-};  // end namespace spv
diff --git a/third_party/glslang-spirv/hex_float.h b/third_party/glslang-spirv/hex_float.h
deleted file mode 100644
index 905b21a45..000000000
--- a/third_party/glslang-spirv/hex_float.h
+++ /dev/null
@@ -1,1078 +0,0 @@
-// Copyright (c) 2015-2016 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
-#define LIBSPIRV_UTIL_HEX_FLOAT_H_
-
-#include <cassert>
-#include <cctype>
-#include <cmath>
-#include <cstdint>
-#include <iomanip>
-#include <limits>
-#include <sstream>
-
-#if defined(_MSC_VER) && _MSC_VER < 1800
-namespace std {
-bool isnan(double f)
-{
-  return ::_isnan(f) != 0;
-}
-bool isinf(double f)
-{
-  return ::_finite(f) == 0;
-}
-}
-#endif
-
-#include "bitutils.h"
-
-namespace spvutils {
-
-class Float16 {
- public:
-  Float16(uint16_t v) : val(v) {}
-  Float16() {}
-  static bool isNan(const Float16& val) {
-    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
-  }
-  // Returns true if the given value is any kind of infinity.
-  static bool isInfinity(const Float16& val) {
-    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
-  }
-  Float16(const Float16& other) { val = other.val; }
-  uint16_t get_value() const { return val; }
-
-  // Returns the maximum normal value.
-  static Float16 max() { return Float16(0x7bff); }
-  // Returns the lowest normal value.
-  static Float16 lowest() { return Float16(0xfbff); }
-
- private:
-  uint16_t val;
-};
-
-// To specialize this type, you must override uint_type to define
-// an unsigned integer that can fit your floating point type.
-// You must also add a isNan function that returns true if
-// a value is Nan.
-template <typename T>
-struct FloatProxyTraits {
-  typedef void uint_type;
-};
-
-template <>
-struct FloatProxyTraits<float> {
-  typedef uint32_t uint_type;
-  static bool isNan(float f) { return std::isnan(f); }
-  // Returns true if the given value is any kind of infinity.
-  static bool isInfinity(float f) { return std::isinf(f); }
-  // Returns the maximum normal value.
-  static float max() { return std::numeric_limits<float>::max(); }
-  // Returns the lowest normal value.
-  static float lowest() { return std::numeric_limits<float>::lowest(); }
-};
-
-template <>
-struct FloatProxyTraits<double> {
-  typedef uint64_t uint_type;
-  static bool isNan(double f) { return std::isnan(f); }
-  // Returns true if the given value is any kind of infinity.
-  static bool isInfinity(double f) { return std::isinf(f); }
-  // Returns the maximum normal value.
-  static double max() { return std::numeric_limits<double>::max(); }
-  // Returns the lowest normal value.
-  static double lowest() { return std::numeric_limits<double>::lowest(); }
-};
-
-template <>
-struct FloatProxyTraits<Float16> {
-  typedef uint16_t uint_type;
-  static bool isNan(Float16 f) { return Float16::isNan(f); }
-  // Returns true if the given value is any kind of infinity.
-  static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
-  // Returns the maximum normal value.
-  static Float16 max() { return Float16::max(); }
-  // Returns the lowest normal value.
-  static Float16 lowest() { return Float16::lowest(); }
-};
-
-// Since copying a floating point number (especially if it is NaN)
-// does not guarantee that bits are preserved, this class lets us
-// store the type and use it as a float when necessary.
-template <typename T>
-class FloatProxy {
- public:
-  typedef typename FloatProxyTraits<T>::uint_type uint_type;
-
-  // Since this is to act similar to the normal floats,
-  // do not initialize the data by default.
-  FloatProxy() {}
-
-  // Intentionally non-explicit. This is a proxy type so
-  // implicit conversions allow us to use it more transparently.
-  FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); }
-
-  // Intentionally non-explicit. This is a proxy type so
-  // implicit conversions allow us to use it more transparently.
-  FloatProxy(uint_type val) { data_ = val; }
-
-  // This is helpful to have and is guaranteed not to stomp bits.
-  FloatProxy<T> operator-() const {
-    return static_cast<uint_type>(data_ ^
-                                  (uint_type(0x1) << (sizeof(T) * 8 - 1)));
-  }
-
-  // Returns the data as a floating point value.
-  T getAsFloat() const { return BitwiseCast<T>(data_); }
-
-  // Returns the raw data.
-  uint_type data() const { return data_; }
-
-  // Returns true if the value represents any type of NaN.
-  bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); }
-  // Returns true if the value represents any type of infinity.
-  bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); }
-
-  // Returns the maximum normal value.
-  static FloatProxy<T> max() {
-    return FloatProxy<T>(FloatProxyTraits<T>::max());
-  }
-  // Returns the lowest normal value.
-  static FloatProxy<T> lowest() {
-    return FloatProxy<T>(FloatProxyTraits<T>::lowest());
-  }
-
- private:
-  uint_type data_;
-};
-
-template <typename T>
-bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) {
-  return first.data() == second.data();
-}
-
-// Reads a FloatProxy value as a normal float from a stream.
-template <typename T>
-std::istream& operator>>(std::istream& is, FloatProxy<T>& value) {
-  T float_val;
-  is >> float_val;
-  value = FloatProxy<T>(float_val);
-  return is;
-}
-
-// This is an example traits. It is not meant to be used in practice, but will
-// be the default for any non-specialized type.
-template <typename T>
-struct HexFloatTraits {
-  // Integer type that can store this hex-float.
-  typedef void uint_type;
-  // Signed integer type that can store this hex-float.
-  typedef void int_type;
-  // The numerical type that this HexFloat represents.
-  typedef void underlying_type;
-  // The type needed to construct the underlying type.
-  typedef void native_type;
-  // The number of bits that are actually relevant in the uint_type.
-  // This allows us to deal with, for example, 24-bit values in a 32-bit
-  // integer.
-  static const uint32_t num_used_bits = 0;
-  // Number of bits that represent the exponent.
-  static const uint32_t num_exponent_bits = 0;
-  // Number of bits that represent the fractional part.
-  static const uint32_t num_fraction_bits = 0;
-  // The bias of the exponent. (How much we need to subtract from the stored
-  // value to get the correct value.)
-  static const uint32_t exponent_bias = 0;
-};
-
-// Traits for IEEE float.
-// 1 sign bit, 8 exponent bits, 23 fractional bits.
-template <>
-struct HexFloatTraits<FloatProxy<float>> {
-  typedef uint32_t uint_type;
-  typedef int32_t int_type;
-  typedef FloatProxy<float> underlying_type;
-  typedef float native_type;
-  static const uint_type num_used_bits = 32;
-  static const uint_type num_exponent_bits = 8;
-  static const uint_type num_fraction_bits = 23;
-  static const uint_type exponent_bias = 127;
-};
-
-// Traits for IEEE double.
-// 1 sign bit, 11 exponent bits, 52 fractional bits.
-template <>
-struct HexFloatTraits<FloatProxy<double>> {
-  typedef uint64_t uint_type;
-  typedef int64_t int_type;
-  typedef FloatProxy<double> underlying_type;
-  typedef double native_type;
-  static const uint_type num_used_bits = 64;
-  static const uint_type num_exponent_bits = 11;
-  static const uint_type num_fraction_bits = 52;
-  static const uint_type exponent_bias = 1023;
-};
-
-// Traits for IEEE half.
-// 1 sign bit, 5 exponent bits, 10 fractional bits.
-template <>
-struct HexFloatTraits<FloatProxy<Float16>> {
-  typedef uint16_t uint_type;
-  typedef int16_t int_type;
-  typedef uint16_t underlying_type;
-  typedef uint16_t native_type;
-  static const uint_type num_used_bits = 16;
-  static const uint_type num_exponent_bits = 5;
-  static const uint_type num_fraction_bits = 10;
-  static const uint_type exponent_bias = 15;
-};
-
-enum round_direction {
-  kRoundToZero,
-  kRoundToNearestEven,
-  kRoundToPositiveInfinity,
-  kRoundToNegativeInfinity
-};
-
-// Template class that houses a floating pointer number.
-// It exposes a number of constants based on the provided traits to
-// assist in interpreting the bits of the value.
-template <typename T, typename Traits = HexFloatTraits<T>>
-class HexFloat {
- public:
-  typedef typename Traits::uint_type uint_type;
-  typedef typename Traits::int_type int_type;
-  typedef typename Traits::underlying_type underlying_type;
-  typedef typename Traits::native_type native_type;
-
-  explicit HexFloat(T f) : value_(f) {}
-
-  T value() const { return value_; }
-  void set_value(T f) { value_ = f; }
-
-  // These are all written like this because it is convenient to have
-  // compile-time constants for all of these values.
-
-  // Pass-through values to save typing.
-  static const uint32_t num_used_bits = Traits::num_used_bits;
-  static const uint32_t exponent_bias = Traits::exponent_bias;
-  static const uint32_t num_exponent_bits = Traits::num_exponent_bits;
-  static const uint32_t num_fraction_bits = Traits::num_fraction_bits;
-
-  // Number of bits to shift left to set the highest relevant bit.
-  static const uint32_t top_bit_left_shift = num_used_bits - 1;
-  // How many nibbles (hex characters) the fractional part takes up.
-  static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4;
-  // If the fractional part does not fit evenly into a hex character (4-bits)
-  // then we have to left-shift to get rid of leading 0s. This is the amount
-  // we have to shift (might be 0).
-  static const uint32_t num_overflow_bits =
-      fraction_nibbles * 4 - num_fraction_bits;
-
-  // The representation of the fraction, not the actual bits. This
-  // includes the leading bit that is usually implicit.
-  static const uint_type fraction_represent_mask =
-      spvutils::SetBits<uint_type, 0,
-                        num_fraction_bits + num_overflow_bits>::get;
-
-  // The topmost bit in the nibble-aligned fraction.
-  static const uint_type fraction_top_bit =
-      uint_type(1) << (num_fraction_bits + num_overflow_bits - 1);
-
-  // The least significant bit in the exponent, which is also the bit
-  // immediately to the left of the significand.
- static const uint_type first_exponent_bit = uint_type(1) - << (num_fraction_bits); - - // The mask for the encoded fraction. It does not include the - // implicit bit. - static const uint_type fraction_encode_mask = - spvutils::SetBits::get; - - // The bit that is used as a sign. - static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; - - // The bits that represent the exponent. - static const uint_type exponent_mask = - spvutils::SetBits::get; - - // How far left the exponent is shifted. - static const uint32_t exponent_left_shift = num_fraction_bits; - - // How far from the right edge the fraction is shifted. - static const uint32_t fraction_right_shift = - static_cast(sizeof(uint_type) * 8) - num_fraction_bits; - - // The maximum representable unbiased exponent. - static const int_type max_exponent = - (exponent_mask >> num_fraction_bits) - exponent_bias; - // The minimum representable exponent for normalized numbers. - static const int_type min_exponent = -static_cast(exponent_bias); - - // Returns the bits associated with the value. - uint_type getBits() const { return spvutils::BitwiseCast(value_); } - - // Returns the bits associated with the value, without the leading sign bit. - uint_type getUnsignedBits() const { - return static_cast(spvutils::BitwiseCast(value_) & - ~sign_mask); - } - - // Returns the bits associated with the exponent, shifted to start at the - // lsb of the type. - const uint_type getExponentBits() const { - return static_cast((getBits() & exponent_mask) >> - num_fraction_bits); - } - - // Returns the exponent in unbiased form. This is the exponent in the - // human-friendly form. - const int_type getUnbiasedExponent() const { - return static_cast(getExponentBits() - exponent_bias); - } - - // Returns just the significand bits from the value. - const uint_type getSignificandBits() const { - return getBits() & fraction_encode_mask; - } - - // If the number was normalized, returns the unbiased exponent. - // If the number was denormal, normalize the exponent first. - const int_type getUnbiasedNormalizedExponent() const { - if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 - return 0; - } - int_type exp = getUnbiasedExponent(); - if (exp == min_exponent) { // We are in denorm land. - uint_type significand_bits = getSignificandBits(); - while ((significand_bits & (first_exponent_bit >> 1)) == 0) { - significand_bits = static_cast(significand_bits << 1); - exp = static_cast(exp - 1); - } - significand_bits &= fraction_encode_mask; - } - return exp; - } - - // Returns the signficand after it has been normalized. - const uint_type getNormalizedSignificand() const { - int_type unbiased_exponent = getUnbiasedNormalizedExponent(); - uint_type significand = getSignificandBits(); - for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { - significand = static_cast(significand << 1); - } - significand &= fraction_encode_mask; - return significand; - } - - // Returns true if this number represents a negative value. - bool isNegative() const { return (getBits() & sign_mask) != 0; } - - // Sets this HexFloat from the individual components. - // Note this assumes EVERY significand is normalized, and has an implicit - // leading one. This means that the only way that this method will set 0, - // is if you set a number so denormalized that it underflows. - // Do not use this method with raw bits extracted from a subnormal number, - // since subnormals do not have an implicit leading 1 in the significand. 
- // The significand is also expected to be in the - // lowest-most num_fraction_bits of the uint_type. - // The exponent is expected to be unbiased, meaning an exponent of - // 0 actually means 0. - // If underflow_round_up is set, then on underflow, if a number is non-0 - // and would underflow, we round up to the smallest denorm. - void setFromSignUnbiasedExponentAndNormalizedSignificand( - bool negative, int_type exponent, uint_type significand, - bool round_denorm_up) { - bool significand_is_zero = significand == 0; - - if (exponent <= min_exponent) { - // If this was denormalized, then we have to shift the bit on, meaning - // the significand is not zero. - significand_is_zero = false; - significand |= first_exponent_bit; - significand = static_cast(significand >> 1); - } - - while (exponent < min_exponent) { - significand = static_cast(significand >> 1); - ++exponent; - } - - if (exponent == min_exponent) { - if (significand == 0 && !significand_is_zero && round_denorm_up) { - significand = static_cast(0x1); - } - } - - uint_type new_value = 0; - if (negative) { - new_value = static_cast(new_value | sign_mask); - } - exponent = static_cast(exponent + exponent_bias); - assert(exponent >= 0); - - // put it all together - exponent = static_cast((exponent << exponent_left_shift) & - exponent_mask); - significand = static_cast(significand & fraction_encode_mask); - new_value = static_cast(new_value | (exponent | significand)); - value_ = BitwiseCast(new_value); - } - - // Increments the significand of this number by the given amount. - // If this would spill the significand into the implicit bit, - // carry is set to true and the significand is shifted to fit into - // the correct location, otherwise carry is set to false. - // All significands and to_increment are assumed to be within the bounds - // for a valid significand. - static uint_type incrementSignificand(uint_type significand, - uint_type to_increment, bool* carry) { - significand = static_cast(significand + to_increment); - *carry = false; - if (significand & first_exponent_bit) { - *carry = true; - // The implicit 1-bit will have carried, so we should zero-out the - // top bit and shift back. - significand = static_cast(significand & ~first_exponent_bit); - significand = static_cast(significand >> 1); - } - return significand; - } - - // These exist because MSVC throws warnings on negative right-shifts - // even if they are not going to be executed. Eg: - // constant_number < 0? 0: constant_number - // These convert the negative left-shifts into right shifts. - - template - uint_type negatable_left_shift(int_type N, uint_type val) - { - if(N >= 0) - return val << N; - - return val >> -N; - } - - template - uint_type negatable_right_shift(int_type N, uint_type val) - { - if(N >= 0) - return val >> N; - - return val << -N; - } - - // Returns the significand, rounded to fit in a significand in - // other_T. This is shifted so that the most significant - // bit of the rounded number lines up with the most significant bit - // of the returned significand. - template - typename other_T::uint_type getRoundedNormalizedSignificand( - round_direction dir, bool* carry_bit) { - typedef typename other_T::uint_type other_uint_type; - static const int_type num_throwaway_bits = - static_cast(num_fraction_bits) - - static_cast(other_T::num_fraction_bits); - - static const uint_type last_significant_bit = - (num_throwaway_bits < 0) - ? 
0 - : negatable_left_shift(num_throwaway_bits, 1u); - static const uint_type first_rounded_bit = - (num_throwaway_bits < 1) - ? 0 - : negatable_left_shift(num_throwaway_bits - 1, 1u); - - static const uint_type throwaway_mask_bits = - num_throwaway_bits > 0 ? num_throwaway_bits : 0; - static const uint_type throwaway_mask = - spvutils::SetBits::get; - - *carry_bit = false; - other_uint_type out_val = 0; - uint_type significand = getNormalizedSignificand(); - // If we are up-casting, then we just have to shift to the right location. - if (num_throwaway_bits <= 0) { - out_val = static_cast(significand); - uint_type shift_amount = static_cast(-num_throwaway_bits); - out_val = static_cast(out_val << shift_amount); - return out_val; - } - - // If every non-representable bit is 0, then we don't have any casting to - // do. - if ((significand & throwaway_mask) == 0) { - return static_cast( - negatable_right_shift(num_throwaway_bits, significand)); - } - - bool round_away_from_zero = false; - // We actually have to narrow the significand here, so we have to follow the - // rounding rules. - switch (dir) { - case kRoundToZero: - break; - case kRoundToPositiveInfinity: - round_away_from_zero = !isNegative(); - break; - case kRoundToNegativeInfinity: - round_away_from_zero = isNegative(); - break; - case kRoundToNearestEven: - // Have to round down, round bit is 0 - if ((first_rounded_bit & significand) == 0) { - break; - } - if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { - // If any subsequent bit of the rounded portion is non-0 then we round - // up. - round_away_from_zero = true; - break; - } - // We are exactly half-way between 2 numbers, pick even. - if ((significand & last_significant_bit) != 0) { - // 1 for our last bit, round up. - round_away_from_zero = true; - break; - } - break; - } - - if (round_away_from_zero) { - return static_cast( - negatable_right_shift(num_throwaway_bits, incrementSignificand( - significand, last_significant_bit, carry_bit))); - } else { - return static_cast( - negatable_right_shift(num_throwaway_bits, significand)); - } - } - - // Casts this value to another HexFloat. If the cast is widening, - // then round_dir is ignored. If the cast is narrowing, then - // the result is rounded in the direction specified. - // This number will retain Nan and Inf values. - // It will also saturate to Inf if the number overflows, and - // underflow to (0 or min depending on rounding) if the number underflows. - template - void castTo(other_T& other, round_direction round_dir) { - other = other_T(static_cast(0)); - bool negate = isNegative(); - if (getUnsignedBits() == 0) { - if (negate) { - other.set_value(-other.value()); - } - return; - } - uint_type significand = getSignificandBits(); - bool carried = false; - typename other_T::uint_type rounded_significand = - getRoundedNormalizedSignificand(round_dir, &carried); - - int_type exponent = getUnbiasedExponent(); - if (exponent == min_exponent) { - // If we are denormal, normalize the exponent, so that we can encode - // easily. 
- exponent = static_cast(exponent + 1); - for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; - check_bit = static_cast(check_bit >> 1)) { - exponent = static_cast(exponent - 1); - if (check_bit & significand) break; - } - } - - bool is_nan = - (getBits() & exponent_mask) == exponent_mask && significand != 0; - bool is_inf = - !is_nan && - ((exponent + carried) > static_cast(other_T::exponent_bias) || - (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); - - // If we are Nan or Inf we should pass that through. - if (is_inf) { - other.set_value(BitwiseCast( - static_cast( - (negate ? other_T::sign_mask : 0) | other_T::exponent_mask))); - return; - } - if (is_nan) { - typename other_T::uint_type shifted_significand; - shifted_significand = static_cast( - negatable_left_shift( - static_cast(other_T::num_fraction_bits) - - static_cast(num_fraction_bits), significand)); - - // We are some sort of Nan. We try to keep the bit-pattern of the Nan - // as close as possible. If we had to shift off bits so we are 0, then we - // just set the last bit. - other.set_value(BitwiseCast( - static_cast( - (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | - (shifted_significand == 0 ? 0x1 : shifted_significand)))); - return; - } - - bool round_underflow_up = - isNegative() ? round_dir == kRoundToNegativeInfinity - : round_dir == kRoundToPositiveInfinity; - typedef typename other_T::int_type other_int_type; - // setFromSignUnbiasedExponentAndNormalizedSignificand will - // zero out any underflowing value (but retain the sign). - other.setFromSignUnbiasedExponentAndNormalizedSignificand( - negate, static_cast(exponent), rounded_significand, - round_underflow_up); - return; - } - - private: - T value_; - - static_assert(num_used_bits == - Traits::num_exponent_bits + Traits::num_fraction_bits + 1, - "The number of bits do not fit"); - static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); -}; - -// Returns 4 bits represented by the hex character. -inline uint8_t get_nibble_from_character(int character) { - const char* dec = "0123456789"; - const char* lower = "abcdef"; - const char* upper = "ABCDEF"; - const char* p = nullptr; - if ((p = strchr(dec, character))) { - return static_cast(p - dec); - } else if ((p = strchr(lower, character))) { - return static_cast(p - lower + 0xa); - } else if ((p = strchr(upper, character))) { - return static_cast(p - upper + 0xa); - } - - assert(false && "This was called with a non-hex character"); - return 0; -} - -// Outputs the given HexFloat to the stream. -template -std::ostream& operator<<(std::ostream& os, const HexFloat& value) { - typedef HexFloat HF; - typedef typename HF::uint_type uint_type; - typedef typename HF::int_type int_type; - - static_assert(HF::num_used_bits != 0, - "num_used_bits must be non-zero for a valid float"); - static_assert(HF::num_exponent_bits != 0, - "num_exponent_bits must be non-zero for a valid float"); - static_assert(HF::num_fraction_bits != 0, - "num_fractin_bits must be non-zero for a valid float"); - - const uint_type bits = spvutils::BitwiseCast(value.value()); - const char* const sign = (bits & HF::sign_mask) ? 
"-" : ""; - const uint_type exponent = static_cast( - (bits & HF::exponent_mask) >> HF::num_fraction_bits); - - uint_type fraction = static_cast((bits & HF::fraction_encode_mask) - << HF::num_overflow_bits); - - const bool is_zero = exponent == 0 && fraction == 0; - const bool is_denorm = exponent == 0 && !is_zero; - - // exponent contains the biased exponent we have to convert it back into - // the normal range. - int_type int_exponent = static_cast(exponent - HF::exponent_bias); - // If the number is all zeros, then we actually have to NOT shift the - // exponent. - int_exponent = is_zero ? 0 : int_exponent; - - // If we are denorm, then start shifting, and decreasing the exponent until - // our leading bit is 1. - - if (is_denorm) { - while ((fraction & HF::fraction_top_bit) == 0) { - fraction = static_cast(fraction << 1); - int_exponent = static_cast(int_exponent - 1); - } - // Since this is denormalized, we have to consume the leading 1 since it - // will end up being implicit. - fraction = static_cast(fraction << 1); // eat the leading 1 - fraction &= HF::fraction_represent_mask; - } - - uint_type fraction_nibbles = HF::fraction_nibbles; - // We do not have to display any trailing 0s, since this represents the - // fractional part. - while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { - // Shift off any trailing values; - fraction = static_cast(fraction >> 4); - --fraction_nibbles; - } - - const auto saved_flags = os.flags(); - const auto saved_fill = os.fill(); - - os << sign << "0x" << (is_zero ? '0' : '1'); - if (fraction_nibbles) { - // Make sure to keep the leading 0s in place, since this is the fractional - // part. - os << "." << std::setw(static_cast(fraction_nibbles)) - << std::setfill('0') << std::hex << fraction; - } - os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent; - - os.flags(saved_flags); - os.fill(saved_fill); - - return os; -} - -// Returns true if negate_value is true and the next character on the -// input stream is a plus or minus sign. In that case we also set the fail bit -// on the stream and set the value to the zero value for its type. -template -inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, - HexFloat& value) { - if (negate_value) { - auto next_char = is.peek(); - if (next_char == '-' || next_char == '+') { - // Fail the parse. Emulate standard behaviour by setting the value to - // the zero value, and set the fail bit on the stream. - value = HexFloat(typename HexFloat::uint_type(0)); - is.setstate(std::ios_base::failbit); - return true; - } - } - return false; -} - -// Parses a floating point number from the given stream and stores it into the -// value parameter. -// If negate_value is true then the number may not have a leading minus or -// plus, and if it successfully parses, then the number is negated before -// being stored into the value parameter. -// If the value cannot be correctly parsed or overflows the target floating -// point type, then set the fail bit on the stream. -// TODO(dneto): Promise C++11 standard behavior in how the value is set in -// the error case, but only after all target platforms implement it correctly. -// In particular, the Microsoft C++ runtime appears to be out of spec. 
-template -inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, - HexFloat& value) { - if (RejectParseDueToLeadingSign(is, negate_value, value)) { - return is; - } - T val; - is >> val; - if (negate_value) { - val = -val; - } - value.set_value(val); - // In the failure case, map -0.0 to 0.0. - if (is.fail() && value.getUnsignedBits() == 0u) { - value = HexFloat(typename HexFloat::uint_type(0)); - } - if (val.isInfinity()) { - // Fail the parse. Emulate standard behaviour by setting the value to - // the closest normal value, and set the fail bit on the stream. - value.set_value((value.isNegative() | negate_value) ? T::lowest() - : T::max()); - is.setstate(std::ios_base::failbit); - } - return is; -} - -// Specialization of ParseNormalFloat for FloatProxy values. -// This will parse the float as it were a 32-bit floating point number, -// and then round it down to fit into a Float16 value. -// The number is rounded towards zero. -// If negate_value is true then the number may not have a leading minus or -// plus, and if it successfully parses, then the number is negated before -// being stored into the value parameter. -// If the value cannot be correctly parsed or overflows the target floating -// point type, then set the fail bit on the stream. -// TODO(dneto): Promise C++11 standard behavior in how the value is set in -// the error case, but only after all target platforms implement it correctly. -// In particular, the Microsoft C++ runtime appears to be out of spec. -template <> -inline std::istream& -ParseNormalFloat, HexFloatTraits>>( - std::istream& is, bool negate_value, - HexFloat, HexFloatTraits>>& value) { - // First parse as a 32-bit float. - HexFloat> float_val(0.0f); - ParseNormalFloat(is, negate_value, float_val); - - // Then convert to 16-bit float, saturating at infinities, and - // rounding toward zero. - float_val.castTo(value, kRoundToZero); - - // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the - // fail bit and set the lowest or highest value. - if (Float16::isInfinity(value.value().getAsFloat())) { - value.set_value(value.isNegative() ? Float16::lowest() : Float16::max()); - is.setstate(std::ios_base::failbit); - } - return is; -} - -// Reads a HexFloat from the given stream. -// If the float is not encoded as a hex-float then it will be parsed -// as a regular float. -// This may fail if your stream does not support at least one unget. -// Nan values can be encoded with "0x1.p+exponent_bias". -// This would normally overflow a float and round to -// infinity but this special pattern is the exact representation for a NaN, -// and therefore is actually encoded as the correct NaN. To encode inf, -// either 0x0p+exponent_bias can be specified or any exponent greater than -// exponent_bias. -// Examples using IEEE 32-bit float encoding. -// 0x1.0p+128 (+inf) -// -0x1.0p-128 (-inf) -// -// 0x1.1p+128 (+Nan) -// -0x1.1p+128 (-Nan) -// -// 0x1p+129 (+inf) -// -0x1p+129 (-inf) -template -std::istream& operator>>(std::istream& is, HexFloat& value) { - using HF = HexFloat; - using uint_type = typename HF::uint_type; - using int_type = typename HF::int_type; - - value.set_value(static_cast(0.f)); - - if (is.flags() & std::ios::skipws) { - // If the user wants to skip whitespace , then we should obey that. 
- while (std::isspace(is.peek())) { - is.get(); - } - } - - auto next_char = is.peek(); - bool negate_value = false; - - if (next_char != '-' && next_char != '0') { - return ParseNormalFloat(is, negate_value, value); - } - - if (next_char == '-') { - negate_value = true; - is.get(); - next_char = is.peek(); - } - - if (next_char == '0') { - is.get(); // We may have to unget this. - auto maybe_hex_start = is.peek(); - if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { - is.unget(); - return ParseNormalFloat(is, negate_value, value); - } else { - is.get(); // Throw away the 'x'; - } - } else { - return ParseNormalFloat(is, negate_value, value); - } - - // This "looks" like a hex-float so treat it as one. - bool seen_p = false; - bool seen_dot = false; - uint_type fraction_index = 0; - - uint_type fraction = 0; - int_type exponent = HF::exponent_bias; - - // Strip off leading zeros so we don't have to special-case them later. - while ((next_char = is.peek()) == '0') { - is.get(); - } - - bool is_denorm = - true; // Assume denorm "representation" until we hear otherwise. - // NB: This does not mean the value is actually denorm, - // it just means that it was written 0. - bool bits_written = false; // Stays false until we write a bit. - while (!seen_p && !seen_dot) { - // Handle characters that are left of the fractional part. - if (next_char == '.') { - seen_dot = true; - } else if (next_char == 'p') { - seen_p = true; - } else if (::isxdigit(next_char)) { - // We know this is not denormalized since we have stripped all leading - // zeroes and we are not a ".". - is_denorm = false; - int number = get_nibble_from_character(next_char); - for (int i = 0; i < 4; ++i, number <<= 1) { - uint_type write_bit = (number & 0x8) ? 0x1 : 0x0; - if (bits_written) { - // If we are here the bits represented belong in the fractional - // part of the float, and we have to adjust the exponent accordingly. - fraction = static_cast( - fraction | - static_cast( - write_bit << (HF::top_bit_left_shift - fraction_index++))); - exponent = static_cast(exponent + 1); - } - bits_written |= write_bit != 0; - } - } else { - // We have not found our exponent yet, so we have to fail. - is.setstate(std::ios::failbit); - return is; - } - is.get(); - next_char = is.peek(); - } - bits_written = false; - while (seen_dot && !seen_p) { - // Handle only fractional parts now. - if (next_char == 'p') { - seen_p = true; - } else if (::isxdigit(next_char)) { - int number = get_nibble_from_character(next_char); - for (int i = 0; i < 4; ++i, number <<= 1) { - uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; - bits_written |= write_bit != 0; - if (is_denorm && !bits_written) { - // Handle modifying the exponent here this way we can handle - // an arbitrary number of hex values without overflowing our - // integer. - exponent = static_cast(exponent - 1); - } else { - fraction = static_cast( - fraction | - static_cast( - write_bit << (HF::top_bit_left_shift - fraction_index++))); - } - } - } else { - // We still have not found our 'p' exponent yet, so this is not a valid - // hex-float. - is.setstate(std::ios::failbit); - return is; - } - is.get(); - next_char = is.peek(); - } - - bool seen_sign = false; - int8_t exponent_sign = 1; - int_type written_exponent = 0; - while (true) { - if ((next_char == '-' || next_char == '+')) { - if (seen_sign) { - is.setstate(std::ios::failbit); - return is; - } - seen_sign = true; - exponent_sign = (next_char == '-') ? 
-1 : 1; - } else if (::isdigit(next_char)) { - // Hex-floats express their exponent as decimal. - written_exponent = static_cast(written_exponent * 10); - written_exponent = - static_cast(written_exponent + (next_char - '0')); - } else { - break; - } - is.get(); - next_char = is.peek(); - } - - written_exponent = static_cast(written_exponent * exponent_sign); - exponent = static_cast(exponent + written_exponent); - - bool is_zero = is_denorm && (fraction == 0); - if (is_denorm && !is_zero) { - fraction = static_cast(fraction << 1); - exponent = static_cast(exponent - 1); - } else if (is_zero) { - exponent = 0; - } - - if (exponent <= 0 && !is_zero) { - fraction = static_cast(fraction >> 1); - fraction |= static_cast(1) << HF::top_bit_left_shift; - } - - fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; - - const int_type max_exponent = - SetBits::get; - - // Handle actual denorm numbers - while (exponent < 0 && !is_zero) { - fraction = static_cast(fraction >> 1); - exponent = static_cast(exponent + 1); - - fraction &= HF::fraction_encode_mask; - if (fraction == 0) { - // We have underflowed our fraction. We should clamp to zero. - is_zero = true; - exponent = 0; - } - } - - // We have overflowed so we should be inf/-inf. - if (exponent > max_exponent) { - exponent = max_exponent; - fraction = 0; - } - - uint_type output_bits = static_cast( - static_cast(negate_value ? 1 : 0) << HF::top_bit_left_shift); - output_bits |= fraction; - - uint_type shifted_exponent = static_cast( - static_cast(exponent << HF::exponent_left_shift) & - HF::exponent_mask); - output_bits |= shifted_exponent; - - T output_float = spvutils::BitwiseCast(output_bits); - value.set_value(output_float); - - return is; -} - -// Writes a FloatProxy value to a stream. -// Zero and normal numbers are printed in the usual notation, but with -// enough digits to fully reproduce the value. Other values (subnormal, -// NaN, and infinity) are printed as a hex float. -template -std::ostream& operator<<(std::ostream& os, const FloatProxy& value) { - auto float_val = value.getAsFloat(); - switch (std::fpclassify(float_val)) { - case FP_ZERO: - case FP_NORMAL: { - auto saved_precision = os.precision(); - os.precision(std::numeric_limits::digits10); - os << float_val; - os.precision(saved_precision); - } break; - default: - os << HexFloat>(value); - break; - } - return os; -} - -template <> -inline std::ostream& operator<<(std::ostream& os, - const FloatProxy& value) { - os << HexFloat>(value); - return os; -} -} - -#endif // LIBSPIRV_UTIL_HEX_FLOAT_H_ diff --git a/third_party/glslang-spirv/spirv.hpp b/third_party/glslang-spirv/spirv.hpp deleted file mode 100644 index c6776638e..000000000 --- a/third_party/glslang-spirv/spirv.hpp +++ /dev/null @@ -1,1028 +0,0 @@ -// Copyright (c) 2014-2018 The Khronos Group Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. - -// This header is automatically generated by the same tool that creates -// the Binary Section of the SPIR-V specification. - -// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python -// -// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -// -// Some tokens act like mask values, which can be OR'd together, -// while others are mutually exclusive. The mask-like ones have -// "Mask" in their name, and a parallel enum that has the shift -// amount (1 << x) for each corresponding enumerant. - -#ifndef spirv_HPP -#define spirv_HPP - -namespace spv { - -typedef unsigned int Id; - -#define SPV_VERSION 0x10200 -#define SPV_REVISION 3 - -static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010200; -static const unsigned int Revision = 3; -static const unsigned int OpCodeMask = 0xffff; -static const unsigned int WordCountShift = 16; - -enum SourceLanguage { - SourceLanguageUnknown = 0, - SourceLanguageESSL = 1, - SourceLanguageGLSL = 2, - SourceLanguageOpenCL_C = 3, - SourceLanguageOpenCL_CPP = 4, - SourceLanguageHLSL = 5, - SourceLanguageMax = 0x7fffffff, -}; - -enum ExecutionModel { - ExecutionModelVertex = 0, - ExecutionModelTessellationControl = 1, - ExecutionModelTessellationEvaluation = 2, - ExecutionModelGeometry = 3, - ExecutionModelFragment = 4, - ExecutionModelGLCompute = 5, - ExecutionModelKernel = 6, - ExecutionModelMax = 0x7fffffff, -}; - -enum AddressingModel { - AddressingModelLogical = 0, - AddressingModelPhysical32 = 1, - AddressingModelPhysical64 = 2, - AddressingModelMax = 0x7fffffff, -}; - -enum MemoryModel { - MemoryModelSimple = 0, - MemoryModelGLSL450 = 1, - MemoryModelOpenCL = 2, - MemoryModelMax = 0x7fffffff, -}; - -enum ExecutionMode { - ExecutionModeInvocations = 0, - ExecutionModeSpacingEqual = 1, - ExecutionModeSpacingFractionalEven = 2, - ExecutionModeSpacingFractionalOdd = 3, - ExecutionModeVertexOrderCw = 4, - ExecutionModeVertexOrderCcw = 5, - ExecutionModePixelCenterInteger = 6, - ExecutionModeOriginUpperLeft = 7, - ExecutionModeOriginLowerLeft = 8, - ExecutionModeEarlyFragmentTests = 9, - ExecutionModePointMode = 10, - ExecutionModeXfb = 11, - ExecutionModeDepthReplacing = 12, - ExecutionModeDepthGreater = 14, - ExecutionModeDepthLess = 15, - ExecutionModeDepthUnchanged = 16, - ExecutionModeLocalSize = 17, - ExecutionModeLocalSizeHint = 18, - ExecutionModeInputPoints = 19, - ExecutionModeInputLines = 20, - ExecutionModeInputLinesAdjacency = 21, - ExecutionModeTriangles = 22, - ExecutionModeInputTrianglesAdjacency = 23, - ExecutionModeQuads = 24, - 
ExecutionModeIsolines = 25, - ExecutionModeOutputVertices = 26, - ExecutionModeOutputPoints = 27, - ExecutionModeOutputLineStrip = 28, - ExecutionModeOutputTriangleStrip = 29, - ExecutionModeVecTypeHint = 30, - ExecutionModeContractionOff = 31, - ExecutionModeInitializer = 33, - ExecutionModeFinalizer = 34, - ExecutionModeSubgroupSize = 35, - ExecutionModeSubgroupsPerWorkgroup = 36, - ExecutionModeSubgroupsPerWorkgroupId = 37, - ExecutionModeLocalSizeId = 38, - ExecutionModeLocalSizeHintId = 39, - ExecutionModePostDepthCoverage = 4446, - ExecutionModeStencilRefReplacingEXT = 5027, - ExecutionModeMax = 0x7fffffff, -}; - -enum StorageClass { - StorageClassUniformConstant = 0, - StorageClassInput = 1, - StorageClassUniform = 2, - StorageClassOutput = 3, - StorageClassWorkgroup = 4, - StorageClassCrossWorkgroup = 5, - StorageClassPrivate = 6, - StorageClassFunction = 7, - StorageClassGeneric = 8, - StorageClassPushConstant = 9, - StorageClassAtomicCounter = 10, - StorageClassImage = 11, - StorageClassStorageBuffer = 12, - StorageClassMax = 0x7fffffff, -}; - -enum Dim { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - DimCube = 3, - DimRect = 4, - DimBuffer = 5, - DimSubpassData = 6, - DimMax = 0x7fffffff, -}; - -enum SamplerAddressingMode { - SamplerAddressingModeNone = 0, - SamplerAddressingModeClampToEdge = 1, - SamplerAddressingModeClamp = 2, - SamplerAddressingModeRepeat = 3, - SamplerAddressingModeRepeatMirrored = 4, - SamplerAddressingModeMax = 0x7fffffff, -}; - -enum SamplerFilterMode { - SamplerFilterModeNearest = 0, - SamplerFilterModeLinear = 1, - SamplerFilterModeMax = 0x7fffffff, -}; - -enum ImageFormat { - ImageFormatUnknown = 0, - ImageFormatRgba32f = 1, - ImageFormatRgba16f = 2, - ImageFormatR32f = 3, - ImageFormatRgba8 = 4, - ImageFormatRgba8Snorm = 5, - ImageFormatRg32f = 6, - ImageFormatRg16f = 7, - ImageFormatR11fG11fB10f = 8, - ImageFormatR16f = 9, - ImageFormatRgba16 = 10, - ImageFormatRgb10A2 = 11, - ImageFormatRg16 = 12, - ImageFormatRg8 = 13, - ImageFormatR16 = 14, - ImageFormatR8 = 15, - ImageFormatRgba16Snorm = 16, - ImageFormatRg16Snorm = 17, - ImageFormatRg8Snorm = 18, - ImageFormatR16Snorm = 19, - ImageFormatR8Snorm = 20, - ImageFormatRgba32i = 21, - ImageFormatRgba16i = 22, - ImageFormatRgba8i = 23, - ImageFormatR32i = 24, - ImageFormatRg32i = 25, - ImageFormatRg16i = 26, - ImageFormatRg8i = 27, - ImageFormatR16i = 28, - ImageFormatR8i = 29, - ImageFormatRgba32ui = 30, - ImageFormatRgba16ui = 31, - ImageFormatRgba8ui = 32, - ImageFormatR32ui = 33, - ImageFormatRgb10a2ui = 34, - ImageFormatRg32ui = 35, - ImageFormatRg16ui = 36, - ImageFormatRg8ui = 37, - ImageFormatR16ui = 38, - ImageFormatR8ui = 39, - ImageFormatMax = 0x7fffffff, -}; - -enum ImageChannelOrder { - ImageChannelOrderR = 0, - ImageChannelOrderA = 1, - ImageChannelOrderRG = 2, - ImageChannelOrderRA = 3, - ImageChannelOrderRGB = 4, - ImageChannelOrderRGBA = 5, - ImageChannelOrderBGRA = 6, - ImageChannelOrderARGB = 7, - ImageChannelOrderIntensity = 8, - ImageChannelOrderLuminance = 9, - ImageChannelOrderRx = 10, - ImageChannelOrderRGx = 11, - ImageChannelOrderRGBx = 12, - ImageChannelOrderDepth = 13, - ImageChannelOrderDepthStencil = 14, - ImageChannelOrdersRGB = 15, - ImageChannelOrdersRGBx = 16, - ImageChannelOrdersRGBA = 17, - ImageChannelOrdersBGRA = 18, - ImageChannelOrderABGR = 19, - ImageChannelOrderMax = 0x7fffffff, -}; - -enum ImageChannelDataType { - ImageChannelDataTypeSnormInt8 = 0, - ImageChannelDataTypeSnormInt16 = 1, - ImageChannelDataTypeUnormInt8 = 2, - ImageChannelDataTypeUnormInt16 = 3, - 
ImageChannelDataTypeUnormShort565 = 4, - ImageChannelDataTypeUnormShort555 = 5, - ImageChannelDataTypeUnormInt101010 = 6, - ImageChannelDataTypeSignedInt8 = 7, - ImageChannelDataTypeSignedInt16 = 8, - ImageChannelDataTypeSignedInt32 = 9, - ImageChannelDataTypeUnsignedInt8 = 10, - ImageChannelDataTypeUnsignedInt16 = 11, - ImageChannelDataTypeUnsignedInt32 = 12, - ImageChannelDataTypeHalfFloat = 13, - ImageChannelDataTypeFloat = 14, - ImageChannelDataTypeUnormInt24 = 15, - ImageChannelDataTypeUnormInt101010_2 = 16, - ImageChannelDataTypeMax = 0x7fffffff, -}; - -enum ImageOperandsShift { - ImageOperandsBiasShift = 0, - ImageOperandsLodShift = 1, - ImageOperandsGradShift = 2, - ImageOperandsConstOffsetShift = 3, - ImageOperandsOffsetShift = 4, - ImageOperandsConstOffsetsShift = 5, - ImageOperandsSampleShift = 6, - ImageOperandsMinLodShift = 7, - ImageOperandsMax = 0x7fffffff, -}; - -enum ImageOperandsMask { - ImageOperandsMaskNone = 0, - ImageOperandsBiasMask = 0x00000001, - ImageOperandsLodMask = 0x00000002, - ImageOperandsGradMask = 0x00000004, - ImageOperandsConstOffsetMask = 0x00000008, - ImageOperandsOffsetMask = 0x00000010, - ImageOperandsConstOffsetsMask = 0x00000020, - ImageOperandsSampleMask = 0x00000040, - ImageOperandsMinLodMask = 0x00000080, -}; - -enum FPFastMathModeShift { - FPFastMathModeNotNaNShift = 0, - FPFastMathModeNotInfShift = 1, - FPFastMathModeNSZShift = 2, - FPFastMathModeAllowRecipShift = 3, - FPFastMathModeFastShift = 4, - FPFastMathModeMax = 0x7fffffff, -}; - -enum FPFastMathModeMask { - FPFastMathModeMaskNone = 0, - FPFastMathModeNotNaNMask = 0x00000001, - FPFastMathModeNotInfMask = 0x00000002, - FPFastMathModeNSZMask = 0x00000004, - FPFastMathModeAllowRecipMask = 0x00000008, - FPFastMathModeFastMask = 0x00000010, -}; - -enum FPRoundingMode { - FPRoundingModeRTE = 0, - FPRoundingModeRTZ = 1, - FPRoundingModeRTP = 2, - FPRoundingModeRTN = 3, - FPRoundingModeMax = 0x7fffffff, -}; - -enum LinkageType { - LinkageTypeExport = 0, - LinkageTypeImport = 1, - LinkageTypeMax = 0x7fffffff, -}; - -enum AccessQualifier { - AccessQualifierReadOnly = 0, - AccessQualifierWriteOnly = 1, - AccessQualifierReadWrite = 2, - AccessQualifierMax = 0x7fffffff, -}; - -enum FunctionParameterAttribute { - FunctionParameterAttributeZext = 0, - FunctionParameterAttributeSext = 1, - FunctionParameterAttributeByVal = 2, - FunctionParameterAttributeSret = 3, - FunctionParameterAttributeNoAlias = 4, - FunctionParameterAttributeNoCapture = 5, - FunctionParameterAttributeNoWrite = 6, - FunctionParameterAttributeNoReadWrite = 7, - FunctionParameterAttributeMax = 0x7fffffff, -}; - -enum Decoration { - DecorationRelaxedPrecision = 0, - DecorationSpecId = 1, - DecorationBlock = 2, - DecorationBufferBlock = 3, - DecorationRowMajor = 4, - DecorationColMajor = 5, - DecorationArrayStride = 6, - DecorationMatrixStride = 7, - DecorationGLSLShared = 8, - DecorationGLSLPacked = 9, - DecorationCPacked = 10, - DecorationBuiltIn = 11, - DecorationNoPerspective = 13, - DecorationFlat = 14, - DecorationPatch = 15, - DecorationCentroid = 16, - DecorationSample = 17, - DecorationInvariant = 18, - DecorationRestrict = 19, - DecorationAliased = 20, - DecorationVolatile = 21, - DecorationConstant = 22, - DecorationCoherent = 23, - DecorationNonWritable = 24, - DecorationNonReadable = 25, - DecorationUniform = 26, - DecorationSaturatedConversion = 28, - DecorationStream = 29, - DecorationLocation = 30, - DecorationComponent = 31, - DecorationIndex = 32, - DecorationBinding = 33, - DecorationDescriptorSet = 34, - 
DecorationOffset = 35, - DecorationXfbBuffer = 36, - DecorationXfbStride = 37, - DecorationFuncParamAttr = 38, - DecorationFPRoundingMode = 39, - DecorationFPFastMathMode = 40, - DecorationLinkageAttributes = 41, - DecorationNoContraction = 42, - DecorationInputAttachmentIndex = 43, - DecorationAlignment = 44, - DecorationMaxByteOffset = 45, - DecorationAlignmentId = 46, - DecorationMaxByteOffsetId = 47, - DecorationExplicitInterpAMD = 4999, - DecorationOverrideCoverageNV = 5248, - DecorationPassthroughNV = 5250, - DecorationViewportRelativeNV = 5252, - DecorationSecondaryViewportRelativeNV = 5256, - DecorationMax = 0x7fffffff, -}; - -enum BuiltIn { - BuiltInPosition = 0, - BuiltInPointSize = 1, - BuiltInClipDistance = 3, - BuiltInCullDistance = 4, - BuiltInVertexId = 5, - BuiltInInstanceId = 6, - BuiltInPrimitiveId = 7, - BuiltInInvocationId = 8, - BuiltInLayer = 9, - BuiltInViewportIndex = 10, - BuiltInTessLevelOuter = 11, - BuiltInTessLevelInner = 12, - BuiltInTessCoord = 13, - BuiltInPatchVertices = 14, - BuiltInFragCoord = 15, - BuiltInPointCoord = 16, - BuiltInFrontFacing = 17, - BuiltInSampleId = 18, - BuiltInSamplePosition = 19, - BuiltInSampleMask = 20, - BuiltInFragDepth = 22, - BuiltInHelperInvocation = 23, - BuiltInNumWorkgroups = 24, - BuiltInWorkgroupSize = 25, - BuiltInWorkgroupId = 26, - BuiltInLocalInvocationId = 27, - BuiltInGlobalInvocationId = 28, - BuiltInLocalInvocationIndex = 29, - BuiltInWorkDim = 30, - BuiltInGlobalSize = 31, - BuiltInEnqueuedWorkgroupSize = 32, - BuiltInGlobalOffset = 33, - BuiltInGlobalLinearId = 34, - BuiltInSubgroupSize = 36, - BuiltInSubgroupMaxSize = 37, - BuiltInNumSubgroups = 38, - BuiltInNumEnqueuedSubgroups = 39, - BuiltInSubgroupId = 40, - BuiltInSubgroupLocalInvocationId = 41, - BuiltInVertexIndex = 42, - BuiltInInstanceIndex = 43, - BuiltInSubgroupEqMaskKHR = 4416, - BuiltInSubgroupGeMaskKHR = 4417, - BuiltInSubgroupGtMaskKHR = 4418, - BuiltInSubgroupLeMaskKHR = 4419, - BuiltInSubgroupLtMaskKHR = 4420, - BuiltInBaseVertex = 4424, - BuiltInBaseInstance = 4425, - BuiltInDrawIndex = 4426, - BuiltInDeviceIndex = 4438, - BuiltInViewIndex = 4440, - BuiltInBaryCoordNoPerspAMD = 4992, - BuiltInBaryCoordNoPerspCentroidAMD = 4993, - BuiltInBaryCoordNoPerspSampleAMD = 4994, - BuiltInBaryCoordSmoothAMD = 4995, - BuiltInBaryCoordSmoothCentroidAMD = 4996, - BuiltInBaryCoordSmoothSampleAMD = 4997, - BuiltInBaryCoordPullModelAMD = 4998, - BuiltInFragStencilRefEXT = 5014, - BuiltInViewportMaskNV = 5253, - BuiltInSecondaryPositionNV = 5257, - BuiltInSecondaryViewportMaskNV = 5258, - BuiltInPositionPerViewNV = 5261, - BuiltInViewportMaskPerViewNV = 5262, - BuiltInFullyCoveredEXT = 5264, - BuiltInMax = 0x7fffffff, -}; - -enum SelectionControlShift { - SelectionControlFlattenShift = 0, - SelectionControlDontFlattenShift = 1, - SelectionControlMax = 0x7fffffff, -}; - -enum SelectionControlMask { - SelectionControlMaskNone = 0, - SelectionControlFlattenMask = 0x00000001, - SelectionControlDontFlattenMask = 0x00000002, -}; - -enum LoopControlShift { - LoopControlUnrollShift = 0, - LoopControlDontUnrollShift = 1, - LoopControlDependencyInfiniteShift = 2, - LoopControlDependencyLengthShift = 3, - LoopControlMax = 0x7fffffff, -}; - -enum LoopControlMask { - LoopControlMaskNone = 0, - LoopControlUnrollMask = 0x00000001, - LoopControlDontUnrollMask = 0x00000002, - LoopControlDependencyInfiniteMask = 0x00000004, - LoopControlDependencyLengthMask = 0x00000008, -}; - -enum FunctionControlShift { - FunctionControlInlineShift = 0, - FunctionControlDontInlineShift = 1, 
- FunctionControlPureShift = 2, - FunctionControlConstShift = 3, - FunctionControlMax = 0x7fffffff, -}; - -enum FunctionControlMask { - FunctionControlMaskNone = 0, - FunctionControlInlineMask = 0x00000001, - FunctionControlDontInlineMask = 0x00000002, - FunctionControlPureMask = 0x00000004, - FunctionControlConstMask = 0x00000008, -}; - -enum MemorySemanticsShift { - MemorySemanticsAcquireShift = 1, - MemorySemanticsReleaseShift = 2, - MemorySemanticsAcquireReleaseShift = 3, - MemorySemanticsSequentiallyConsistentShift = 4, - MemorySemanticsUniformMemoryShift = 6, - MemorySemanticsSubgroupMemoryShift = 7, - MemorySemanticsWorkgroupMemoryShift = 8, - MemorySemanticsCrossWorkgroupMemoryShift = 9, - MemorySemanticsAtomicCounterMemoryShift = 10, - MemorySemanticsImageMemoryShift = 11, - MemorySemanticsMax = 0x7fffffff, -}; - -enum MemorySemanticsMask { - MemorySemanticsMaskNone = 0, - MemorySemanticsAcquireMask = 0x00000002, - MemorySemanticsReleaseMask = 0x00000004, - MemorySemanticsAcquireReleaseMask = 0x00000008, - MemorySemanticsSequentiallyConsistentMask = 0x00000010, - MemorySemanticsUniformMemoryMask = 0x00000040, - MemorySemanticsSubgroupMemoryMask = 0x00000080, - MemorySemanticsWorkgroupMemoryMask = 0x00000100, - MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - MemorySemanticsAtomicCounterMemoryMask = 0x00000400, - MemorySemanticsImageMemoryMask = 0x00000800, -}; - -enum MemoryAccessShift { - MemoryAccessVolatileShift = 0, - MemoryAccessAlignedShift = 1, - MemoryAccessNontemporalShift = 2, - MemoryAccessMax = 0x7fffffff, -}; - -enum MemoryAccessMask { - MemoryAccessMaskNone = 0, - MemoryAccessVolatileMask = 0x00000001, - MemoryAccessAlignedMask = 0x00000002, - MemoryAccessNontemporalMask = 0x00000004, -}; - -enum Scope { - ScopeCrossDevice = 0, - ScopeDevice = 1, - ScopeWorkgroup = 2, - ScopeSubgroup = 3, - ScopeInvocation = 4, - ScopeMax = 0x7fffffff, -}; - -enum GroupOperation { - GroupOperationReduce = 0, - GroupOperationInclusiveScan = 1, - GroupOperationExclusiveScan = 2, - GroupOperationMax = 0x7fffffff, -}; - -enum KernelEnqueueFlags { - KernelEnqueueFlagsNoWait = 0, - KernelEnqueueFlagsWaitKernel = 1, - KernelEnqueueFlagsWaitWorkGroup = 2, - KernelEnqueueFlagsMax = 0x7fffffff, -}; - -enum KernelProfilingInfoShift { - KernelProfilingInfoCmdExecTimeShift = 0, - KernelProfilingInfoMax = 0x7fffffff, -}; - -enum KernelProfilingInfoMask { - KernelProfilingInfoMaskNone = 0, - KernelProfilingInfoCmdExecTimeMask = 0x00000001, -}; - -enum Capability { - CapabilityMatrix = 0, - CapabilityShader = 1, - CapabilityGeometry = 2, - CapabilityTessellation = 3, - CapabilityAddresses = 4, - CapabilityLinkage = 5, - CapabilityKernel = 6, - CapabilityVector16 = 7, - CapabilityFloat16Buffer = 8, - CapabilityFloat16 = 9, - CapabilityFloat64 = 10, - CapabilityInt64 = 11, - CapabilityInt64Atomics = 12, - CapabilityImageBasic = 13, - CapabilityImageReadWrite = 14, - CapabilityImageMipmap = 15, - CapabilityPipes = 17, - CapabilityGroups = 18, - CapabilityDeviceEnqueue = 19, - CapabilityLiteralSampler = 20, - CapabilityAtomicStorage = 21, - CapabilityInt16 = 22, - CapabilityTessellationPointSize = 23, - CapabilityGeometryPointSize = 24, - CapabilityImageGatherExtended = 25, - CapabilityStorageImageMultisample = 27, - CapabilityUniformBufferArrayDynamicIndexing = 28, - CapabilitySampledImageArrayDynamicIndexing = 29, - CapabilityStorageBufferArrayDynamicIndexing = 30, - CapabilityStorageImageArrayDynamicIndexing = 31, - CapabilityClipDistance = 32, - CapabilityCullDistance = 33, - 
CapabilityImageCubeArray = 34, - CapabilitySampleRateShading = 35, - CapabilityImageRect = 36, - CapabilitySampledRect = 37, - CapabilityGenericPointer = 38, - CapabilityInt8 = 39, - CapabilityInputAttachment = 40, - CapabilitySparseResidency = 41, - CapabilityMinLod = 42, - CapabilitySampled1D = 43, - CapabilityImage1D = 44, - CapabilitySampledCubeArray = 45, - CapabilitySampledBuffer = 46, - CapabilityImageBuffer = 47, - CapabilityImageMSArray = 48, - CapabilityStorageImageExtendedFormats = 49, - CapabilityImageQuery = 50, - CapabilityDerivativeControl = 51, - CapabilityInterpolationFunction = 52, - CapabilityTransformFeedback = 53, - CapabilityGeometryStreams = 54, - CapabilityStorageImageReadWithoutFormat = 55, - CapabilityStorageImageWriteWithoutFormat = 56, - CapabilityMultiViewport = 57, - CapabilitySubgroupDispatch = 58, - CapabilityNamedBarrier = 59, - CapabilityPipeStorage = 60, - CapabilitySubgroupBallotKHR = 4423, - CapabilityDrawParameters = 4427, - CapabilitySubgroupVoteKHR = 4431, - CapabilityStorageBuffer16BitAccess = 4433, - CapabilityStorageUniformBufferBlock16 = 4433, - CapabilityStorageUniform16 = 4434, - CapabilityUniformAndStorageBuffer16BitAccess = 4434, - CapabilityStoragePushConstant16 = 4435, - CapabilityStorageInputOutput16 = 4436, - CapabilityDeviceGroup = 4437, - CapabilityMultiView = 4439, - CapabilityVariablePointersStorageBuffer = 4441, - CapabilityVariablePointers = 4442, - CapabilityAtomicStorageOps = 4445, - CapabilitySampleMaskPostDepthCoverage = 4447, - CapabilityImageGatherBiasLodAMD = 5009, - CapabilityFragmentMaskAMD = 5010, - CapabilityStencilExportEXT = 5013, - CapabilityImageReadWriteLodAMD = 5015, - CapabilitySampleMaskOverrideCoverageNV = 5249, - CapabilityGeometryShaderPassthroughNV = 5251, - CapabilityShaderViewportIndexLayerEXT = 5254, - CapabilityShaderViewportIndexLayerNV = 5254, - CapabilityShaderViewportMaskNV = 5255, - CapabilityShaderStereoViewNV = 5259, - CapabilityPerViewAttributesNV = 5260, - CapabilityFragmentFullyCoveredEXT = 5265, - CapabilitySubgroupShuffleINTEL = 5568, - CapabilitySubgroupBufferBlockIOINTEL = 5569, - CapabilitySubgroupImageBlockIOINTEL = 5570, - CapabilityMax = 0x7fffffff, -}; - -enum Op { - OpNop = 0, - OpUndef = 1, - OpSourceContinued = 2, - OpSource = 3, - OpSourceExtension = 4, - OpName = 5, - OpMemberName = 6, - OpString = 7, - OpLine = 8, - OpExtension = 10, - OpExtInstImport = 11, - OpExtInst = 12, - OpMemoryModel = 14, - OpEntryPoint = 15, - OpExecutionMode = 16, - OpCapability = 17, - OpTypeVoid = 19, - OpTypeBool = 20, - OpTypeInt = 21, - OpTypeFloat = 22, - OpTypeVector = 23, - OpTypeMatrix = 24, - OpTypeImage = 25, - OpTypeSampler = 26, - OpTypeSampledImage = 27, - OpTypeArray = 28, - OpTypeRuntimeArray = 29, - OpTypeStruct = 30, - OpTypeOpaque = 31, - OpTypePointer = 32, - OpTypeFunction = 33, - OpTypeEvent = 34, - OpTypeDeviceEvent = 35, - OpTypeReserveId = 36, - OpTypeQueue = 37, - OpTypePipe = 38, - OpTypeForwardPointer = 39, - OpConstantTrue = 41, - OpConstantFalse = 42, - OpConstant = 43, - OpConstantComposite = 44, - OpConstantSampler = 45, - OpConstantNull = 46, - OpSpecConstantTrue = 48, - OpSpecConstantFalse = 49, - OpSpecConstant = 50, - OpSpecConstantComposite = 51, - OpSpecConstantOp = 52, - OpFunction = 54, - OpFunctionParameter = 55, - OpFunctionEnd = 56, - OpFunctionCall = 57, - OpVariable = 59, - OpImageTexelPointer = 60, - OpLoad = 61, - OpStore = 62, - OpCopyMemory = 63, - OpCopyMemorySized = 64, - OpAccessChain = 65, - OpInBoundsAccessChain = 66, - OpPtrAccessChain = 67, - 
OpArrayLength = 68, - OpGenericPtrMemSemantics = 69, - OpInBoundsPtrAccessChain = 70, - OpDecorate = 71, - OpMemberDecorate = 72, - OpDecorationGroup = 73, - OpGroupDecorate = 74, - OpGroupMemberDecorate = 75, - OpVectorExtractDynamic = 77, - OpVectorInsertDynamic = 78, - OpVectorShuffle = 79, - OpCompositeConstruct = 80, - OpCompositeExtract = 81, - OpCompositeInsert = 82, - OpCopyObject = 83, - OpTranspose = 84, - OpSampledImage = 86, - OpImageSampleImplicitLod = 87, - OpImageSampleExplicitLod = 88, - OpImageSampleDrefImplicitLod = 89, - OpImageSampleDrefExplicitLod = 90, - OpImageSampleProjImplicitLod = 91, - OpImageSampleProjExplicitLod = 92, - OpImageSampleProjDrefImplicitLod = 93, - OpImageSampleProjDrefExplicitLod = 94, - OpImageFetch = 95, - OpImageGather = 96, - OpImageDrefGather = 97, - OpImageRead = 98, - OpImageWrite = 99, - OpImage = 100, - OpImageQueryFormat = 101, - OpImageQueryOrder = 102, - OpImageQuerySizeLod = 103, - OpImageQuerySize = 104, - OpImageQueryLod = 105, - OpImageQueryLevels = 106, - OpImageQuerySamples = 107, - OpConvertFToU = 109, - OpConvertFToS = 110, - OpConvertSToF = 111, - OpConvertUToF = 112, - OpUConvert = 113, - OpSConvert = 114, - OpFConvert = 115, - OpQuantizeToF16 = 116, - OpConvertPtrToU = 117, - OpSatConvertSToU = 118, - OpSatConvertUToS = 119, - OpConvertUToPtr = 120, - OpPtrCastToGeneric = 121, - OpGenericCastToPtr = 122, - OpGenericCastToPtrExplicit = 123, - OpBitcast = 124, - OpSNegate = 126, - OpFNegate = 127, - OpIAdd = 128, - OpFAdd = 129, - OpISub = 130, - OpFSub = 131, - OpIMul = 132, - OpFMul = 133, - OpUDiv = 134, - OpSDiv = 135, - OpFDiv = 136, - OpUMod = 137, - OpSRem = 138, - OpSMod = 139, - OpFRem = 140, - OpFMod = 141, - OpVectorTimesScalar = 142, - OpMatrixTimesScalar = 143, - OpVectorTimesMatrix = 144, - OpMatrixTimesVector = 145, - OpMatrixTimesMatrix = 146, - OpOuterProduct = 147, - OpDot = 148, - OpIAddCarry = 149, - OpISubBorrow = 150, - OpUMulExtended = 151, - OpSMulExtended = 152, - OpAny = 154, - OpAll = 155, - OpIsNan = 156, - OpIsInf = 157, - OpIsFinite = 158, - OpIsNormal = 159, - OpSignBitSet = 160, - OpLessOrGreater = 161, - OpOrdered = 162, - OpUnordered = 163, - OpLogicalEqual = 164, - OpLogicalNotEqual = 165, - OpLogicalOr = 166, - OpLogicalAnd = 167, - OpLogicalNot = 168, - OpSelect = 169, - OpIEqual = 170, - OpINotEqual = 171, - OpUGreaterThan = 172, - OpSGreaterThan = 173, - OpUGreaterThanEqual = 174, - OpSGreaterThanEqual = 175, - OpULessThan = 176, - OpSLessThan = 177, - OpULessThanEqual = 178, - OpSLessThanEqual = 179, - OpFOrdEqual = 180, - OpFUnordEqual = 181, - OpFOrdNotEqual = 182, - OpFUnordNotEqual = 183, - OpFOrdLessThan = 184, - OpFUnordLessThan = 185, - OpFOrdGreaterThan = 186, - OpFUnordGreaterThan = 187, - OpFOrdLessThanEqual = 188, - OpFUnordLessThanEqual = 189, - OpFOrdGreaterThanEqual = 190, - OpFUnordGreaterThanEqual = 191, - OpShiftRightLogical = 194, - OpShiftRightArithmetic = 195, - OpShiftLeftLogical = 196, - OpBitwiseOr = 197, - OpBitwiseXor = 198, - OpBitwiseAnd = 199, - OpNot = 200, - OpBitFieldInsert = 201, - OpBitFieldSExtract = 202, - OpBitFieldUExtract = 203, - OpBitReverse = 204, - OpBitCount = 205, - OpDPdx = 207, - OpDPdy = 208, - OpFwidth = 209, - OpDPdxFine = 210, - OpDPdyFine = 211, - OpFwidthFine = 212, - OpDPdxCoarse = 213, - OpDPdyCoarse = 214, - OpFwidthCoarse = 215, - OpEmitVertex = 218, - OpEndPrimitive = 219, - OpEmitStreamVertex = 220, - OpEndStreamPrimitive = 221, - OpControlBarrier = 224, - OpMemoryBarrier = 225, - OpAtomicLoad = 227, - OpAtomicStore = 228, - 
OpAtomicExchange = 229, - OpAtomicCompareExchange = 230, - OpAtomicCompareExchangeWeak = 231, - OpAtomicIIncrement = 232, - OpAtomicIDecrement = 233, - OpAtomicIAdd = 234, - OpAtomicISub = 235, - OpAtomicSMin = 236, - OpAtomicUMin = 237, - OpAtomicSMax = 238, - OpAtomicUMax = 239, - OpAtomicAnd = 240, - OpAtomicOr = 241, - OpAtomicXor = 242, - OpPhi = 245, - OpLoopMerge = 246, - OpSelectionMerge = 247, - OpLabel = 248, - OpBranch = 249, - OpBranchConditional = 250, - OpSwitch = 251, - OpKill = 252, - OpReturn = 253, - OpReturnValue = 254, - OpUnreachable = 255, - OpLifetimeStart = 256, - OpLifetimeStop = 257, - OpGroupAsyncCopy = 259, - OpGroupWaitEvents = 260, - OpGroupAll = 261, - OpGroupAny = 262, - OpGroupBroadcast = 263, - OpGroupIAdd = 264, - OpGroupFAdd = 265, - OpGroupFMin = 266, - OpGroupUMin = 267, - OpGroupSMin = 268, - OpGroupFMax = 269, - OpGroupUMax = 270, - OpGroupSMax = 271, - OpReadPipe = 274, - OpWritePipe = 275, - OpReservedReadPipe = 276, - OpReservedWritePipe = 277, - OpReserveReadPipePackets = 278, - OpReserveWritePipePackets = 279, - OpCommitReadPipe = 280, - OpCommitWritePipe = 281, - OpIsValidReserveId = 282, - OpGetNumPipePackets = 283, - OpGetMaxPipePackets = 284, - OpGroupReserveReadPipePackets = 285, - OpGroupReserveWritePipePackets = 286, - OpGroupCommitReadPipe = 287, - OpGroupCommitWritePipe = 288, - OpEnqueueMarker = 291, - OpEnqueueKernel = 292, - OpGetKernelNDrangeSubGroupCount = 293, - OpGetKernelNDrangeMaxSubGroupSize = 294, - OpGetKernelWorkGroupSize = 295, - OpGetKernelPreferredWorkGroupSizeMultiple = 296, - OpRetainEvent = 297, - OpReleaseEvent = 298, - OpCreateUserEvent = 299, - OpIsValidEvent = 300, - OpSetUserEventStatus = 301, - OpCaptureEventProfilingInfo = 302, - OpGetDefaultQueue = 303, - OpBuildNDRange = 304, - OpImageSparseSampleImplicitLod = 305, - OpImageSparseSampleExplicitLod = 306, - OpImageSparseSampleDrefImplicitLod = 307, - OpImageSparseSampleDrefExplicitLod = 308, - OpImageSparseSampleProjImplicitLod = 309, - OpImageSparseSampleProjExplicitLod = 310, - OpImageSparseSampleProjDrefImplicitLod = 311, - OpImageSparseSampleProjDrefExplicitLod = 312, - OpImageSparseFetch = 313, - OpImageSparseGather = 314, - OpImageSparseDrefGather = 315, - OpImageSparseTexelsResident = 316, - OpNoLine = 317, - OpAtomicFlagTestAndSet = 318, - OpAtomicFlagClear = 319, - OpImageSparseRead = 320, - OpSizeOf = 321, - OpTypePipeStorage = 322, - OpConstantPipeStorage = 323, - OpCreatePipeFromPipeStorage = 324, - OpGetKernelLocalSizeForSubgroupCount = 325, - OpGetKernelMaxNumSubgroups = 326, - OpTypeNamedBarrier = 327, - OpNamedBarrierInitialize = 328, - OpMemoryNamedBarrier = 329, - OpModuleProcessed = 330, - OpExecutionModeId = 331, - OpDecorateId = 332, - OpSubgroupBallotKHR = 4421, - OpSubgroupFirstInvocationKHR = 4422, - OpSubgroupAllKHR = 4428, - OpSubgroupAnyKHR = 4429, - OpSubgroupAllEqualKHR = 4430, - OpSubgroupReadInvocationKHR = 4432, - OpGroupIAddNonUniformAMD = 5000, - OpGroupFAddNonUniformAMD = 5001, - OpGroupFMinNonUniformAMD = 5002, - OpGroupUMinNonUniformAMD = 5003, - OpGroupSMinNonUniformAMD = 5004, - OpGroupFMaxNonUniformAMD = 5005, - OpGroupUMaxNonUniformAMD = 5006, - OpGroupSMaxNonUniformAMD = 5007, - OpFragmentMaskFetchAMD = 5011, - OpFragmentFetchAMD = 5012, - OpSubgroupShuffleINTEL = 5571, - OpSubgroupShuffleDownINTEL = 5572, - OpSubgroupShuffleUpINTEL = 5573, - OpSubgroupShuffleXorINTEL = 5574, - OpSubgroupBlockReadINTEL = 5575, - OpSubgroupBlockWriteINTEL = 5576, - OpSubgroupImageBlockReadINTEL = 5577, - OpSubgroupImageBlockWriteINTEL = 
5578, - OpMax = 0x7fffffff, -}; - -// Overload operator| for mask bit combining - -inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } -inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } -inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } -inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } -inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } -inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } -inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } -inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } - -} // end namespace spv - -#endif // #ifndef spirv_HPP - diff --git a/third_party/glslang-spirv/spvIR.h b/third_party/glslang-spirv/spvIR.h deleted file mode 100644 index faa2701ff..000000000 --- a/third_party/glslang-spirv/spvIR.h +++ /dev/null @@ -1,407 +0,0 @@ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// SPIRV-IR -// -// Simple in-memory representation (IR) of SPIRV. Just for holding -// Each function's CFG of blocks. 
Has this hierarchy:
-//  - Module, which is a list of
-//  - Function, which is a list of
-//  - Block, which is a list of
-//  - Instruction
-//
-
-#pragma once
-#ifndef spvIR_H
-#define spvIR_H
-
-#include "spirv.hpp"
-
-#include <algorithm>
-#include <cassert>
-#include <functional>
-#include <iostream>
-#include <memory>
-#include <vector>
-
-namespace spv {
-
-class Block;
-class Function;
-class Module;
-
-const Id NoResult = 0;
-const Id NoType = 0;
-
-const Decoration NoPrecision = DecorationMax;
-
-#ifdef __GNUC__
-# define POTENTIALLY_UNUSED __attribute__((unused))
-#else
-# define POTENTIALLY_UNUSED
-#endif
-
-POTENTIALLY_UNUSED
-const MemorySemanticsMask MemorySemanticsAllMemory =
-    (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask |
-                          MemorySemanticsWorkgroupMemoryMask |
-                          MemorySemanticsAtomicCounterMemoryMask |
-                          MemorySemanticsImageMemoryMask);
-
-//
-// SPIR-V IR instruction.
-//
-
-class Instruction {
-public:
-    Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
-    explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
-    virtual ~Instruction() {}
-    void addIdOperand(Id id) { operands.push_back(id); }
-    void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); }
-    void addStringOperand(const char* str)
-    {
-        unsigned int word;
-        char* wordString = (char*)&word;
-        char* wordPtr = wordString;
-        int charCount = 0;
-        char c;
-        do {
-            c = *(str++);
-            *(wordPtr++) = c;
-            ++charCount;
-            if (charCount == 4) {
-                addImmediateOperand(word);
-                wordPtr = wordString;
-                charCount = 0;
-            }
-        } while (c != 0);
-
-        // deal with partial last word
-        if (charCount > 0) {
-            // pad with 0s
-            for (; charCount < 4; ++charCount)
-                *(wordPtr++) = 0;
-            addImmediateOperand(word);
-        }
-    }
-    void setBlock(Block* b) { block = b; }
-    Block* getBlock() const { return block; }
-    Op getOpCode() const { return opCode; }
-    int getNumOperands() const { return (int)operands.size(); }
-    Id getResultId() const { return resultId; }
-    Id getTypeId() const { return typeId; }
-    Id getIdOperand(int op) const { return operands[op]; }
-    unsigned int getImmediateOperand(int op) const { return operands[op]; }
-
-    // Write out the binary form.
-    void dump(std::vector<unsigned int>& out) const
-    {
-        // Compute the wordCount
-        unsigned int wordCount = 1;
-        if (typeId)
-            ++wordCount;
-        if (resultId)
-            ++wordCount;
-        wordCount += (unsigned int)operands.size();
-
-        // Write out the beginning of the instruction
-        out.push_back(((wordCount) << WordCountShift) | opCode);
-        if (typeId)
-            out.push_back(typeId);
-        if (resultId)
-            out.push_back(resultId);
-
-        // Write out the operands
-        for (int op = 0; op < (int)operands.size(); ++op)
-            out.push_back(operands[op]);
-    }
-
-protected:
-    Instruction(const Instruction&);
-    Id resultId;
-    Id typeId;
-    Op opCode;
-    std::vector<Id> operands;
-    Block* block;
-};
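For context on the binary form written by Instruction::dump() above: every SPIR-V instruction is a sequence of 32-bit words whose first word packs the word count into the high 16 bits and the opcode into the low 16 bits (the WordCountShift and OpCodeMask constants in the deleted spirv.hpp), followed by the optional type id, the optional result id, and the operands. A standalone sketch of that layout, reusing the OpTypeFloat opcode value (22) from the header above; the function name and the ids are illustrative:

#include <cstdint>
#include <vector>

// "%1 = OpTypeFloat 32" occupies three words: [(3 << 16) | 22, 1, 32].
std::vector<uint32_t> EncodeOpTypeFloat32() {
  const uint32_t kWordCountShift = 16;  // matches spv::WordCountShift
  const uint32_t kOpTypeFloat = 22;     // matches spv::OpTypeFloat
  std::vector<uint32_t> words;
  words.push_back((3u << kWordCountShift) | kOpTypeFloat);  // word count | opcode
  words.push_back(1u);   // result id %1
  words.push_back(32u);  // literal operand: bit width
  return words;
}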
-
-//
-// SPIR-V IR block.
-//
-
-class Block {
-public:
-    Block(Id id, Function& parent);
-    virtual ~Block()
-    {
-    }
-
-    Id getId() { return instructions.front()->getResultId(); }
-
-    Function& getParent() const { return parent; }
-    void addInstruction(std::unique_ptr<Instruction> inst);
-    void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
-    void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
-    const std::vector<Block*>& getPredecessors() const { return predecessors; }
-    const std::vector<Block*>& getSuccessors() const { return successors; }
-    const std::vector<std::unique_ptr<Instruction> >& getInstructions() const {
-        return instructions;
-    }
-    void setUnreachable() { unreachable = true; }
-    bool isUnreachable() const { return unreachable; }
-    // Returns the block's merge instruction, if one exists (otherwise null).
-    const Instruction* getMergeInstruction() const {
-        if (instructions.size() < 2) return nullptr;
-        const Instruction* nextToLast = (instructions.cend() - 2)->get();
-        switch (nextToLast->getOpCode()) {
-            case OpSelectionMerge:
-            case OpLoopMerge:
-                return nextToLast;
-            default:
-                return nullptr;
-        }
-        return nullptr;
-    }
-
-    bool isTerminated() const
-    {
-        switch (instructions.back()->getOpCode()) {
-        case OpBranch:
-        case OpBranchConditional:
-        case OpSwitch:
-        case OpKill:
-        case OpReturn:
-        case OpReturnValue:
-            return true;
-        default:
-            return false;
-        }
-    }
-
-    void dump(std::vector<unsigned int>& out) const
-    {
-        instructions[0]->dump(out);
-        for (int i = 0; i < (int)localVariables.size(); ++i)
-            localVariables[i]->dump(out);
-        for (int i = 1; i < (int)instructions.size(); ++i)
-            instructions[i]->dump(out);
-    }
-
-protected:
-    Block(const Block&);
-    Block& operator=(Block&);
-
-    // To enforce keeping parent and ownership in sync:
-    friend Function;
-
-    std::vector<std::unique_ptr<Instruction> > instructions;
-    std::vector<Block*> predecessors, successors;
-    std::vector<std::unique_ptr<Instruction> > localVariables;
-    Function& parent;
-
-    // track whether this block is known to be unreachable (not necessarily
-    // true for all unreachable blocks, but should be set at least
-    // for the extraneous ones introduced by the builder).
-    bool unreachable;
-};
-
-// Traverses the control-flow graph rooted at root in an order suited for
-// readable code generation. Invokes callback at every node in the traversal
-// order.
-void inReadableOrder(Block* root, std::function<void(Block*)> callback);
-
-//
-// SPIR-V IR Function.
-//
-
-class Function {
-public:
-    Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent);
-    virtual ~Function()
-    {
-        for (int i = 0; i < (int)parameterInstructions.size(); ++i)
-            delete parameterInstructions[i];
-
-        for (int i = 0; i < (int)blocks.size(); ++i)
-            delete blocks[i];
-    }
-    Id getId() const { return functionInstruction.getResultId(); }
-    Id getParamId(int p) { return parameterInstructions[p]->getResultId(); }
-
-    void addBlock(Block* block) { blocks.push_back(block); }
-    void removeBlock(Block* block)
-    {
-        auto found = find(blocks.begin(), blocks.end(), block);
-        assert(found != blocks.end());
-        blocks.erase(found);
-        delete block;
-    }
-
-    Module& getParent() const { return parent; }
-    Block* getEntryBlock() const { return blocks.front(); }
-    Block* getLastBlock() const { return blocks.back(); }
-    const std::vector<Block*>& getBlocks() const { return blocks; }
-    void addLocalVariable(std::unique_ptr<Instruction> inst);
-    Id getReturnType() const { return functionInstruction.getTypeId(); }
-
-    void setImplicitThis() { implicitThis = true; }
-    bool hasImplicitThis() const { return implicitThis; }
-
-    void dump(std::vector<unsigned int>& out) const
-    {
-        // OpFunction
-        functionInstruction.dump(out);
-
-        // OpFunctionParameter
-        for (int p = 0; p < (int)parameterInstructions.size(); ++p)
-            parameterInstructions[p]->dump(out);
-
-        // Blocks
-        inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); });
-        Instruction end(0, 0, OpFunctionEnd);
-        end.dump(out);
-    }
-
-protected:
-    Function(const Function&);
-    Function& operator=(Function&);
-
-    Module& parent;
-    Instruction functionInstruction;
-    std::vector<Instruction*> parameterInstructions;
-    std::vector<Block*> blocks;
-    bool implicitThis;  // true if this is a member function expecting to be passed a 'this' as the first argument
-};
-
-//
-// SPIR-V IR Module.
-//
-
-class Module {
-public:
-    Module() {}
-    virtual ~Module()
-    {
-        // TODO delete things
-    }
-
-    void addFunction(Function *fun) { functions.push_back(fun); }
-
-    void mapInstruction(Instruction *instruction)
-    {
-        spv::Id resultId = instruction->getResultId();
-        // map the instruction's result id
-        if (resultId >= idToInstruction.size())
-            idToInstruction.resize(resultId + 16);
-        idToInstruction[resultId] = instruction;
-    }
-
-    Instruction* getInstruction(Id id) const { return idToInstruction[id]; }
-    const std::vector<Function*>& getFunctions() const { return functions; }
-    spv::Id getTypeId(Id resultId) const { return idToInstruction[resultId]->getTypeId(); }
-    StorageClass getStorageClass(Id typeId) const
-    {
-        assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer);
-        return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0);
-    }
-
-    void dump(std::vector<unsigned int>& out) const
-    {
-        for (int f = 0; f < (int)functions.size(); ++f)
-            functions[f]->dump(out);
-    }
-
-protected:
-    Module(const Module&);
-    std::vector<Function*> functions;
-
-    // map from result id to instruction having that result id
-    std::vector<Instruction*> idToInstruction;
-
-    // map from a result id to its type id
-};
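Taken together, the classes above form the deleted builder's object model: a Module owns Functions, a Function owns Blocks, and a Block owns its Instructions, with result ids mapped back to instructions at the Module level. A minimal sketch of how this now-removed API fit together; it assumes the rest of the glslang-spirv sources are linked (Function::dump() needs the out-of-line inReadableOrder()), and all ids are illustrative:

#include <memory>
#include <vector>
#include "spvIR.h"  // the header being deleted here

int main() {
  spv::Module module;

  // %1 = OpTypeVoid; %2 = OpTypeFunction %1. Mapping them first lets the
  // Function constructor look up the parameter list on the OpTypeFunction.
  spv::Instruction* void_type = new spv::Instruction(1, spv::NoType, spv::OpTypeVoid);
  module.mapInstruction(void_type);
  spv::Instruction* fn_type = new spv::Instruction(2, spv::NoType, spv::OpTypeFunction);
  fn_type->addIdOperand(1);  // return type %1; no parameters
  module.mapInstruction(fn_type);

  // %3 = OpFunction %1 None %2, with one basic block (%5) that just returns.
  spv::Function* func = new spv::Function(3, 1, 2, 4, module);  // adds itself to module
  spv::Block* entry = new spv::Block(5, *func);  // begins with its own OpLabel
  func->addBlock(entry);
  entry->addInstruction(
      std::unique_ptr<spv::Instruction>(new spv::Instruction(spv::OpReturn)));

  std::vector<unsigned int> words;
  func->dump(words);  // OpFunction, OpLabel, OpReturn, OpFunctionEnd
  return 0;  // cleanup elided for brevity, as in Module's own "TODO delete things"
}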
-
-//
-// Implementation (it's here due to circular type definitions).
-//
-
-// Add both
-//  - the OpFunction instruction
-//  - all the OpFunctionParameter instructions
-__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent)
-    : parent(parent), functionInstruction(id, resultType, OpFunction), implicitThis(false)
-{
-    // OpFunction
-    functionInstruction.addImmediateOperand(FunctionControlMaskNone);
-    functionInstruction.addIdOperand(functionType);
-    parent.mapInstruction(&functionInstruction);
-    parent.addFunction(this);
-
-    // OpFunctionParameter
-    Instruction* typeInst = parent.getInstruction(functionType);
-    int numParams = typeInst->getNumOperands() - 1;
-    for (int p = 0; p < numParams; ++p) {
-        Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter);
-        parent.mapInstruction(param);
-        parameterInstructions.push_back(param);
-    }
-}
-
-__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)
-{
-    Instruction* raw_instruction = inst.get();
-    blocks[0]->addLocalVariable(std::move(inst));
-    parent.mapInstruction(raw_instruction);
-}
-
-__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
-{
-    instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
-    instructions.back()->setBlock(this);
-    parent.getParent().mapInstruction(instructions.back().get());
-}
-
-__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
-{
-    Instruction* raw_instruction = inst.get();
-    instructions.push_back(std::move(inst));
-    raw_instruction->setBlock(this);
-    if (raw_instruction->getResultId())
-        parent.getParent().mapInstruction(raw_instruction);
-}
-
-};  // end spv namespace
-
-#endif // spvIR_H
diff --git a/third_party/premake-core b/third_party/premake-core
index fe71eb790..7eba28258 160000
--- a/third_party/premake-core
+++ b/third_party/premake-core
@@ -1 +1 @@
-Subproject commit fe71eb790c7d085cd3c6a7b71a50167b4da06e69
+Subproject commit 7eba2825887e49d3a72b30e0a7480bd427a5bab0
diff --git a/third_party/spirv-headers b/third_party/spirv-headers
deleted file mode 160000
index 2bf91d32b..000000000
--- a/third_party/spirv-headers
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 2bf91d32b2ce17df9ca6c1e62cf478b24e7d2644
diff --git a/third_party/spirv-tools b/third_party/spirv-tools
deleted file mode 160000
index b390553f4..000000000
--- a/third_party/spirv-tools
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit b390553f4229bb7790efab6121be2bb5e15e2433
diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua
deleted file mode 100644
index 6b3f66fa3..000000000
--- a/third_party/spirv-tools.lua
+++ /dev/null
@@ -1,71 +0,0 @@
-group("third_party")
-project("spirv-tools")
-  uuid("621512da-bb50-40f2-85ba-ae615ff13e68")
-  kind("StaticLib")
-  language("C++")
-  links({
-  })
-  defines({
-    "_LIB",
-  })
-  includedirs({
-    "spirv-headers/include",
-    "spirv-tools/include",
-    "spirv-tools/source",
-  })
-  files({
-    "spirv-tools/include/spirv-tools/libspirv.h",
-    "spirv-tools/source/val/basic_block.cpp",
-    "spirv-tools/source/val/basic_block.h",
-    "spirv-tools/source/val/construct.cpp",
-    "spirv-tools/source/val/construct.h",
-    "spirv-tools/source/val/function.cpp",
-    "spirv-tools/source/val/function.h",
-    "spirv-tools/source/val/validation_state.cpp",
-    "spirv-tools/source/val/validation_state.h",
-    "spirv-tools/source/util/*.cpp",
-    "spirv-tools/source/util/*.h",
-    "spirv-tools/source/val/*.cpp",
-    "spirv-tools/source/val/*.h",
-    "spirv-tools/source/*.inc",
-    "spirv-tools/source/*.cpp",
-    "spirv-tools/source/*.h",
"spirv-tools/source/assembly_grammar.cpp", - "spirv-tools/source/assembly_grammar.h", - "spirv-tools/source/binary.cpp", - "spirv-tools/source/binary.h", - "spirv-tools/source/diagnostic.cpp", - "spirv-tools/source/diagnostic.h", - "spirv-tools/source/disassemble.cpp", - "spirv-tools/source/ext_inst.cpp", - "spirv-tools/source/ext_inst.h", - "spirv-tools/source/instruction.h", - "spirv-tools/source/macro.h", - "spirv-tools/source/opcode.cpp", - "spirv-tools/source/opcode.h", - "spirv-tools/source/operand.cpp", - "spirv-tools/source/operand.h", - "spirv-tools/source/print.cpp", - "spirv-tools/source/print.h", - -- "spirv-tools/source/software_version.cpp", - "spirv-tools/source/spirv_constant.h", - "spirv-tools/source/spirv_definition.h", - "spirv-tools/source/spirv_endian.cpp", - "spirv-tools/source/spirv_endian.h", - "spirv-tools/source/spirv_target_env.cpp", - "spirv-tools/source/spirv_target_env.h", - "spirv-tools/source/table.cpp", - "spirv-tools/source/table.h", - "spirv-tools/source/text.cpp", - "spirv-tools/source/text.h", - "spirv-tools/source/text_handler.cpp", - "spirv-tools/source/text_handler.h", - "spirv-tools/source/validate.cpp", - "spirv-tools/source/validate.h", - "spirv-tools/source/validate_cfg.cpp", - "spirv-tools/source/validate_id.cpp", - "spirv-tools/source/validate_instruction.cpp", - "spirv-tools/source/validate_layout.cpp", - "spirv-tools/source/util/bitutils.h", - "spirv-tools/source/util/hex_float.h", - }) diff --git a/third_party/spirv/GLSL.std.450.h b/third_party/spirv/GLSL.std.450.h deleted file mode 100644 index df31092be..000000000 --- a/third_party/spirv/GLSL.std.450.h +++ /dev/null @@ -1,131 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -static const int GLSLstd450Version = 100; -static const int GLSLstd450Revision = 1; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, // Reserved - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450NMin = 79, - GLSLstd450NMax = 80, - GLSLstd450NClamp = 81, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/third_party/spirv/GLSL.std.450.hpp11 b/third_party/spirv/GLSL.std.450.hpp11 deleted file mode 100644 index 526912006..000000000 --- a/third_party/spirv/GLSL.std.450.hpp11 +++ /dev/null @@ -1,135 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLstd450_HPP -#define GLSLstd450_HPP - -namespace spv { - -constexpr int GLSLstd450Version = 100; -constexpr int GLSLstd450Revision = 1; - -enum class GLSLstd450 { - kBad = 0, // Don't use - - kRound = 1, - kRoundEven = 2, - kTrunc = 3, - kFAbs = 4, - kSAbs = 5, - kFSign = 6, - kSSign = 7, - kFloor = 8, - kCeil = 9, - kFract = 10, - - kRadians = 11, - kDegrees = 12, - kSin = 13, - kCos = 14, - kTan = 15, - kAsin = 16, - kAcos = 17, - kAtan = 18, - kSinh = 19, - kCosh = 20, - kTanh = 21, - kAsinh = 22, - kAcosh = 23, - kAtanh = 24, - kAtan2 = 25, - - kPow = 26, - kExp = 27, - kLog = 28, - kExp2 = 29, - kLog2 = 30, - kSqrt = 31, - kInverseSqrt = 32, - - kDeterminant = 33, - kMatrixInverse = 34, - - kModf = 35, // second operand needs an OpVariable to write to - kModfStruct = 36, // no OpVariable operand - kFMin = 37, - kUMin = 38, - kSMin = 39, - kFMax = 40, - kUMax = 41, - kSMax = 42, - kFClamp = 43, - kUClamp = 44, - kSClamp = 45, - kFMix = 46, - kIMix = 47, // Reserved - kStep = 48, - kSmoothStep = 49, - - kFma = 50, - kFrexp = 51, // second operand needs an OpVariable to write to - kFrexpStruct = 52, // no OpVariable operand - kLdexp = 53, - - kPackSnorm4x8 = 54, - kPackUnorm4x8 = 55, - kPackSnorm2x16 = 56, - kPackUnorm2x16 = 57, - kPackHalf2x16 = 58, - kPackDouble2x32 = 59, - kUnpackSnorm2x16 = 60, - kUnpackUnorm2x16 = 61, - kUnpackHalf2x16 = 62, - kUnpackSnorm4x8 = 63, - kUnpackUnorm4x8 = 64, - kUnpackDouble2x32 = 65, - - kLength = 66, - kDistance = 67, - kCross = 68, - kNormalize = 69, - kFaceForward = 70, - kReflect = 71, - kRefract = 72, - - kFindILsb = 73, - kFindSMsb = 74, - kFindUMsb = 75, - - kInterpolateAtCentroid = 76, - kInterpolateAtSample = 77, - kInterpolateAtOffset = 78, - - kNMin = 79, - kNMax = 80, - kNClamp = 81, - - kCount -}; - -} // namespace spv - -#endif // #ifndef GLSLstd450_HPP diff --git a/third_party/spirv/OpenCL.std.h b/third_party/spirv/OpenCL.std.h deleted file mode 100644 index af29c527e..000000000 --- a/third_party/spirv/OpenCL.std.h +++ /dev/null @@ -1,272 +0,0 @@ -/* -** Copyright (c) 2015-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -// -// Author: Boaz Ouriel, Intel -// - -namespace OpenCLLIB { - -enum Entrypoints { - - // math functions - Acos = 0, - Acosh = 1, - Acospi = 2, - Asin = 3, - Asinh = 4, - Asinpi = 5, - Atan = 6, - Atan2 = 7, - Atanh = 8, - Atanpi = 9, - Atan2pi = 10, - Cbrt = 11, - Ceil = 12, - Copysign = 13, - Cos = 14, - Cosh = 15, - Cospi = 16, - Erfc = 17, - Erf = 18, - Exp = 19, - Exp2 = 20, - Exp10 = 21, - Expm1 = 22, - Fabs = 23, - Fdim = 24, - Floor = 25, - Fma = 26, - Fmax = 27, - Fmin = 28, - Fmod = 29, - Fract = 30, - Frexp = 31, - Hypot = 32, - Ilogb = 33, - Ldexp = 34, - Lgamma = 35, - Lgamma_r = 36, - Log = 37, - Log2 = 38, - Log10 = 39, - Log1p = 40, - Logb = 41, - Mad = 42, - Maxmag = 43, - Minmag = 44, - Modf = 45, - Nan = 46, - Nextafter = 47, - Pow = 48, - Pown = 49, - Powr = 50, - Remainder = 51, - Remquo = 52, - Rint = 53, - Rootn = 54, - Round = 55, - Rsqrt = 56, - Sin = 57, - Sincos = 58, - Sinh = 59, - Sinpi = 60, - Sqrt = 61, - Tan = 62, - Tanh = 63, - Tanpi = 64, - Tgamma = 65, - Trunc = 66, - Half_cos = 67, - Half_divide = 68, - Half_exp = 69, - Half_exp2 = 70, - Half_exp10 = 71, - Half_log = 72, - Half_log2 = 73, - Half_log10 = 74, - Half_powr = 75, - Half_recip = 76, - Half_rsqrt = 77, - Half_sin = 78, - Half_sqrt = 79, - Half_tan = 80, - Native_cos = 81, - Native_divide = 82, - Native_exp = 83, - Native_exp2 = 84, - Native_exp10 = 85, - Native_log = 86, - Native_log2 = 87, - Native_log10 = 88, - Native_powr = 89, - Native_recip = 90, - Native_rsqrt = 91, - Native_sin = 92, - Native_sqrt = 93, - Native_tan = 94, - - // Common - FClamp = 95, - Degrees = 96, - FMax_common = 97, - FMin_common = 98, - Mix = 99, - Radians = 100, - Step = 101, - Smoothstep = 102, - Sign = 103, - - // Geometrics - Cross = 104, - Distance = 105, - Length = 106, - Normalize = 107, - Fast_distance = 108, - Fast_length = 109, - Fast_normalize = 110, - - // Images - Deprecated - Read_imagef = 111, - Read_imagei = 112, - Read_imageui = 113, - Read_imageh = 114, - - Read_imagef_samplerless = 115, - Read_imagei_samplerless = 116, - Read_imageui_samplerless = 117, - Read_imageh_samplerless = 118, - - Write_imagef = 119, - Write_imagei = 120, - Write_imageui = 121, - Write_imageh = 122, - Read_imagef_mipmap_lod = 123, - Read_imagei_mipmap_lod = 124, - Read_imageui_mipmap_lod = 125, - Read_imagef_mipmap_grad = 126, - Read_imagei_mipmap_grad = 127, - Read_imageui_mipmap_grad = 128, - - // Image write with LOD - Write_imagef_mipmap_lod = 129, - Write_imagei_mipmap_lod = 130, - Write_imageui_mipmap_lod = 131, - - // Images - Deprecated - Get_image_width = 132, - Get_image_height = 133, - Get_image_depth = 134, - Get_image_channel_data_type = 135, - Get_image_channel_order = 136, - Get_image_dim = 137, - Get_image_array_size = 138, - Get_image_num_samples = 139, - Get_image_num_mip_levels = 140, - - // Integers - SAbs = 141, - SAbs_diff = 142, - SAdd_sat = 143, - UAdd_sat = 144, - SHadd 
-    UHadd = 146,
-    SRhadd = 147,
-    URhadd = 148,
-    SClamp = 149,
-    UClamp = 150,
-    Clz = 151,
-    Ctz = 152,
-    SMad_hi = 153,
-    UMad_sat = 154,
-    SMad_sat = 155,
-    SMax = 156,
-    UMax = 157,
-    SMin = 158,
-    UMin = 159,
-    SMul_hi = 160,
-    Rotate = 161,
-    SSub_sat = 162,
-    USub_sat = 163,
-    U_Upsample = 164,
-    S_Upsample = 165,
-    Popcount = 166,
-    SMad24 = 167,
-    UMad24 = 168,
-    SMul24 = 169,
-    UMul24 = 170,
-
-    // Vector Loads/Stores
-    Vloadn = 171,
-    Vstoren = 172,
-    Vload_half = 173,
-    Vload_halfn = 174,
-    Vstore_half = 175,
-    Vstore_half_r = 176,
-    Vstore_halfn = 177,
-    Vstore_halfn_r = 178,
-    Vloada_halfn = 179,
-    Vstorea_halfn = 180,
-    Vstorea_halfn_r = 181,
-
-    // Vector Misc
-    Shuffle = 182,
-    Shuffle2 = 183,
-
-    //
-    Printf = 184,
-    Prefetch = 185,
-
-    // Relationals
-    Bitselect = 186,
-    Select = 187,
-
-    // pipes
-    Read_pipe = 188,
-    Write_pipe = 189,
-    Reserve_read_pipe = 190,
-    Reserve_write_pipe = 191,
-    Commit_read_pipe = 192,
-    Commit_write_pipe = 193,
-    Is_valid_reserve_id = 194,
-    Work_group_reserve_read_pipe = 195,
-    Work_group_reserve_write_pipe = 196,
-    Work_group_commit_read_pipe = 197,
-    Work_group_commit_write_pipe = 198,
-    Get_pipe_num_packets = 199,
-    Get_pipe_max_packets = 200,
-
-    // more integers
-    UAbs = 201,
-    UAbs_diff = 202,
-    UMul_hi = 203,
-    UMad_hi = 204,
-};
-
-
-
-}; // end namespace OpenCL20
-
diff --git a/third_party/spirv/spirv.h b/third_party/spirv/spirv.h
deleted file mode 100644
index d48488e94..000000000
--- a/third_party/spirv/spirv.h
+++ /dev/null
@@ -1,871 +0,0 @@
-/*
-** Copyright (c) 2014-2016 The Khronos Group Inc.
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and/or associated documentation files (the "Materials"),
-** to deal in the Materials without restriction, including without limitation
-** the rights to use, copy, modify, merge, publish, distribute, sublicense,
-** and/or sell copies of the Materials, and to permit persons to whom the
-** Materials are furnished to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in
-** all copies or substantial portions of the Materials.
-**
-** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-**
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-** IN THE MATERIALS.
-*/
-
-/*
-** This header is automatically generated by the same tool that creates
-** the Binary Section of the SPIR-V specification.
-*/
-
-/*
-** Enumeration tokens for SPIR-V, in various styles:
-** C, C++, C++11, JSON, Lua, Python
-**
-** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
-** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
-** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
-** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
-** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
-**
-** Some tokens act like mask values, which can be OR'd together,
-** while others are mutually exclusive. The mask-like ones have
-** "Mask" in their name, and a parallel enum that has the shift
-** amount (1 << x) for each corresponding enumerant.
-*/
-
-#ifndef spirv_H
-#define spirv_H
-
-typedef unsigned int SpvId;
-
-#define SPV_VERSION 0x10000
-#define SPV_REVISION 3
-
-static const unsigned int SpvMagicNumber = 0x07230203;
-static const unsigned int SpvVersion = 0x00010000;
-static const unsigned int SpvRevision = 3;
-static const unsigned int SpvOpCodeMask = 0xffff;
-static const unsigned int SpvWordCountShift = 16;
-
-typedef enum SpvSourceLanguage_ {
-    SpvSourceLanguageUnknown = 0,
-    SpvSourceLanguageESSL = 1,
-    SpvSourceLanguageGLSL = 2,
-    SpvSourceLanguageOpenCL_C = 3,
-    SpvSourceLanguageOpenCL_CPP = 4,
-} SpvSourceLanguage;
-
-typedef enum SpvExecutionModel_ {
-    SpvExecutionModelVertex = 0,
-    SpvExecutionModelTessellationControl = 1,
-    SpvExecutionModelTessellationEvaluation = 2,
-    SpvExecutionModelGeometry = 3,
-    SpvExecutionModelFragment = 4,
-    SpvExecutionModelGLCompute = 5,
-    SpvExecutionModelKernel = 6,
-} SpvExecutionModel;
-
-typedef enum SpvAddressingModel_ {
-    SpvAddressingModelLogical = 0,
-    SpvAddressingModelPhysical32 = 1,
-    SpvAddressingModelPhysical64 = 2,
-} SpvAddressingModel;
-
-typedef enum SpvMemoryModel_ {
-    SpvMemoryModelSimple = 0,
-    SpvMemoryModelGLSL450 = 1,
-    SpvMemoryModelOpenCL = 2,
-} SpvMemoryModel;
-
-typedef enum SpvExecutionMode_ {
-    SpvExecutionModeInvocations = 0,
-    SpvExecutionModeSpacingEqual = 1,
-    SpvExecutionModeSpacingFractionalEven = 2,
-    SpvExecutionModeSpacingFractionalOdd = 3,
-    SpvExecutionModeVertexOrderCw = 4,
-    SpvExecutionModeVertexOrderCcw = 5,
-    SpvExecutionModePixelCenterInteger = 6,
-    SpvExecutionModeOriginUpperLeft = 7,
-    SpvExecutionModeOriginLowerLeft = 8,
-    SpvExecutionModeEarlyFragmentTests = 9,
-    SpvExecutionModePointMode = 10,
-    SpvExecutionModeXfb = 11,
-    SpvExecutionModeDepthReplacing = 12,
-    SpvExecutionModeDepthGreater = 14,
-    SpvExecutionModeDepthLess = 15,
-    SpvExecutionModeDepthUnchanged = 16,
-    SpvExecutionModeLocalSize = 17,
-    SpvExecutionModeLocalSizeHint = 18,
-    SpvExecutionModeInputPoints = 19,
-    SpvExecutionModeInputLines = 20,
-    SpvExecutionModeInputLinesAdjacency = 21,
-    SpvExecutionModeTriangles = 22,
-    SpvExecutionModeInputTrianglesAdjacency = 23,
-    SpvExecutionModeQuads = 24,
-    SpvExecutionModeIsolines = 25,
-    SpvExecutionModeOutputVertices = 26,
-    SpvExecutionModeOutputPoints = 27,
-    SpvExecutionModeOutputLineStrip = 28,
-    SpvExecutionModeOutputTriangleStrip = 29,
-    SpvExecutionModeVecTypeHint = 30,
-    SpvExecutionModeContractionOff = 31,
-} SpvExecutionMode;
-
-typedef enum SpvStorageClass_ {
-    SpvStorageClassUniformConstant = 0,
-    SpvStorageClassInput = 1,
-    SpvStorageClassUniform = 2,
-    SpvStorageClassOutput = 3,
-    SpvStorageClassWorkgroup = 4,
-    SpvStorageClassCrossWorkgroup = 5,
-    SpvStorageClassPrivate = 6,
-    SpvStorageClassFunction = 7,
-    SpvStorageClassGeneric = 8,
-    SpvStorageClassPushConstant = 9,
-    SpvStorageClassAtomicCounter = 10,
-    SpvStorageClassImage = 11,
-} SpvStorageClass;
-
-typedef enum SpvDim_ {
-    SpvDim1D = 0,
-    SpvDim2D = 1,
-    SpvDim3D = 2,
-    SpvDimCube = 3,
-    SpvDimRect = 4,
-    SpvDimBuffer = 5,
-    SpvDimSubpassData = 6,
-} SpvDim;
-
-typedef enum SpvSamplerAddressingMode_ {
-    SpvSamplerAddressingModeNone = 0,
-    SpvSamplerAddressingModeClampToEdge = 1,
-    SpvSamplerAddressingModeClamp = 2,
-    SpvSamplerAddressingModeRepeat = 3,
-    SpvSamplerAddressingModeRepeatMirrored = 4,
-} SpvSamplerAddressingMode;
-
-typedef enum SpvSamplerFilterMode_ {
-    SpvSamplerFilterModeNearest = 0,
-    SpvSamplerFilterModeLinear = 1,
-} SpvSamplerFilterMode;
-
-typedef enum SpvImageFormat_ {
-    SpvImageFormatUnknown = 0,
-    SpvImageFormatRgba32f = 1,
-    SpvImageFormatRgba16f = 2,
-    SpvImageFormatR32f = 3,
-    SpvImageFormatRgba8 = 4,
-    SpvImageFormatRgba8Snorm = 5,
-    SpvImageFormatRg32f = 6,
-    SpvImageFormatRg16f = 7,
-    SpvImageFormatR11fG11fB10f = 8,
-    SpvImageFormatR16f = 9,
-    SpvImageFormatRgba16 = 10,
-    SpvImageFormatRgb10A2 = 11,
-    SpvImageFormatRg16 = 12,
-    SpvImageFormatRg8 = 13,
-    SpvImageFormatR16 = 14,
-    SpvImageFormatR8 = 15,
-    SpvImageFormatRgba16Snorm = 16,
-    SpvImageFormatRg16Snorm = 17,
-    SpvImageFormatRg8Snorm = 18,
-    SpvImageFormatR16Snorm = 19,
-    SpvImageFormatR8Snorm = 20,
-    SpvImageFormatRgba32i = 21,
-    SpvImageFormatRgba16i = 22,
-    SpvImageFormatRgba8i = 23,
-    SpvImageFormatR32i = 24,
-    SpvImageFormatRg32i = 25,
-    SpvImageFormatRg16i = 26,
-    SpvImageFormatRg8i = 27,
-    SpvImageFormatR16i = 28,
-    SpvImageFormatR8i = 29,
-    SpvImageFormatRgba32ui = 30,
-    SpvImageFormatRgba16ui = 31,
-    SpvImageFormatRgba8ui = 32,
-    SpvImageFormatR32ui = 33,
-    SpvImageFormatRgb10a2ui = 34,
-    SpvImageFormatRg32ui = 35,
-    SpvImageFormatRg16ui = 36,
-    SpvImageFormatRg8ui = 37,
-    SpvImageFormatR16ui = 38,
-    SpvImageFormatR8ui = 39,
-} SpvImageFormat;
-
-typedef enum SpvImageChannelOrder_ {
-    SpvImageChannelOrderR = 0,
-    SpvImageChannelOrderA = 1,
-    SpvImageChannelOrderRG = 2,
-    SpvImageChannelOrderRA = 3,
-    SpvImageChannelOrderRGB = 4,
-    SpvImageChannelOrderRGBA = 5,
-    SpvImageChannelOrderBGRA = 6,
-    SpvImageChannelOrderARGB = 7,
-    SpvImageChannelOrderIntensity = 8,
-    SpvImageChannelOrderLuminance = 9,
-    SpvImageChannelOrderRx = 10,
-    SpvImageChannelOrderRGx = 11,
-    SpvImageChannelOrderRGBx = 12,
-    SpvImageChannelOrderDepth = 13,
-    SpvImageChannelOrderDepthStencil = 14,
-    SpvImageChannelOrdersRGB = 15,
-    SpvImageChannelOrdersRGBx = 16,
-    SpvImageChannelOrdersRGBA = 17,
-    SpvImageChannelOrdersBGRA = 18,
-} SpvImageChannelOrder;
-
-typedef enum SpvImageChannelDataType_ {
-    SpvImageChannelDataTypeSnormInt8 = 0,
-    SpvImageChannelDataTypeSnormInt16 = 1,
-    SpvImageChannelDataTypeUnormInt8 = 2,
-    SpvImageChannelDataTypeUnormInt16 = 3,
-    SpvImageChannelDataTypeUnormShort565 = 4,
-    SpvImageChannelDataTypeUnormShort555 = 5,
-    SpvImageChannelDataTypeUnormInt101010 = 6,
-    SpvImageChannelDataTypeSignedInt8 = 7,
-    SpvImageChannelDataTypeSignedInt16 = 8,
-    SpvImageChannelDataTypeSignedInt32 = 9,
-    SpvImageChannelDataTypeUnsignedInt8 = 10,
-    SpvImageChannelDataTypeUnsignedInt16 = 11,
-    SpvImageChannelDataTypeUnsignedInt32 = 12,
-    SpvImageChannelDataTypeHalfFloat = 13,
-    SpvImageChannelDataTypeFloat = 14,
-    SpvImageChannelDataTypeUnormInt24 = 15,
-    SpvImageChannelDataTypeUnormInt101010_2 = 16,
-} SpvImageChannelDataType;
-
-typedef enum SpvImageOperandsShift_ {
-    SpvImageOperandsBiasShift = 0,
-    SpvImageOperandsLodShift = 1,
-    SpvImageOperandsGradShift = 2,
-    SpvImageOperandsConstOffsetShift = 3,
-    SpvImageOperandsOffsetShift = 4,
-    SpvImageOperandsConstOffsetsShift = 5,
-    SpvImageOperandsSampleShift = 6,
-    SpvImageOperandsMinLodShift = 7,
-} SpvImageOperandsShift;
-
-typedef enum SpvImageOperandsMask_ {
-    SpvImageOperandsMaskNone = 0,
-    SpvImageOperandsBiasMask = 0x00000001,
-    SpvImageOperandsLodMask = 0x00000002,
-    SpvImageOperandsGradMask = 0x00000004,
-    SpvImageOperandsConstOffsetMask = 0x00000008,
-    SpvImageOperandsOffsetMask = 0x00000010,
-    SpvImageOperandsConstOffsetsMask = 0x00000020,
-    SpvImageOperandsSampleMask = 0x00000040,
-    SpvImageOperandsMinLodMask = 0x00000080,
-} SpvImageOperandsMask;
-
-typedef enum SpvFPFastMathModeShift_ {
-    SpvFPFastMathModeNotNaNShift = 0,
-    SpvFPFastMathModeNotInfShift = 1,
-    SpvFPFastMathModeNSZShift = 2,
-    SpvFPFastMathModeAllowRecipShift = 3,
-    SpvFPFastMathModeFastShift = 4,
-} SpvFPFastMathModeShift;
-
-typedef enum SpvFPFastMathModeMask_ {
-    SpvFPFastMathModeMaskNone = 0,
-    SpvFPFastMathModeNotNaNMask = 0x00000001,
-    SpvFPFastMathModeNotInfMask = 0x00000002,
-    SpvFPFastMathModeNSZMask = 0x00000004,
-    SpvFPFastMathModeAllowRecipMask = 0x00000008,
-    SpvFPFastMathModeFastMask = 0x00000010,
-} SpvFPFastMathModeMask;
-
-typedef enum SpvFPRoundingMode_ {
-    SpvFPRoundingModeRTE = 0,
-    SpvFPRoundingModeRTZ = 1,
-    SpvFPRoundingModeRTP = 2,
-    SpvFPRoundingModeRTN = 3,
-} SpvFPRoundingMode;
-
-typedef enum SpvLinkageType_ {
-    SpvLinkageTypeExport = 0,
-    SpvLinkageTypeImport = 1,
-} SpvLinkageType;
-
-typedef enum SpvAccessQualifier_ {
-    SpvAccessQualifierReadOnly = 0,
-    SpvAccessQualifierWriteOnly = 1,
-    SpvAccessQualifierReadWrite = 2,
-} SpvAccessQualifier;
-
-typedef enum SpvFunctionParameterAttribute_ {
-    SpvFunctionParameterAttributeZext = 0,
-    SpvFunctionParameterAttributeSext = 1,
-    SpvFunctionParameterAttributeByVal = 2,
-    SpvFunctionParameterAttributeSret = 3,
-    SpvFunctionParameterAttributeNoAlias = 4,
-    SpvFunctionParameterAttributeNoCapture = 5,
-    SpvFunctionParameterAttributeNoWrite = 6,
-    SpvFunctionParameterAttributeNoReadWrite = 7,
-} SpvFunctionParameterAttribute;
-
-typedef enum SpvDecoration_ {
-    SpvDecorationRelaxedPrecision = 0,
-    SpvDecorationSpecId = 1,
-    SpvDecorationBlock = 2,
-    SpvDecorationBufferBlock = 3,
-    SpvDecorationRowMajor = 4,
-    SpvDecorationColMajor = 5,
-    SpvDecorationArrayStride = 6,
-    SpvDecorationMatrixStride = 7,
-    SpvDecorationGLSLShared = 8,
-    SpvDecorationGLSLPacked = 9,
-    SpvDecorationCPacked = 10,
-    SpvDecorationBuiltIn = 11,
-    SpvDecorationNoPerspective = 13,
-    SpvDecorationFlat = 14,
-    SpvDecorationPatch = 15,
-    SpvDecorationCentroid = 16,
-    SpvDecorationSample = 17,
-    SpvDecorationInvariant = 18,
-    SpvDecorationRestrict = 19,
-    SpvDecorationAliased = 20,
-    SpvDecorationVolatile = 21,
-    SpvDecorationConstant = 22,
-    SpvDecorationCoherent = 23,
-    SpvDecorationNonWritable = 24,
-    SpvDecorationNonReadable = 25,
-    SpvDecorationUniform = 26,
-    SpvDecorationSaturatedConversion = 28,
-    SpvDecorationStream = 29,
-    SpvDecorationLocation = 30,
-    SpvDecorationComponent = 31,
-    SpvDecorationIndex = 32,
-    SpvDecorationBinding = 33,
-    SpvDecorationDescriptorSet = 34,
-    SpvDecorationOffset = 35,
-    SpvDecorationXfbBuffer = 36,
-    SpvDecorationXfbStride = 37,
-    SpvDecorationFuncParamAttr = 38,
-    SpvDecorationFPRoundingMode = 39,
-    SpvDecorationFPFastMathMode = 40,
-    SpvDecorationLinkageAttributes = 41,
-    SpvDecorationNoContraction = 42,
-    SpvDecorationInputAttachmentIndex = 43,
-    SpvDecorationAlignment = 44,
-} SpvDecoration;
-
-typedef enum SpvBuiltIn_ {
-    SpvBuiltInPosition = 0,
-    SpvBuiltInPointSize = 1,
-    SpvBuiltInClipDistance = 3,
-    SpvBuiltInCullDistance = 4,
-    SpvBuiltInVertexId = 5,
-    SpvBuiltInInstanceId = 6,
-    SpvBuiltInPrimitiveId = 7,
-    SpvBuiltInInvocationId = 8,
-    SpvBuiltInLayer = 9,
-    SpvBuiltInViewportIndex = 10,
-    SpvBuiltInTessLevelOuter = 11,
-    SpvBuiltInTessLevelInner = 12,
-    SpvBuiltInTessCoord = 13,
-    SpvBuiltInPatchVertices = 14,
-    SpvBuiltInFragCoord = 15,
-    SpvBuiltInPointCoord = 16,
-    SpvBuiltInFrontFacing = 17,
-    SpvBuiltInSampleId = 18,
-    SpvBuiltInSamplePosition = 19,
-    SpvBuiltInSampleMask = 20,
-    SpvBuiltInFragDepth = 22,
-    SpvBuiltInHelperInvocation = 23,
-    SpvBuiltInNumWorkgroups = 24,
-    SpvBuiltInWorkgroupSize = 25,
-    SpvBuiltInWorkgroupId = 26,
-    SpvBuiltInLocalInvocationId = 27,
-    SpvBuiltInGlobalInvocationId = 28,
-    SpvBuiltInLocalInvocationIndex = 29,
-    SpvBuiltInWorkDim = 30,
-    SpvBuiltInGlobalSize = 31,
-    SpvBuiltInEnqueuedWorkgroupSize = 32,
-    SpvBuiltInGlobalOffset = 33,
-    SpvBuiltInGlobalLinearId = 34,
-    SpvBuiltInSubgroupSize = 36,
-    SpvBuiltInSubgroupMaxSize = 37,
-    SpvBuiltInNumSubgroups = 38,
-    SpvBuiltInNumEnqueuedSubgroups = 39,
-    SpvBuiltInSubgroupId = 40,
-    SpvBuiltInSubgroupLocalInvocationId = 41,
-    SpvBuiltInVertexIndex = 42,
-    SpvBuiltInInstanceIndex = 43,
-} SpvBuiltIn;
-
-typedef enum SpvSelectionControlShift_ {
-    SpvSelectionControlFlattenShift = 0,
-    SpvSelectionControlDontFlattenShift = 1,
-} SpvSelectionControlShift;
-
-typedef enum SpvSelectionControlMask_ {
-    SpvSelectionControlMaskNone = 0,
-    SpvSelectionControlFlattenMask = 0x00000001,
-    SpvSelectionControlDontFlattenMask = 0x00000002,
-} SpvSelectionControlMask;
-
-typedef enum SpvLoopControlShift_ {
-    SpvLoopControlUnrollShift = 0,
-    SpvLoopControlDontUnrollShift = 1,
-} SpvLoopControlShift;
-
-typedef enum SpvLoopControlMask_ {
-    SpvLoopControlMaskNone = 0,
-    SpvLoopControlUnrollMask = 0x00000001,
-    SpvLoopControlDontUnrollMask = 0x00000002,
-} SpvLoopControlMask;
-
-typedef enum SpvFunctionControlShift_ {
-    SpvFunctionControlInlineShift = 0,
-    SpvFunctionControlDontInlineShift = 1,
-    SpvFunctionControlPureShift = 2,
-    SpvFunctionControlConstShift = 3,
-} SpvFunctionControlShift;
-
-typedef enum SpvFunctionControlMask_ {
-    SpvFunctionControlMaskNone = 0,
-    SpvFunctionControlInlineMask = 0x00000001,
-    SpvFunctionControlDontInlineMask = 0x00000002,
-    SpvFunctionControlPureMask = 0x00000004,
-    SpvFunctionControlConstMask = 0x00000008,
-} SpvFunctionControlMask;
-
-typedef enum SpvMemorySemanticsShift_ {
-    SpvMemorySemanticsAcquireShift = 1,
-    SpvMemorySemanticsReleaseShift = 2,
-    SpvMemorySemanticsAcquireReleaseShift = 3,
-    SpvMemorySemanticsSequentiallyConsistentShift = 4,
-    SpvMemorySemanticsUniformMemoryShift = 6,
-    SpvMemorySemanticsSubgroupMemoryShift = 7,
-    SpvMemorySemanticsWorkgroupMemoryShift = 8,
-    SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
-    SpvMemorySemanticsAtomicCounterMemoryShift = 10,
-    SpvMemorySemanticsImageMemoryShift = 11,
-} SpvMemorySemanticsShift;
-
-typedef enum SpvMemorySemanticsMask_ {
-    SpvMemorySemanticsMaskNone = 0,
-    SpvMemorySemanticsAcquireMask = 0x00000002,
-    SpvMemorySemanticsReleaseMask = 0x00000004,
-    SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
-    SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
-    SpvMemorySemanticsUniformMemoryMask = 0x00000040,
-    SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
-    SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
-    SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
-    SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
-    SpvMemorySemanticsImageMemoryMask = 0x00000800,
-} SpvMemorySemanticsMask;
-
-typedef enum SpvMemoryAccessShift_ {
-    SpvMemoryAccessVolatileShift = 0,
-    SpvMemoryAccessAlignedShift = 1,
-    SpvMemoryAccessNontemporalShift = 2,
-} SpvMemoryAccessShift;
-
-typedef enum SpvMemoryAccessMask_ {
-    SpvMemoryAccessMaskNone = 0,
-    SpvMemoryAccessVolatileMask = 0x00000001,
-    SpvMemoryAccessAlignedMask = 0x00000002,
-    SpvMemoryAccessNontemporalMask = 0x00000004,
-} SpvMemoryAccessMask;
-
-typedef enum SpvScope_ {
-    SpvScopeCrossDevice = 0,
-    SpvScopeDevice = 1,
-    SpvScopeWorkgroup = 2,
-    SpvScopeSubgroup = 3,
-    SpvScopeInvocation = 4,
-} SpvScope;
-
-typedef enum SpvGroupOperation_ {
-    SpvGroupOperationReduce = 0,
-    SpvGroupOperationInclusiveScan = 1,
-    SpvGroupOperationExclusiveScan = 2,
-} SpvGroupOperation;
-
-typedef enum SpvKernelEnqueueFlags_ {
-    SpvKernelEnqueueFlagsNoWait = 0,
-    SpvKernelEnqueueFlagsWaitKernel = 1,
-    SpvKernelEnqueueFlagsWaitWorkGroup = 2,
-} SpvKernelEnqueueFlags;
-
-typedef enum SpvKernelProfilingInfoShift_ {
-    SpvKernelProfilingInfoCmdExecTimeShift = 0,
-} SpvKernelProfilingInfoShift;
-
-typedef enum SpvKernelProfilingInfoMask_ {
-    SpvKernelProfilingInfoMaskNone = 0,
-    SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
-} SpvKernelProfilingInfoMask;
-
-typedef enum SpvCapability_ {
-    SpvCapabilityMatrix = 0,
-    SpvCapabilityShader = 1,
-    SpvCapabilityGeometry = 2,
-    SpvCapabilityTessellation = 3,
-    SpvCapabilityAddresses = 4,
-    SpvCapabilityLinkage = 5,
-    SpvCapabilityKernel = 6,
-    SpvCapabilityVector16 = 7,
-    SpvCapabilityFloat16Buffer = 8,
-    SpvCapabilityFloat16 = 9,
-    SpvCapabilityFloat64 = 10,
-    SpvCapabilityInt64 = 11,
-    SpvCapabilityInt64Atomics = 12,
-    SpvCapabilityImageBasic = 13,
-    SpvCapabilityImageReadWrite = 14,
-    SpvCapabilityImageMipmap = 15,
-    SpvCapabilityPipes = 17,
-    SpvCapabilityGroups = 18,
-    SpvCapabilityDeviceEnqueue = 19,
-    SpvCapabilityLiteralSampler = 20,
-    SpvCapabilityAtomicStorage = 21,
-    SpvCapabilityInt16 = 22,
-    SpvCapabilityTessellationPointSize = 23,
-    SpvCapabilityGeometryPointSize = 24,
-    SpvCapabilityImageGatherExtended = 25,
-    SpvCapabilityStorageImageMultisample = 27,
-    SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
-    SpvCapabilitySampledImageArrayDynamicIndexing = 29,
-    SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
-    SpvCapabilityStorageImageArrayDynamicIndexing = 31,
-    SpvCapabilityClipDistance = 32,
-    SpvCapabilityCullDistance = 33,
-    SpvCapabilityImageCubeArray = 34,
-    SpvCapabilitySampleRateShading = 35,
-    SpvCapabilityImageRect = 36,
-    SpvCapabilitySampledRect = 37,
-    SpvCapabilityGenericPointer = 38,
-    SpvCapabilityInt8 = 39,
-    SpvCapabilityInputAttachment = 40,
-    SpvCapabilitySparseResidency = 41,
-    SpvCapabilityMinLod = 42,
-    SpvCapabilitySampled1D = 43,
-    SpvCapabilityImage1D = 44,
-    SpvCapabilitySampledCubeArray = 45,
-    SpvCapabilitySampledBuffer = 46,
-    SpvCapabilityImageBuffer = 47,
-    SpvCapabilityImageMSArray = 48,
-    SpvCapabilityStorageImageExtendedFormats = 49,
-    SpvCapabilityImageQuery = 50,
-    SpvCapabilityDerivativeControl = 51,
-    SpvCapabilityInterpolationFunction = 52,
-    SpvCapabilityTransformFeedback = 53,
-    SpvCapabilityGeometryStreams = 54,
-    SpvCapabilityStorageImageReadWithoutFormat = 55,
-    SpvCapabilityStorageImageWriteWithoutFormat = 56,
-    SpvCapabilityMultiViewport = 57,
-} SpvCapability;
-
-typedef enum SpvOp_ {
-    SpvOpNop = 0,
-    SpvOpUndef = 1,
-    SpvOpSourceContinued = 2,
-    SpvOpSource = 3,
-    SpvOpSourceExtension = 4,
-    SpvOpName = 5,
-    SpvOpMemberName = 6,
-    SpvOpString = 7,
-    SpvOpLine = 8,
-    SpvOpExtension = 10,
-    SpvOpExtInstImport = 11,
-    SpvOpExtInst = 12,
-    SpvOpMemoryModel = 14,
-    SpvOpEntryPoint = 15,
-    SpvOpExecutionMode = 16,
-    SpvOpCapability = 17,
-    SpvOpTypeVoid = 19,
-    SpvOpTypeBool = 20,
-    SpvOpTypeInt = 21,
-    SpvOpTypeFloat = 22,
-    SpvOpTypeVector = 23,
-    SpvOpTypeMatrix = 24,
-    SpvOpTypeImage = 25,
-    SpvOpTypeSampler = 26,
-    SpvOpTypeSampledImage = 27,
-    SpvOpTypeArray = 28,
-    SpvOpTypeRuntimeArray = 29,
-    SpvOpTypeStruct = 30,
-    SpvOpTypeOpaque = 31,
-    SpvOpTypePointer = 32,
-    SpvOpTypeFunction = 33,
-    SpvOpTypeEvent = 34,
-    SpvOpTypeDeviceEvent = 35,
-    SpvOpTypeReserveId = 36,
-    SpvOpTypeQueue = 37,
-    SpvOpTypePipe = 38,
-    SpvOpTypeForwardPointer = 39,
-    SpvOpConstantTrue = 41,
-    SpvOpConstantFalse = 42,
-    SpvOpConstant = 43,
-    SpvOpConstantComposite = 44,
-    SpvOpConstantSampler = 45,
-    SpvOpConstantNull = 46,
-    SpvOpSpecConstantTrue = 48,
-    SpvOpSpecConstantFalse = 49,
-    SpvOpSpecConstant = 50,
-    SpvOpSpecConstantComposite = 51,
-    SpvOpSpecConstantOp = 52,
-    SpvOpFunction = 54,
-    SpvOpFunctionParameter = 55,
-    SpvOpFunctionEnd = 56,
-    SpvOpFunctionCall = 57,
-    SpvOpVariable = 59,
-    SpvOpImageTexelPointer = 60,
-    SpvOpLoad = 61,
-    SpvOpStore = 62,
-    SpvOpCopyMemory = 63,
-    SpvOpCopyMemorySized = 64,
-    SpvOpAccessChain = 65,
-    SpvOpInBoundsAccessChain = 66,
-    SpvOpPtrAccessChain = 67,
-    SpvOpArrayLength = 68,
-    SpvOpGenericPtrMemSemantics = 69,
-    SpvOpInBoundsPtrAccessChain = 70,
-    SpvOpDecorate = 71,
-    SpvOpMemberDecorate = 72,
-    SpvOpDecorationGroup = 73,
-    SpvOpGroupDecorate = 74,
-    SpvOpGroupMemberDecorate = 75,
-    SpvOpVectorExtractDynamic = 77,
-    SpvOpVectorInsertDynamic = 78,
-    SpvOpVectorShuffle = 79,
-    SpvOpCompositeConstruct = 80,
-    SpvOpCompositeExtract = 81,
-    SpvOpCompositeInsert = 82,
-    SpvOpCopyObject = 83,
-    SpvOpTranspose = 84,
-    SpvOpSampledImage = 86,
-    SpvOpImageSampleImplicitLod = 87,
-    SpvOpImageSampleExplicitLod = 88,
-    SpvOpImageSampleDrefImplicitLod = 89,
-    SpvOpImageSampleDrefExplicitLod = 90,
-    SpvOpImageSampleProjImplicitLod = 91,
-    SpvOpImageSampleProjExplicitLod = 92,
-    SpvOpImageSampleProjDrefImplicitLod = 93,
-    SpvOpImageSampleProjDrefExplicitLod = 94,
-    SpvOpImageFetch = 95,
-    SpvOpImageGather = 96,
-    SpvOpImageDrefGather = 97,
-    SpvOpImageRead = 98,
-    SpvOpImageWrite = 99,
-    SpvOpImage = 100,
-    SpvOpImageQueryFormat = 101,
-    SpvOpImageQueryOrder = 102,
-    SpvOpImageQuerySizeLod = 103,
-    SpvOpImageQuerySize = 104,
-    SpvOpImageQueryLod = 105,
-    SpvOpImageQueryLevels = 106,
-    SpvOpImageQuerySamples = 107,
-    SpvOpConvertFToU = 109,
-    SpvOpConvertFToS = 110,
-    SpvOpConvertSToF = 111,
-    SpvOpConvertUToF = 112,
-    SpvOpUConvert = 113,
-    SpvOpSConvert = 114,
-    SpvOpFConvert = 115,
-    SpvOpQuantizeToF16 = 116,
-    SpvOpConvertPtrToU = 117,
-    SpvOpSatConvertSToU = 118,
-    SpvOpSatConvertUToS = 119,
-    SpvOpConvertUToPtr = 120,
-    SpvOpPtrCastToGeneric = 121,
-    SpvOpGenericCastToPtr = 122,
-    SpvOpGenericCastToPtrExplicit = 123,
-    SpvOpBitcast = 124,
-    SpvOpSNegate = 126,
-    SpvOpFNegate = 127,
-    SpvOpIAdd = 128,
-    SpvOpFAdd = 129,
-    SpvOpISub = 130,
-    SpvOpFSub = 131,
-    SpvOpIMul = 132,
-    SpvOpFMul = 133,
-    SpvOpUDiv = 134,
-    SpvOpSDiv = 135,
-    SpvOpFDiv = 136,
-    SpvOpUMod = 137,
-    SpvOpSRem = 138,
-    SpvOpSMod = 139,
-    SpvOpFRem = 140,
-    SpvOpFMod = 141,
-    SpvOpVectorTimesScalar = 142,
-    SpvOpMatrixTimesScalar = 143,
-    SpvOpVectorTimesMatrix = 144,
-    SpvOpMatrixTimesVector = 145,
-    SpvOpMatrixTimesMatrix = 146,
-    SpvOpOuterProduct = 147,
-    SpvOpDot = 148,
-    SpvOpIAddCarry = 149,
-    SpvOpISubBorrow = 150,
-    SpvOpUMulExtended = 151,
-    SpvOpSMulExtended = 152,
-    SpvOpAny = 154,
-    SpvOpAll = 155,
-    SpvOpIsNan = 156,
-    SpvOpIsInf = 157,
-    SpvOpIsFinite = 158,
-    SpvOpIsNormal = 159,
-    SpvOpSignBitSet = 160,
-    SpvOpLessOrGreater = 161,
-    SpvOpOrdered = 162,
-    SpvOpUnordered = 163,
-    SpvOpLogicalEqual = 164,
-    SpvOpLogicalNotEqual = 165,
-    SpvOpLogicalOr = 166,
-    SpvOpLogicalAnd = 167,
-    SpvOpLogicalNot = 168,
-    SpvOpSelect = 169,
-    SpvOpIEqual = 170,
-    SpvOpINotEqual = 171,
-    SpvOpUGreaterThan = 172,
-    SpvOpSGreaterThan = 173,
-    SpvOpUGreaterThanEqual = 174,
-    SpvOpSGreaterThanEqual = 175,
-    SpvOpULessThan = 176,
-    SpvOpSLessThan = 177,
-    SpvOpULessThanEqual = 178,
-    SpvOpSLessThanEqual = 179,
-    SpvOpFOrdEqual = 180,
-    SpvOpFUnordEqual = 181,
-    SpvOpFOrdNotEqual = 182,
-    SpvOpFUnordNotEqual = 183,
-    SpvOpFOrdLessThan = 184,
-    SpvOpFUnordLessThan = 185,
-    SpvOpFOrdGreaterThan = 186,
-    SpvOpFUnordGreaterThan = 187,
-    SpvOpFOrdLessThanEqual = 188,
-    SpvOpFUnordLessThanEqual = 189,
-    SpvOpFOrdGreaterThanEqual = 190,
-    SpvOpFUnordGreaterThanEqual = 191,
-    SpvOpShiftRightLogical = 194,
-    SpvOpShiftRightArithmetic = 195,
-    SpvOpShiftLeftLogical = 196,
-    SpvOpBitwiseOr = 197,
-    SpvOpBitwiseXor = 198,
-    SpvOpBitwiseAnd = 199,
-    SpvOpNot = 200,
-    SpvOpBitFieldInsert = 201,
-    SpvOpBitFieldSExtract = 202,
-    SpvOpBitFieldUExtract = 203,
-    SpvOpBitReverse = 204,
-    SpvOpBitCount = 205,
-    SpvOpDPdx = 207,
-    SpvOpDPdy = 208,
-    SpvOpFwidth = 209,
-    SpvOpDPdxFine = 210,
-    SpvOpDPdyFine = 211,
-    SpvOpFwidthFine = 212,
-    SpvOpDPdxCoarse = 213,
-    SpvOpDPdyCoarse = 214,
-    SpvOpFwidthCoarse = 215,
-    SpvOpEmitVertex = 218,
-    SpvOpEndPrimitive = 219,
-    SpvOpEmitStreamVertex = 220,
-    SpvOpEndStreamPrimitive = 221,
-    SpvOpControlBarrier = 224,
-    SpvOpMemoryBarrier = 225,
-    SpvOpAtomicLoad = 227,
-    SpvOpAtomicStore = 228,
-    SpvOpAtomicExchange = 229,
-    SpvOpAtomicCompareExchange = 230,
-    SpvOpAtomicCompareExchangeWeak = 231,
-    SpvOpAtomicIIncrement = 232,
-    SpvOpAtomicIDecrement = 233,
-    SpvOpAtomicIAdd = 234,
-    SpvOpAtomicISub = 235,
-    SpvOpAtomicSMin = 236,
-    SpvOpAtomicUMin = 237,
-    SpvOpAtomicSMax = 238,
-    SpvOpAtomicUMax = 239,
-    SpvOpAtomicAnd = 240,
-    SpvOpAtomicOr = 241,
-    SpvOpAtomicXor = 242,
-    SpvOpPhi = 245,
-    SpvOpLoopMerge = 246,
-    SpvOpSelectionMerge = 247,
-    SpvOpLabel = 248,
-    SpvOpBranch = 249,
-    SpvOpBranchConditional = 250,
-    SpvOpSwitch = 251,
-    SpvOpKill = 252,
-    SpvOpReturn = 253,
-    SpvOpReturnValue = 254,
-    SpvOpUnreachable = 255,
-    SpvOpLifetimeStart = 256,
-    SpvOpLifetimeStop = 257,
-    SpvOpGroupAsyncCopy = 259,
-    SpvOpGroupWaitEvents = 260,
-    SpvOpGroupAll = 261,
-    SpvOpGroupAny = 262,
-    SpvOpGroupBroadcast = 263,
-    SpvOpGroupIAdd = 264,
-    SpvOpGroupFAdd = 265,
-    SpvOpGroupFMin = 266,
-    SpvOpGroupUMin = 267,
-    SpvOpGroupSMin = 268,
-    SpvOpGroupFMax = 269,
-    SpvOpGroupUMax = 270,
-    SpvOpGroupSMax = 271,
-    SpvOpReadPipe = 274,
-    SpvOpWritePipe = 275,
-    SpvOpReservedReadPipe = 276,
-    SpvOpReservedWritePipe = 277,
-    SpvOpReserveReadPipePackets = 278,
-    SpvOpReserveWritePipePackets = 279,
-    SpvOpCommitReadPipe = 280,
-    SpvOpCommitWritePipe = 281,
-    SpvOpIsValidReserveId = 282,
-    SpvOpGetNumPipePackets = 283,
-    SpvOpGetMaxPipePackets = 284,
-    SpvOpGroupReserveReadPipePackets = 285,
-    SpvOpGroupReserveWritePipePackets = 286,
-    SpvOpGroupCommitReadPipe = 287,
-    SpvOpGroupCommitWritePipe = 288,
-    SpvOpEnqueueMarker = 291,
-    SpvOpEnqueueKernel = 292,
-    SpvOpGetKernelNDrangeSubGroupCount = 293,
-    SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
-    SpvOpGetKernelWorkGroupSize = 295,
-    SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
-    SpvOpRetainEvent = 297,
-    SpvOpReleaseEvent = 298,
-    SpvOpCreateUserEvent = 299,
-    SpvOpIsValidEvent = 300,
-    SpvOpSetUserEventStatus = 301,
-    SpvOpCaptureEventProfilingInfo = 302,
-    SpvOpGetDefaultQueue = 303,
-    SpvOpBuildNDRange = 304,
-    SpvOpImageSparseSampleImplicitLod = 305,
-    SpvOpImageSparseSampleExplicitLod = 306,
-    SpvOpImageSparseSampleDrefImplicitLod = 307,
-    SpvOpImageSparseSampleDrefExplicitLod = 308,
-    SpvOpImageSparseSampleProjImplicitLod = 309,
-    SpvOpImageSparseSampleProjExplicitLod = 310,
-    SpvOpImageSparseSampleProjDrefImplicitLod = 311,
-    SpvOpImageSparseSampleProjDrefExplicitLod = 312,
-    SpvOpImageSparseFetch = 313,
-    SpvOpImageSparseGather = 314,
-    SpvOpImageSparseDrefGather = 315,
-    SpvOpImageSparseTexelsResident = 316,
-    SpvOpNoLine = 317,
-    SpvOpAtomicFlagTestAndSet = 318,
-    SpvOpAtomicFlagClear = 319,
-    SpvOpImageSparseRead = 320,
-} SpvOp;
-
-#endif // #ifndef spirv_H
-
diff --git a/third_party/spirv/spirv.hpp11 b/third_party/spirv/spirv.hpp11
deleted file mode 100644
index 03faaac38..000000000
--- a/third_party/spirv/spirv.hpp11
+++ /dev/null
@@ -1,880 +0,0 @@
-// Copyright (c) 2014-2016 The Khronos Group Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and/or associated documentation files (the "Materials"),
-// to deal in the Materials without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Materials, and to permit persons to whom the
-// Materials are furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Materials.
-//
-// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-//
-// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-// IN THE MATERIALS.
-
-// This header is automatically generated by the same tool that creates
-// the Binary Section of the SPIR-V specification.
-
-// Enumeration tokens for SPIR-V, in various styles:
-// C, C++, C++11, JSON, Lua, Python
-//
-// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
-// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
-// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
-// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
-// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
-//
-// Some tokens act like mask values, which can be OR'd together,
-// while others are mutually exclusive. The mask-like ones have
-// "Mask" in their name, and a parallel enum that has the shift
-// amount (1 << x) for each corresponding enumerant.
-
-#ifndef spirv_HPP
-#define spirv_HPP
-
-namespace spv {
-
-typedef unsigned int Id;
-
-#define SPV_VERSION 0x10000
-#define SPV_REVISION 3
-
-static const unsigned int MagicNumber = 0x07230203;
-static const unsigned int Version = 0x00010000;
-static const unsigned int Revision = 3;
-static const unsigned int OpCodeMask = 0xffff;
-static const unsigned int WordCountShift = 16;
-
-enum class SourceLanguage : unsigned {
-    Unknown = 0,
-    ESSL = 1,
-    GLSL = 2,
-    OpenCL_C = 3,
-    OpenCL_CPP = 4,
-};
-
-enum class ExecutionModel : unsigned {
-    Vertex = 0,
-    TessellationControl = 1,
-    TessellationEvaluation = 2,
-    Geometry = 3,
-    Fragment = 4,
-    GLCompute = 5,
-    Kernel = 6,
-};
-
-enum class AddressingModel : unsigned {
-    Logical = 0,
-    Physical32 = 1,
-    Physical64 = 2,
-};
-
-enum class MemoryModel : unsigned {
-    Simple = 0,
-    GLSL450 = 1,
-    OpenCL = 2,
-};
-
-enum class ExecutionMode : unsigned {
-    Invocations = 0,
-    SpacingEqual = 1,
-    SpacingFractionalEven = 2,
-    SpacingFractionalOdd = 3,
-    VertexOrderCw = 4,
-    VertexOrderCcw = 5,
-    PixelCenterInteger = 6,
-    OriginUpperLeft = 7,
-    OriginLowerLeft = 8,
-    EarlyFragmentTests = 9,
-    PointMode = 10,
-    Xfb = 11,
-    DepthReplacing = 12,
-    DepthGreater = 14,
-    DepthLess = 15,
-    DepthUnchanged = 16,
-    LocalSize = 17,
-    LocalSizeHint = 18,
-    InputPoints = 19,
-    InputLines = 20,
-    InputLinesAdjacency = 21,
-    Triangles = 22,
-    InputTrianglesAdjacency = 23,
-    Quads = 24,
-    Isolines = 25,
-    OutputVertices = 26,
-    OutputPoints = 27,
-    OutputLineStrip = 28,
-    OutputTriangleStrip = 29,
-    VecTypeHint = 30,
-    ContractionOff = 31,
-};
-
-enum class StorageClass : unsigned {
-    UniformConstant = 0,
-    Input = 1,
-    Uniform = 2,
-    Output = 3,
-    Workgroup = 4,
-    CrossWorkgroup = 5,
-    Private = 6,
-    Function = 7,
-    Generic = 8,
-    PushConstant = 9,
-    AtomicCounter = 10,
-    Image = 11,
-};
-
-enum class Dim : unsigned {
-    Dim1D = 0,
-    Dim2D = 1,
-    Dim3D = 2,
-    Cube = 3,
-    Rect = 4,
-    Buffer = 5,
-    SubpassData = 6,
-};
-
-enum class SamplerAddressingMode : unsigned {
-    None = 0,
-    ClampToEdge = 1,
-    Clamp = 2,
-    Repeat = 3,
-    RepeatMirrored = 4,
-};
-
-enum class SamplerFilterMode : unsigned {
-    Nearest = 0,
-    Linear = 1,
-};
-
-enum class ImageFormat : unsigned {
-    Unknown = 0,
-    Rgba32f = 1,
-    Rgba16f = 2,
-    R32f = 3,
-    Rgba8 = 4,
-    Rgba8Snorm = 5,
-    Rg32f = 6,
-    Rg16f = 7,
-    R11fG11fB10f = 8,
-    R16f = 9,
-    Rgba16 = 10,
-    Rgb10A2 = 11,
-    Rg16 = 12,
-    Rg8 = 13,
-    R16 = 14,
-    R8 = 15,
-    Rgba16Snorm = 16,
-    Rg16Snorm = 17,
-    Rg8Snorm = 18,
-    R16Snorm = 19,
-    R8Snorm = 20,
-    Rgba32i = 21,
-    Rgba16i = 22,
-    Rgba8i = 23,
-    R32i = 24,
-    Rg32i = 25,
-    Rg16i = 26,
-    Rg8i = 27,
-    R16i = 28,
-    R8i = 29,
-    Rgba32ui = 30,
-    Rgba16ui = 31,
-    Rgba8ui = 32,
-    R32ui = 33,
-    Rgb10a2ui = 34,
-    Rg32ui = 35,
-    Rg16ui = 36,
-    Rg8ui = 37,
-    R16ui = 38,
-    R8ui = 39,
-};
-
-enum class ImageChannelOrder : unsigned {
-    R = 0,
-    A = 1,
-    RG = 2,
-    RA = 3,
-    RGB = 4,
-    RGBA = 5,
-    BGRA = 6,
-    ARGB = 7,
-    Intensity = 8,
-    Luminance = 9,
-    Rx = 10,
-    RGx = 11,
-    RGBx = 12,
-    Depth = 13,
-    DepthStencil = 14,
-    sRGB = 15,
-    sRGBx = 16,
-    sRGBA = 17,
-    sBGRA = 18,
-};
-
-enum class ImageChannelDataType : unsigned {
-    SnormInt8 = 0,
-    SnormInt16 = 1,
-    UnormInt8 = 2,
-    UnormInt16 = 3,
-    UnormShort565 = 4,
-    UnormShort555 = 5,
-    UnormInt101010 = 6,
-    SignedInt8 = 7,
-    SignedInt16 = 8,
-    SignedInt32 = 9,
-    UnsignedInt8 = 10,
-    UnsignedInt16 = 11,
-    UnsignedInt32 = 12,
-    HalfFloat = 13,
-    Float = 14,
-    UnormInt24 = 15,
-    UnormInt101010_2 = 16,
-};
-
-enum class ImageOperandsShift : unsigned {
-    Bias = 0,
-    Lod = 1,
-    Grad = 2,
-    ConstOffset = 3,
-    Offset = 4,
-    ConstOffsets = 5,
-    Sample = 6,
-    MinLod = 7,
-};
-
-enum class ImageOperandsMask : unsigned {
-    MaskNone = 0,
-    Bias = 0x00000001,
-    Lod = 0x00000002,
-    Grad = 0x00000004,
-    ConstOffset = 0x00000008,
-    Offset = 0x00000010,
-    ConstOffsets = 0x00000020,
-    Sample = 0x00000040,
-    MinLod = 0x00000080,
-};
-
-enum class FPFastMathModeShift : unsigned {
-    NotNaN = 0,
-    NotInf = 1,
-    NSZ = 2,
-    AllowRecip = 3,
-    Fast = 4,
-};
-
-enum class FPFastMathModeMask : unsigned {
-    MaskNone = 0,
-    NotNaN = 0x00000001,
-    NotInf = 0x00000002,
-    NSZ = 0x00000004,
-    AllowRecip = 0x00000008,
-    Fast = 0x00000010,
-};
-
-enum class FPRoundingMode : unsigned {
-    RTE = 0,
-    RTZ = 1,
-    RTP = 2,
-    RTN = 3,
-};
-
-enum class LinkageType : unsigned {
-    Export = 0,
-    Import = 1,
-};
-
-enum class AccessQualifier : unsigned {
-    ReadOnly = 0,
-    WriteOnly = 1,
-    ReadWrite = 2,
-};
-
-enum class FunctionParameterAttribute : unsigned {
-    Zext = 0,
-    Sext = 1,
-    ByVal = 2,
-    Sret = 3,
-    NoAlias = 4,
-    NoCapture = 5,
-    NoWrite = 6,
-    NoReadWrite = 7,
-};
-
-enum class Decoration : unsigned {
-    RelaxedPrecision = 0,
-    SpecId = 1,
-    Block = 2,
-    BufferBlock = 3,
-    RowMajor = 4,
-    ColMajor = 5,
-    ArrayStride = 6,
-    MatrixStride = 7,
-    GLSLShared = 8,
-    GLSLPacked = 9,
-    CPacked = 10,
-    BuiltIn = 11,
-    NoPerspective = 13,
-    Flat = 14,
-    Patch = 15,
-    Centroid = 16,
-    Sample = 17,
-    Invariant = 18,
-    Restrict = 19,
-    Aliased = 20,
-    Volatile = 21,
-    Constant = 22,
-    Coherent = 23,
-    NonWritable = 24,
-    NonReadable = 25,
-    Uniform = 26,
-    SaturatedConversion = 28,
-    Stream = 29,
-    Location = 30,
-    Component = 31,
-    Index = 32,
-    Binding = 33,
-    DescriptorSet = 34,
-    Offset = 35,
-    XfbBuffer = 36,
-    XfbStride = 37,
-    FuncParamAttr = 38,
-    FPRoundingMode = 39,
-    FPFastMathMode = 40,
-    LinkageAttributes = 41,
-    NoContraction = 42,
-    InputAttachmentIndex = 43,
-    Alignment = 44,
-};
-
-enum class BuiltIn : unsigned {
-    Position = 0,
-    PointSize = 1,
-    ClipDistance = 3,
-    CullDistance = 4,
-    VertexId = 5,
-    InstanceId = 6,
-    PrimitiveId = 7,
-    InvocationId = 8,
-    Layer = 9,
-    ViewportIndex = 10,
-    TessLevelOuter = 11,
-    TessLevelInner = 12,
-    TessCoord = 13,
-    PatchVertices = 14,
-    FragCoord = 15,
-    PointCoord = 16,
-    FrontFacing = 17,
-    SampleId = 18,
-    SamplePosition = 19,
-    SampleMask = 20,
-    FragDepth = 22,
-    HelperInvocation = 23,
-    NumWorkgroups = 24,
-    WorkgroupSize = 25,
-    WorkgroupId = 26,
-    LocalInvocationId = 27,
-    GlobalInvocationId = 28,
-    LocalInvocationIndex = 29,
-    WorkDim = 30,
-    GlobalSize = 31,
-    EnqueuedWorkgroupSize = 32,
-    GlobalOffset = 33,
-    GlobalLinearId = 34,
-    SubgroupSize = 36,
-    SubgroupMaxSize = 37,
-    NumSubgroups = 38,
-    NumEnqueuedSubgroups = 39,
-    SubgroupId = 40,
-    SubgroupLocalInvocationId = 41,
-    VertexIndex = 42,
-    InstanceIndex = 43,
-};
-
-enum class SelectionControlShift : unsigned {
-    Flatten = 0,
-    DontFlatten = 1,
-};
-
-enum class SelectionControlMask : unsigned {
-    MaskNone = 0,
-    Flatten = 0x00000001,
-    DontFlatten = 0x00000002,
-};
-
-enum class LoopControlShift : unsigned {
-    Unroll = 0,
-    DontUnroll = 1,
-};
-
-enum class LoopControlMask : unsigned {
-    MaskNone = 0,
-    Unroll = 0x00000001,
-    DontUnroll = 0x00000002,
-};
-
-enum class FunctionControlShift : unsigned {
-    Inline = 0,
-    DontInline = 1,
-    Pure = 2,
-    Const = 3,
-};
-
-enum class FunctionControlMask : unsigned {
-    MaskNone = 0,
-    Inline = 0x00000001,
-    DontInline = 0x00000002,
-    Pure = 0x00000004,
-    Const = 0x00000008,
-};
-
-enum class MemorySemanticsShift : unsigned {
-    Acquire = 1,
-    Release = 2,
-    AcquireRelease = 3,
-    SequentiallyConsistent = 4,
-    UniformMemory = 6,
-    SubgroupMemory = 7,
-    WorkgroupMemory = 8,
-    CrossWorkgroupMemory = 9,
-    AtomicCounterMemory = 10,
-    ImageMemory = 11,
-};
-
-enum class MemorySemanticsMask : unsigned {
-    MaskNone = 0,
-    Acquire = 0x00000002,
-    Release = 0x00000004,
-    AcquireRelease = 0x00000008,
-    SequentiallyConsistent = 0x00000010,
-    UniformMemory = 0x00000040,
-    SubgroupMemory = 0x00000080,
-    WorkgroupMemory = 0x00000100,
-    CrossWorkgroupMemory = 0x00000200,
-    AtomicCounterMemory = 0x00000400,
-    ImageMemory = 0x00000800,
-};
-
-enum class MemoryAccessShift : unsigned {
-    Volatile = 0,
-    Aligned = 1,
-    Nontemporal = 2,
-};
-
-enum class MemoryAccessMask : unsigned {
-    MaskNone = 0,
-    Volatile = 0x00000001,
-    Aligned = 0x00000002,
-    Nontemporal = 0x00000004,
-};
-
-enum class Scope : unsigned {
-    CrossDevice = 0,
-    Device = 1,
-    Workgroup = 2,
-    Subgroup = 3,
-    Invocation = 4,
-};
-
-enum class GroupOperation : unsigned {
-    Reduce = 0,
-    InclusiveScan = 1,
-    ExclusiveScan = 2,
-};
-
-enum class KernelEnqueueFlags : unsigned {
-    NoWait = 0,
-    WaitKernel = 1,
-    WaitWorkGroup = 2,
-};
-
-enum class KernelProfilingInfoShift : unsigned {
-    CmdExecTime = 0,
-};
-
-enum class KernelProfilingInfoMask : unsigned {
-    MaskNone = 0,
-    CmdExecTime = 0x00000001,
-};
-
-enum class Capability : unsigned {
-    Matrix = 0,
-    Shader = 1,
-    Geometry = 2,
-    Tessellation = 3,
-    Addresses = 4,
-    Linkage = 5,
-    Kernel = 6,
-    Vector16 = 7,
-    Float16Buffer = 8,
-    Float16 = 9,
-    Float64 = 10,
-    Int64 = 11,
-    Int64Atomics = 12,
-    ImageBasic = 13,
-    ImageReadWrite = 14,
-    ImageMipmap = 15,
-    Pipes = 17,
-    Groups = 18,
-    DeviceEnqueue = 19,
-    LiteralSampler = 20,
-    AtomicStorage = 21,
-    Int16 = 22,
-    TessellationPointSize = 23,
-    GeometryPointSize = 24,
-    ImageGatherExtended = 25,
-    StorageImageMultisample = 27,
-    UniformBufferArrayDynamicIndexing = 28,
-    SampledImageArrayDynamicIndexing = 29,
-    StorageBufferArrayDynamicIndexing = 30,
-    StorageImageArrayDynamicIndexing = 31,
-    ClipDistance = 32,
-    CullDistance = 33,
-    ImageCubeArray = 34,
-    SampleRateShading = 35,
-    ImageRect = 36,
-    SampledRect = 37,
-    GenericPointer = 38,
-    Int8 = 39,
-    InputAttachment = 40,
-    SparseResidency = 41,
-    MinLod = 42,
-    Sampled1D = 43,
-    Image1D = 44,
-    SampledCubeArray = 45,
-    SampledBuffer = 46,
-    ImageBuffer = 47,
-    ImageMSArray = 48,
-    StorageImageExtendedFormats = 49,
-    ImageQuery = 50,
-    DerivativeControl = 51,
-    InterpolationFunction = 52,
-    TransformFeedback = 53,
-    GeometryStreams = 54,
-    StorageImageReadWithoutFormat = 55,
-    StorageImageWriteWithoutFormat = 56,
-    MultiViewport = 57,
-};
-
-enum class Op : unsigned {
-    OpNop = 0,
-    OpUndef = 1,
-    OpSourceContinued = 2,
-    OpSource = 3,
-    OpSourceExtension = 4,
-    OpName = 5,
-    OpMemberName = 6,
-    OpString = 7,
-    OpLine = 8,
-    OpExtension = 10,
-    OpExtInstImport = 11,
-    OpExtInst = 12,
-    OpMemoryModel = 14,
-    OpEntryPoint = 15,
-    OpExecutionMode = 16,
-    OpCapability = 17,
-    OpTypeVoid = 19,
-    OpTypeBool = 20,
-    OpTypeInt = 21,
-    OpTypeFloat = 22,
-    OpTypeVector = 23,
-    OpTypeMatrix = 24,
-    OpTypeImage = 25,
-    OpTypeSampler = 26,
-    OpTypeSampledImage = 27,
-    OpTypeArray = 28,
-    OpTypeRuntimeArray = 29,
-    OpTypeStruct = 30,
-    OpTypeOpaque = 31,
-    OpTypePointer = 32,
-    OpTypeFunction = 33,
-    OpTypeEvent = 34,
-    OpTypeDeviceEvent = 35,
-    OpTypeReserveId = 36,
-    OpTypeQueue = 37,
-    OpTypePipe = 38,
-    OpTypeForwardPointer = 39,
-    OpConstantTrue = 41,
-    OpConstantFalse = 42,
-    OpConstant = 43,
-    OpConstantComposite = 44,
-    OpConstantSampler = 45,
-    OpConstantNull = 46,
-    OpSpecConstantTrue = 48,
-    OpSpecConstantFalse = 49,
-    OpSpecConstant = 50,
-    OpSpecConstantComposite = 51,
-    OpSpecConstantOp = 52,
-    OpFunction = 54,
-    OpFunctionParameter = 55,
-    OpFunctionEnd = 56,
-    OpFunctionCall = 57,
-    OpVariable = 59,
-    OpImageTexelPointer = 60,
-    OpLoad = 61,
-    OpStore = 62,
-    OpCopyMemory = 63,
-    OpCopyMemorySized = 64,
-    OpAccessChain = 65,
-    OpInBoundsAccessChain = 66,
-    OpPtrAccessChain = 67,
-    OpArrayLength = 68,
-    OpGenericPtrMemSemantics = 69,
-    OpInBoundsPtrAccessChain = 70,
-    OpDecorate = 71,
-    OpMemberDecorate = 72,
-    OpDecorationGroup = 73,
-    OpGroupDecorate = 74,
-    OpGroupMemberDecorate = 75,
-    OpVectorExtractDynamic = 77,
-    OpVectorInsertDynamic = 78,
-    OpVectorShuffle = 79,
-    OpCompositeConstruct = 80,
-    OpCompositeExtract = 81,
-    OpCompositeInsert = 82,
-    OpCopyObject = 83,
-    OpTranspose = 84,
-    OpSampledImage = 86,
-    OpImageSampleImplicitLod = 87,
-    OpImageSampleExplicitLod = 88,
-    OpImageSampleDrefImplicitLod = 89,
-    OpImageSampleDrefExplicitLod = 90,
-    OpImageSampleProjImplicitLod = 91,
-    OpImageSampleProjExplicitLod = 92,
-    OpImageSampleProjDrefImplicitLod = 93,
-    OpImageSampleProjDrefExplicitLod = 94,
-    OpImageFetch = 95,
-    OpImageGather = 96,
-    OpImageDrefGather = 97,
-    OpImageRead = 98,
-    OpImageWrite = 99,
-    OpImage = 100,
-    OpImageQueryFormat = 101,
-    OpImageQueryOrder = 102,
-    OpImageQuerySizeLod = 103,
-    OpImageQuerySize = 104,
-    OpImageQueryLod = 105,
-    OpImageQueryLevels = 106,
-    OpImageQuerySamples = 107,
-    OpConvertFToU = 109,
-    OpConvertFToS = 110,
-    OpConvertSToF = 111,
-    OpConvertUToF = 112,
-    OpUConvert = 113,
-    OpSConvert = 114,
-    OpFConvert = 115,
-    OpQuantizeToF16 = 116,
-    OpConvertPtrToU = 117,
-    OpSatConvertSToU = 118,
-    OpSatConvertUToS = 119,
-    OpConvertUToPtr = 120,
-    OpPtrCastToGeneric = 121,
-    OpGenericCastToPtr = 122,
-    OpGenericCastToPtrExplicit = 123,
-    OpBitcast = 124,
-    OpSNegate = 126,
-    OpFNegate = 127,
-    OpIAdd = 128,
-    OpFAdd = 129,
-    OpISub = 130,
-    OpFSub = 131,
-    OpIMul = 132,
-    OpFMul = 133,
-    OpUDiv = 134,
-    OpSDiv = 135,
-    OpFDiv = 136,
-    OpUMod = 137,
-    OpSRem = 138,
-    OpSMod = 139,
-    OpFRem = 140,
-    OpFMod = 141,
-    OpVectorTimesScalar = 142,
-    OpMatrixTimesScalar = 143,
-    OpVectorTimesMatrix = 144,
-    OpMatrixTimesVector = 145,
-    OpMatrixTimesMatrix = 146,
-    OpOuterProduct = 147,
-    OpDot = 148,
-    OpIAddCarry = 149,
-    OpISubBorrow = 150,
-    OpUMulExtended = 151,
-    OpSMulExtended = 152,
-    OpAny = 154,
-    OpAll = 155,
-    OpIsNan = 156,
-    OpIsInf = 157,
-    OpIsFinite = 158,
-    OpIsNormal = 159,
-    OpSignBitSet = 160,
-    OpLessOrGreater = 161,
-    OpOrdered = 162,
-    OpUnordered = 163,
-    OpLogicalEqual = 164,
-    OpLogicalNotEqual = 165,
-    OpLogicalOr = 166,
-    OpLogicalAnd = 167,
-    OpLogicalNot = 168,
-    OpSelect = 169,
-    OpIEqual = 170,
-    OpINotEqual = 171,
-    OpUGreaterThan = 172,
-    OpSGreaterThan = 173,
-    OpUGreaterThanEqual = 174,
-    OpSGreaterThanEqual = 175,
-    OpULessThan = 176,
-    OpSLessThan = 177,
-    OpULessThanEqual = 178,
-    OpSLessThanEqual = 179,
-    OpFOrdEqual = 180,
-    OpFUnordEqual = 181,
-    OpFOrdNotEqual = 182,
-    OpFUnordNotEqual = 183,
-    OpFOrdLessThan = 184,
-    OpFUnordLessThan = 185,
-    OpFOrdGreaterThan = 186,
-    OpFUnordGreaterThan = 187,
-    OpFOrdLessThanEqual = 188,
-    OpFUnordLessThanEqual = 189,
-    OpFOrdGreaterThanEqual = 190,
-    OpFUnordGreaterThanEqual = 191,
-    OpShiftRightLogical = 194,
-    OpShiftRightArithmetic = 195,
-    OpShiftLeftLogical = 196,
-    OpBitwiseOr = 197,
-    OpBitwiseXor = 198,
-    OpBitwiseAnd = 199,
-    OpNot = 200,
-    OpBitFieldInsert = 201,
-    OpBitFieldSExtract = 202,
-    OpBitFieldUExtract = 203,
-    OpBitReverse = 204,
-    OpBitCount = 205,
-    OpDPdx = 207,
-    OpDPdy = 208,
-    OpFwidth = 209,
-    OpDPdxFine = 210,
-    OpDPdyFine = 211,
-    OpFwidthFine = 212,
-    OpDPdxCoarse = 213,
-    OpDPdyCoarse = 214,
-    OpFwidthCoarse = 215,
-    OpEmitVertex = 218,
-    OpEndPrimitive = 219,
-    OpEmitStreamVertex = 220,
-    OpEndStreamPrimitive = 221,
-    OpControlBarrier = 224,
-    OpMemoryBarrier = 225,
-    OpAtomicLoad = 227,
-    OpAtomicStore = 228,
-    OpAtomicExchange = 229,
-    OpAtomicCompareExchange = 230,
-    OpAtomicCompareExchangeWeak = 231,
-    OpAtomicIIncrement = 232,
-    OpAtomicIDecrement = 233,
-    OpAtomicIAdd = 234,
-    OpAtomicISub = 235,
-    OpAtomicSMin = 236,
-    OpAtomicUMin = 237,
-    OpAtomicSMax = 238,
-    OpAtomicUMax = 239,
-    OpAtomicAnd = 240,
-    OpAtomicOr = 241,
-    OpAtomicXor = 242,
-    OpPhi = 245,
-    OpLoopMerge = 246,
-    OpSelectionMerge = 247,
-    OpLabel = 248,
-    OpBranch = 249,
-    OpBranchConditional = 250,
-    OpSwitch = 251,
-    OpKill = 252,
-    OpReturn = 253,
-    OpReturnValue = 254,
-    OpUnreachable = 255,
-    OpLifetimeStart = 256,
-    OpLifetimeStop = 257,
-    OpGroupAsyncCopy = 259,
-    OpGroupWaitEvents = 260,
-    OpGroupAll = 261,
-    OpGroupAny = 262,
-    OpGroupBroadcast = 263,
-    OpGroupIAdd = 264,
-    OpGroupFAdd = 265,
-    OpGroupFMin = 266,
-    OpGroupUMin = 267,
-    OpGroupSMin = 268,
-    OpGroupFMax = 269,
-    OpGroupUMax = 270,
-    OpGroupSMax = 271,
-    OpReadPipe = 274,
-    OpWritePipe = 275,
-    OpReservedReadPipe = 276,
-    OpReservedWritePipe = 277,
-    OpReserveReadPipePackets = 278,
-    OpReserveWritePipePackets = 279,
-    OpCommitReadPipe = 280,
-    OpCommitWritePipe = 281,
-    OpIsValidReserveId = 282,
-    OpGetNumPipePackets = 283,
-    OpGetMaxPipePackets = 284,
-    OpGroupReserveReadPipePackets = 285,
-    OpGroupReserveWritePipePackets = 286,
-    OpGroupCommitReadPipe = 287,
-    OpGroupCommitWritePipe = 288,
-    OpEnqueueMarker = 291,
-    OpEnqueueKernel = 292,
-    OpGetKernelNDrangeSubGroupCount = 293,
-    OpGetKernelNDrangeMaxSubGroupSize = 294,
-    OpGetKernelWorkGroupSize = 295,
-    OpGetKernelPreferredWorkGroupSizeMultiple = 296,
-    OpRetainEvent = 297,
-    OpReleaseEvent = 298,
-    OpCreateUserEvent = 299,
-    OpIsValidEvent = 300,
-    OpSetUserEventStatus = 301,
-    OpCaptureEventProfilingInfo = 302,
-    OpGetDefaultQueue = 303,
-    OpBuildNDRange = 304,
-    OpImageSparseSampleImplicitLod = 305,
-    OpImageSparseSampleExplicitLod = 306,
-    OpImageSparseSampleDrefImplicitLod = 307,
-    OpImageSparseSampleDrefExplicitLod = 308,
-    OpImageSparseSampleProjImplicitLod = 309,
-    OpImageSparseSampleProjExplicitLod = 310,
-    OpImageSparseSampleProjDrefImplicitLod = 311,
-    OpImageSparseSampleProjDrefExplicitLod = 312,
-    OpImageSparseFetch = 313,
-    OpImageSparseGather = 314,
-    OpImageSparseDrefGather = 315,
-    OpImageSparseTexelsResident = 316,
-    OpNoLine = 317,
-    OpAtomicFlagTestAndSet = 318,
-    OpAtomicFlagClear = 319,
-    OpImageSparseRead = 320,
-};
-
-// Overload operator| for mask bit combining
-
-inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
-inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); }
-inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); }
-inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); }
-inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); }
-inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); }
-inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); }
-inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); }
-
-} // end namespace spv
-
-#endif // #ifndef spirv_HPP
-
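The "Mask"/"Shift" enum pairs in the headers deleted above follow the convention their own comment describes: each "Mask" enumerant is a single bit that can be OR'd with others (the operator| overloads at the end of spirv.hpp11 make that legal for the scoped enums), and the parallel "Shift" enum holds the bit position, so 1 << shift reproduces the corresponding mask. A minimal sketch of that usage, assuming the removed spirv.hpp11 were still available to include (illustrative only; the include path is hypothetical):

    #include <cassert>
    #include "third_party/spirv/spirv.hpp11"

    int main() {
      // Combine two mask bits with the operator| overload from the header.
      spv::MemoryAccessMask access =
          spv::MemoryAccessMask::Volatile | spv::MemoryAccessMask::Aligned;

      // The parallel Shift enum holds the bit position: 1 << shift == mask.
      assert((1u << unsigned(spv::MemoryAccessShift::Volatile)) ==
             unsigned(spv::MemoryAccessMask::Volatile));

      // Testing a bit requires casting back to unsigned, since enum class
      // values do not convert implicitly.
      bool is_aligned =
          unsigned(access) & unsigned(spv::MemoryAccessMask::Aligned);
      return is_aligned ? 0 : 1;
    }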