xenia/src/xenia/gpu/spirv_shader_translator.h

717 lines
30 KiB
C++

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
#define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "third_party/glslang/SPIRV/SpvBuilder.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
class SpirvShaderTranslator : public ShaderTranslator {
public:
// Shader modification bits that select a variant of the translated shader,
// packed into one 64-bit `value`. `vertex` and `pixel` are bit-field views of
// the same storage; the translator reads `vertex` for vertex shaders and
// `pixel` otherwise (see GetModificationInterpolatorMask).
union Modification {
// If anything in this structure is changed in a way not compatible with
// the previous layout, invalidate the pipeline storages by increasing this
// version number (0xYYYYMMDD)!
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
// prototyping stage (easier to do small granular updates with an
// incremental counter).
static constexpr uint32_t kVersion = 5;
// Stored in the 3-bit `depth_stencil_mode` field of the pixel view.
enum class DepthStencilMode : uint32_t {
kNoModifiers,
// Early fragment tests - enable if alpha test and alpha to coverage are
// disabled; ignored if anything in the shader blocks early Z writing.
kEarlyHint,
// TODO(Triang3l): Unorm24 (rounding) and float24 (truncating and
// rounding) output modes.
};
// View used when the modification belongs to a vertex shader.
struct {
// uint32_t 0.
// Interpolators written by the vertex shader and needed by the pixel
// shader.
uint32_t interpolator_mask : xenos::kMaxInterpolators;
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
uint32_t dynamic_addressable_register_count : 8;
// Pipeline stage and input configuration.
Shader::HostVertexShaderType host_vertex_shader_type
: Shader::kHostVertexShaderTypeBitCount;
} vertex;
// View used when the modification belongs to a pixel shader.
struct PixelShaderModification {
// uint32_t 0.
// Interpolators written by the vertex shader and needed by the pixel
// shader.
uint32_t interpolator_mask : xenos::kMaxInterpolators;
uint32_t interpolators_centroid : xenos::kMaxInterpolators;
// uint32_t 1.
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
uint32_t dynamic_addressable_register_count : 8;
uint32_t param_gen_enable : 1;
uint32_t param_gen_interpolator : 4;
// If param_gen_enable is set, this must be set for point primitives, and
// must not be set for other primitive types - enables the point sprite
// coordinates input, and also affects the flag bits in PsParamGen.
uint32_t param_gen_point : 1;
// For host render targets - depth / stencil output mode.
DepthStencilMode depth_stencil_mode : 3;
} pixel;
// Raw 64-bit representation of the modification; zero by default.
uint64_t value = 0;
// Wraps a raw modification value. Explicit, so a plain integer is never
// silently converted to a Modification.
explicit Modification(uint64_t modification_value = 0)
: value(modification_value) {
// static_assert_size is a project macro defined elsewhere - presumably a
// compile-time check that the union is exactly sizeof(value) bytes, so the
// bit-field views can't outgrow `value` (TODO confirm).
static_assert_size(*this, sizeof(value));
}
};
// System constant flags. The first group of enumerators (`*_Shift`) assigns
// consecutive bit indices starting at 0, ending with kSysFlag_Count (the
// number of flags). The second group defines the matching single-bit masks as
// `1u << shift`. New flags must be added to both groups; inserting a shift
// anywhere but right before kSysFlag_Count renumbers the flags after it.
enum : uint32_t {
kSysFlag_VertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal_Shift,
kSysFlag_PrimitivePolygonal_Shift,
kSysFlag_PrimitiveLine_Shift,
kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_ConvertColor0ToGamma_Shift,
kSysFlag_ConvertColor1ToGamma_Shift,
kSysFlag_ConvertColor2ToGamma_Shift,
kSysFlag_ConvertColor3ToGamma_Shift,
kSysFlag_Count,
// For HostVertexShaderType kVertex, if fullDrawIndexUint32 is not
// supported (ignored otherwise), whether to fetch the index manually
// (32-bit only - 16-bit indices are always fetched via the Vulkan index
// buffer).
kSysFlag_VertexIndexLoad = 1u << kSysFlag_VertexIndexLoad_Shift,
// For HostVertexShaderTypes kMemexportCompute, kPointListAsTriangleStrip,
// kRectangleListAsTriangleStrip, whether the vertex index needs to be
// loaded from the index buffer (rather than using autogenerated indices),
// and whether it's 32-bit. This is separate from kSysFlag_VertexIndexLoad
// because the same system constants may be used for the memexporting
// compute shader and the vertex shader for the same draw, but
// kSysFlag_VertexIndexLoad may be not needed.
kSysFlag_ComputeOrPrimitiveVertexIndexLoad =
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit =
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift,
kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
};
// The masks are built with `1u <<`, so every shift must fit in 32 bits.
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - SystemConstantIndex enum.
// - Structure members in BeginTranslation.
// NOTE(review): this class declares StartTranslation(), not
// BeginTranslation() - the name above may be stale; confirm against the
// implementation file.
//
// Using the std140 layout - vec2 must be aligned to 8 bytes, vec3 and vec4 to
// 16 bytes.
struct SystemConstants {
// Combination of kSysFlag_* bits.
uint32_t flags;
uint32_t vertex_index_load_address;
xenos::Endian vertex_index_endian;
int32_t vertex_base_index;
float ndc_scale[3];
// Explicit padding after the 3-component vector (see the std140 note above).
uint32_t padding_ndc_scale;
float ndc_offset[3];
// Explicit padding after the 3-component vector (see the std140 note above).
uint32_t padding_ndc_offset;
// Each byte contains post-swizzle TextureSign values for each of the needed
// components of each of the 32 used texture fetch constants.
uint32_t texture_swizzled_signs[8];
// If the imageViewFormatSwizzle portability subset is not supported, the
// component swizzle (taking both guest and host swizzles into account) to
// apply to the result directly in the shader code. In each uint32_t,
// swizzles for 2 texture fetch constants (in bits 0:11 and 12:23).
uint32_t texture_swizzles[16];
float alpha_test_reference;
// Presumably pads the scalar above so that color_exp_bias (a 4-component
// vector) starts on a 16-byte boundary - TODO confirm.
float padding_alpha_test_reference[3];
float color_exp_bias[4];
};
// Indices of the constant buffers, numbered consecutively from 0.
// kConstantBufferCount is the number of buffers, not a buffer itself, and must
// stay last.
enum ConstantBuffer : uint32_t {
kConstantBufferSystem,
kConstantBufferFloatVertex,
kConstantBufferFloatPixel,
kConstantBufferBoolLoop,
kConstantBufferFetch,
kConstantBufferCount,
};
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
// maxStorageBufferRange it's 128 MB. These are the values of those limits on
// Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound,
// therefore SSBOs must only be used for shared memory - all other storage
// resources must be images or texel buffers.
//
// Descriptor set indices, numbered consecutively from 0. kDescriptorSetCount
// is the number of sets and must stay last.
enum DescriptorSet : uint32_t {
// According to the "Pipeline Layout Compatibility" section of the Vulkan
// specification:
// "Two pipeline layouts are defined to be "compatible for set N" if they
// were created with identically defined descriptor set layouts for sets
// zero through N, and if they were created with identical push constant
// ranges."
// "Place the least frequently changing descriptor sets near the start of
// the pipeline layout, and place the descriptor sets representing the most
// frequently changing resources near the end. When pipelines are switched,
// only the descriptor set bindings that have been invalidated will need to
// be updated and the remainder of the descriptor set bindings will remain
// in place."
// This is partially the reverse of the Direct3D 12's rule of placing the
// most frequently changed descriptor sets in the beginning. Here all
// descriptor sets with an immutable layout are placed first, in reverse
// frequency of changing, and sets that may be different for different
// pipeline states last.
// Always the same descriptor set layouts for all pipeline layouts:
// Never changed.
kDescriptorSetSharedMemoryAndEdram,
// Changed in case of changes in the data.
kDescriptorSetConstants,
// Mutable part of the pipeline layout:
// Marker only - the first mutable set below is assigned the same value.
kDescriptorSetMutableLayoutsStart,
// Rarely used at all, but may be changed at an unpredictable rate when
// vertex textures are used (for example, for bones of an object, which may
// consist of multiple draw commands with different materials).
kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart,
// Per-material textures.
kDescriptorSetTexturesPixel,
kDescriptorSetCount,
};
// Compile-time guard: adding a fifth descriptor set would exceed the limit
// described in the message.
static_assert(
kDescriptorSetCount <= 4,
"The number of descriptor sets used by translated shaders must be within "
"the minimum Vulkan maxBoundDescriptorSets requirement of 4, which is "
"the limit on most GPUs used in Android devices - Arm Mali, Imagination "
"PowerVR, Qualcomm Adreno 6xx and older, as well as on old PC Nvidia "
"drivers");
// "Xenia Emulator Microcode Translator".
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79
// Presumably the generator tool ID listed for the entry quoted above in the
// linked registry file - verify against spir-v.xml before changing.
static constexpr uint32_t kSpirvMagicToolId = 26;
// Feature set the translator is configured with (passed to the
// SpirvShaderTranslator constructor). Both constructors are defined outside
// this header.
struct Features {
// Builds the feature set from a Vulkan provider - presumably by querying the
// device's properties and features (TODO confirm in the implementation).
explicit Features(const ui::vulkan::VulkanProvider& provider);
// NOTE(review): the meaning of `all` is defined in the implementation file -
// presumably true enables every optional feature; confirm.
explicit Features(bool all = false);
unsigned int spirv_version;
// Used by GetSharedMemoryStorageBufferCountLog2 to decide how many storage
// buffers the shared memory is split into.
uint32_t max_storage_buffer_range;
bool clip_distance;
bool cull_distance;
bool full_draw_index_uint32;
bool image_view_format_swizzle;
bool signed_zero_inf_nan_preserve_float32;
bool denorm_flush_to_zero_float32;
};
SpirvShaderTranslator(const Features& features);
uint64_t GetDefaultVertexShaderModification(
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex) const override;
uint64_t GetDefaultPixelShaderModification(
uint32_t dynamic_addressable_register_count) const override;
// Returns log2 of the number of storage buffers the 512 MB shared memory has
// to be split into for the given maxStorageBufferRange:
//   range >= 512 MB -> 0 (one buffer),
//   range >= 256 MB -> 1 (two buffers),
//   anything smaller -> 2 (four buffers; the result never exceeds 2).
static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2(
    uint32_t max_storage_buffer_range) {
  constexpr uint32_t kBytesPerMegabyte = 1024u * 1024u;
  return max_storage_buffer_range >= 512u * kBytesPerMegabyte   ? 0u
         : max_storage_buffer_range >= 256u * kBytesPerMegabyte ? 1u
                                                                : 2u;
}
uint32_t GetSharedMemoryStorageBufferCountLog2() const {
return GetSharedMemoryStorageBufferCountLog2(
features_.max_storage_buffer_range);
}
// Common functions useful not only for the translator, but also for EDRAM
// emulation via conventional render targets.
// Converts the color value externally clamped to [0, 31.875] to 7e3 floating
// point, with zeros in bits 10:31, rounding to the nearest even.
static spv::Id PreClampedFloat32To7e3(spv::Builder& builder,
spv::Id f32_scalar,
spv::Id ext_inst_glsl_std_450);
// Same as PreClampedFloat32To7e3, but clamps the input to [0, 31.875].
static spv::Id UnclampedFloat32To7e3(spv::Builder& builder,
spv::Id f32_scalar,
spv::Id ext_inst_glsl_std_450);
// Converts the 7e3 number in bits [f10_shift, f10_shift + 10) to a 32-bit
// float.
static spv::Id Float7e3To32(spv::Builder& builder, spv::Id f10_uint_scalar,
uint32_t f10_shift, bool result_as_uint,
spv::Id ext_inst_glsl_std_450);
// Converts the depth value externally clamped to the representable [0, 2)
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
// nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
// that 0...1 is pre-remapped to 0...0.5 in the input.
static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
spv::Id f32_scalar,
bool round_to_nearest_even,
bool remap_from_0_to_0_5,
spv::Id ext_inst_glsl_std_450);
// Converts the 20e4 number in bits [f24_shift, f24_shift + 24) to a 32-bit
// float.
static spv::Id Depth20e4To32(spv::Builder& builder, spv::Id f24_uint_scalar,
uint32_t f24_shift, bool remap_to_0_to_0_5,
bool result_as_uint,
spv::Id ext_inst_glsl_std_450);
protected:
void Reset() override;
uint32_t GetModificationRegisterCount() const override;
void StartTranslation() override;
std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation() override;
void ProcessLabel(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) override;
void ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override;
void ProcessTextureFetchInstruction(
const ParsedTextureFetchInstruction& instr) override;
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private:
// One texture binding of the shader being translated. The first three fields
// match the parameters of FindOrAddTextureBinding.
struct TextureBinding {
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings.
xenos::FetchOpDimension dimension;
bool is_signed;
// Presumably the id of the SPIR-V variable created for this binding - TODO
// confirm in the implementation.
spv::Id variable;
};
// One sampler binding of the shader being translated. The fetch constant and
// the four filter fields match the parameters of FindOrAddSamplerBinding.
struct SamplerBinding {
uint32_t fetch_constant;
xenos::TextureFilter mag_filter;
xenos::TextureFilter min_filter;
xenos::TextureFilter mip_filter;
xenos::AnisoFilter aniso_filter;
// Presumably the id of the SPIR-V variable created for this binding - TODO
// confirm in the implementation.
spv::Id variable;
};
// Builder helpers.
spv::Id SpirvSmearScalarResultOrConstant(spv::Id scalar, spv::Id vector_type);
// Appends an OpSelectionMerge instruction to the builder's current build
// point, with merge_block_id as its id operand and selection_control_mask as
// its immediate operand.
void SpirvCreateSelectionMerge(
    spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask =
                                spv::SelectionControlMaskNone) {
  auto merge_instruction =
      std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
  merge_instruction->addIdOperand(merge_block_id);
  merge_instruction->addImmediateOperand(selection_control_mask);
  // The block takes ownership of the instruction.
  auto* build_point = builder_->getBuildPoint();
  build_point->addInstruction(std::move(merge_instruction));
}
// Returns the modification bits of the current translation wrapped in the
// SPIR-V-specific Modification union.
Modification GetSpirvShaderModification() const {
  const uint64_t modification_bits = current_translation().modification();
  return Modification(modification_bits);
}
// True when a vertex shader is being translated and the host vertex shader
// type in its modification is not a domain shader type.
bool IsSpirvVertexShader() const {
  if (!is_vertex_shader()) {
    return false;
  }
  return !Shader::IsHostVertexShaderTypeDomain(
      GetSpirvShaderModification().vertex.host_vertex_shader_type);
}
// True when a vertex shader is being translated and the host vertex shader
// type in its modification is a domain shader type.
bool IsSpirvTessEvalShader() const {
  if (!is_vertex_shader()) {
    return false;
  }
  return Shader::IsHostVertexShaderTypeDomain(
      GetSpirvShaderModification().vertex.host_vertex_shader_type);
}
// True only if all three hold, checked in this order: a pixel shader is being
// translated, its modification requests DepthStencilMode::kEarlyHint, and the
// current shader allows implicit early Z writing.
bool IsExecutionModeEarlyFragmentTests() const {
  // TODO(Triang3l): Not applicable to fragment shader interlock.
  if (!is_pixel_shader()) {
    return false;
  }
  if (GetSpirvShaderModification().pixel.depth_stencil_mode !=
      Modification::DepthStencilMode::kEarlyHint) {
    return false;
  }
  return current_shader().implicit_early_z_write_allowed();
}
uint32_t GetModificationInterpolatorMask() const {
Modification modification = GetSpirvShaderModification();
return is_vertex_shader() ? modification.vertex.interpolator_mask
: modification.pixel.interpolator_mask;
}
// Returns UINT32_MAX if PsParamGen doesn't need to be written.
uint32_t GetPsParamGenInterpolator() const;
// Must be called in translator callbacks before emitting any SPIR-V
// operations that must be in a block. Ensures a build point is available even
// if the last instruction added was something like OpBranch - in this case, an
// unreachable block is created.
void EnsureBuildPointAvailable();
void StartVertexOrTessEvalShaderBeforeMain();
void StartVertexOrTessEvalShaderInMain();
void CompleteVertexOrTessEvalShaderInMain();
void StartFragmentShaderBeforeMain();
void StartFragmentShaderInMain();
void CompleteFragmentShaderInMain();
// Updates the current flow control condition (to be called in the beginning
// of exec and in jumps), closing the previous conditionals if needed.
// However, if the condition is not different, the instruction-level predicate
// conditional also won't be closed - this must be checked separately if
// needed (for example, in jumps).
void UpdateExecConditionals(ParsedExecInstruction::Type type,
uint32_t bool_constant_index, bool condition);
// Opens or reopens the predicate check conditional for the instruction.
// Should be called before processing a non-control-flow instruction.
void UpdateInstructionPredication(bool predicated, bool condition);
// Closes the instruction-level predicate conditional if it's open, useful if
// a control flow instruction needs to do some code which needs to respect the
// current exec conditional, but can't itself be predicated.
void CloseInstructionPredication();
// Closes conditionals opened by exec and instructions within them (but not by
// labels) and updates the state accordingly.
void CloseExecConditionals();
spv::Id GetStorageAddressingIndex(
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
bool is_float_constant = false);
// Loads unswizzled operand without sign modifiers as float4.
spv::Id LoadOperandStorage(const InstructionOperand& operand);
spv::Id ApplyOperandModifiers(spv::Id operand_value,
const InstructionOperand& original_operand,
bool invert_negate = false,
bool force_absolute = false);
// Returns the requested components, with the operand's swizzle applied, in a
// condensed form, but without negation / absolute value modifiers. The
// storage is float4, no matter what the component count of original_operand
// is (the storage will be either r# or c#, but the instruction may be
// scalar).
spv::Id GetUnmodifiedOperandComponents(
spv::Id operand_storage, const InstructionOperand& original_operand,
uint32_t components);
// Returns the requested components of the operand with its swizzle applied
// (via GetUnmodifiedOperandComponents), then passes the result through
// ApplyOperandModifiers with the given invert_negate / force_absolute options.
spv::Id GetOperandComponents(spv::Id operand_storage,
                             const InstructionOperand& original_operand,
                             uint32_t components, bool invert_negate = false,
                             bool force_absolute = false) {
  const spv::Id unmodified_components = GetUnmodifiedOperandComponents(
      operand_storage, original_operand, components);
  return ApplyOperandModifiers(unmodified_components, original_operand,
                               invert_negate, force_absolute);
}
// If components are identical, the same Id will be written to both outputs.
void GetOperandScalarXY(spv::Id operand_storage,
const InstructionOperand& original_operand,
spv::Id& a_out, spv::Id& b_out,
bool invert_negate = false,
bool force_absolute = false);
// Gets the absolute value of the loaded operand if it's not absolute already.
spv::Id GetAbsoluteOperand(spv::Id operand_storage,
const InstructionOperand& original_operand);
// The type of the value must be a float vector consisting of
// xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate
// a scalar into all used components) float, or the value can be spv::NoResult
// if there's no result to store (like constants only).
void StoreResult(const InstructionResult& result, spv::Id value);
// For Shader Model 3 multiplication (+-0 or denormal * anything = +0),
// replaces the value with +0 if the minimum of the two operands is 0. This
// must be called with absolute values of operands - use GetAbsoluteOperand!
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
spv::Id operand_1_abs);
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component
// float vector or a single float, depending on whether it's a reduction
// instruction (check getTypeId of the result), or returns spv::NoResult if
// nothing to store.
spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
bool& predicate_written);
// Returns a float value to write to the previous scalar register and to the
// destination. If the return value is ps itself (in the retain_prev case),
// returns spv::NoResult (handled as a special case, so if it's retain_prev,
// but don't need to write to anywhere, no OpLoad(ps) will be done).
spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
bool& predicate_written);
// Perform endian swap of a uint scalar or vector.
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
// The source may be a floating-point scalar or a vector.
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
spv::Id LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated);
size_t FindOrAddTextureBinding(uint32_t fetch_constant,
xenos::FetchOpDimension dimension,
bool is_signed);
size_t FindOrAddSamplerBinding(uint32_t fetch_constant,
xenos::TextureFilter mag_filter,
xenos::TextureFilter min_filter,
xenos::TextureFilter mip_filter,
xenos::AnisoFilter aniso_filter);
// `texture_parameters` need to be set up except for `sampler`, which will be
// set internally, optionally doing linear interpolation between an existing
// value and the new one (the result location may be the same as for the first
// lerp endpoint, but not across signedness).
void SampleTexture(spv::Builder::TextureParameters& texture_parameters,
spv::ImageOperandsMask image_operands_mask,
spv::Id image_unsigned, spv::Id image_signed,
spv::Id sampler, spv::Id is_all_signed,
spv::Id is_any_signed, spv::Id& result_unsigned_out,
spv::Id& result_signed_out,
spv::Id lerp_factor = spv::NoResult,
spv::Id lerp_first_unsigned = spv::NoResult,
spv::Id lerp_first_signed = spv::NoResult);
// `texture_parameters` need to be set up except for `sampler`, which will be
// set internally.
spv::Id QueryTextureLod(spv::Builder::TextureParameters& texture_parameters,
spv::Id image_unsigned, spv::Id image_signed,
spv::Id sampler, spv::Id is_all_signed);
Features features_;
std::unique_ptr<spv::Builder> builder_;
std::vector<spv::Id> id_vector_temp_;
// For helper functions like operand loading, so they don't conflict with
// id_vector_temp_ usage in bigger callbacks.
std::vector<spv::Id> id_vector_temp_util_;
std::vector<unsigned int> uint_vector_temp_;
std::vector<unsigned int> uint_vector_temp_util_;
spv::Id ext_inst_glsl_std_450_;
spv::Id type_void_;
// Ids of the bool / int / uint / float scalar and 2- to 4-component vector
// types. Each union overlays four individually named ids with a 4-element
// array, so the same id can be reached by name or by component count.
// NOTE(review): this relies on anonymous structs (a common compiler
// extension, not ISO C++) and on the named members being laid out
// contiguously in declaration order.
union {
struct {
spv::Id type_bool_;
spv::Id type_bool2_;
spv::Id type_bool3_;
spv::Id type_bool4_;
};
// Index = component count - 1.
spv::Id type_bool_vectors_[4];
};
union {
struct {
spv::Id type_int_;
spv::Id type_int2_;
spv::Id type_int3_;
spv::Id type_int4_;
};
// Index = component count - 1.
spv::Id type_int_vectors_[4];
};
union {
struct {
spv::Id type_uint_;
spv::Id type_uint2_;
spv::Id type_uint3_;
spv::Id type_uint4_;
};
// Index = component count - 1.
spv::Id type_uint_vectors_[4];
};
union {
struct {
spv::Id type_float_;
spv::Id type_float2_;
spv::Id type_float3_;
spv::Id type_float4_;
};
// Index = component count - 1.
spv::Id type_float_vectors_[4];
};
spv::Id const_int_0_;
spv::Id const_int4_0_;
spv::Id const_uint_0_;
spv::Id const_uint4_0_;
// Ids of float constants for 1 to 4 components, going by the member names
// with every component 0.0 in the first union and 1.0 in the second. Each
// union overlays the four named ids with a 4-element array.
union {
struct {
spv::Id const_float_0_;
spv::Id const_float2_0_;
spv::Id const_float3_0_;
spv::Id const_float4_0_;
};
// Index = component count - 1.
spv::Id const_float_vectors_0_[4];
};
union {
struct {
spv::Id const_float_1_;
spv::Id const_float2_1_;
spv::Id const_float3_1_;
spv::Id const_float4_1_;
};
// Index = component count - 1.
spv::Id const_float_vectors_1_[4];
};
// vec2(0.0, 1.0), to arbitrarily VectorShuffle non-constant and constant
// components.
spv::Id const_float2_0_1_;
// Indices of the system constants. The enumerators follow the order of the
// non-padding members of SystemConstants, and the comment above that
// structure requires this enum to be updated whenever the constants change.
enum SystemConstantIndex : unsigned int {
kSystemConstantFlags,
kSystemConstantVertexIndexLoadAddress,
kSystemConstantVertexIndexEndian,
kSystemConstantVertexBaseIndex,
kSystemConstantNdcScale,
kSystemConstantNdcOffset,
kSystemConstantTextureSwizzledSigns,
kSystemConstantTextureSwizzles,
kSystemConstantAlphaTestReference,
kSystemConstantColorExpBias,
};
spv::Id uniform_system_constants_;
spv::Id uniform_float_constants_;
spv::Id uniform_bool_loop_constants_;
spv::Id uniform_fetch_constants_;
spv::Id buffers_shared_memory_;
// Not using combined images and samplers because
// maxPerStageDescriptorSamplers is often lower than
// maxPerStageDescriptorSampledImages, and for every fetch constant, there
// are, for regular fetches, two bindings (unsigned and signed).
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
// VS as VS only - int.
spv::Id input_vertex_index_;
// VS as TES only - int.
spv::Id input_primitive_id_;
// PS, only when needed - float4.
spv::Id input_fragment_coord_;
// PS, only when needed - bool.
spv::Id input_front_facing_;
// VS output or PS input, only the ones that are needed (spv::NoResult for the
// unneeded interpolators), indexed by the guest interpolator index - float4.
// The Qualcomm Adreno driver has strict requirements for stage linkage - as
// Xenia uses separate variables, not an array (so the interpolation
// qualifiers can be applied to each element separately), the interpolators
// must also be separate variables in the other stage, including the geometry
// shader (not just an array assuming that consecutive locations will be
// linked as consecutive array elements, on Qualcomm, they won't be linked at
// all).
std::array<spv::Id, xenos::kMaxInterpolators> input_output_interpolators_;
// Member indices - presumably of the per-vertex output block referenced by
// output_per_vertex_ (TODO confirm in the implementation).
// kOutputPerVertexMemberCount is the number of members and must stay last.
enum OutputPerVertexMember : unsigned int {
kOutputPerVertexMemberPosition,
kOutputPerVertexMemberCount,
};
spv::Id output_per_vertex_;
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_fragment_data_;
std::vector<spv::Id> main_interface_;
spv::Function* function_main_;
spv::Id main_system_constant_flags_;
// bool.
spv::Id var_main_predicate_;
// uint4.
spv::Id var_main_loop_count_;
// int4.
spv::Id var_main_loop_address_;
// int.
spv::Id var_main_address_register_;
// float.
spv::Id var_main_previous_scalar_;
// `base + index * stride` in dwords from the last vfetch_full as it may be
// needed by vfetch_mini - int.
spv::Id var_main_vfetch_address_;
// float.
spv::Id var_main_tfetch_lod_;
// float3.
spv::Id var_main_tfetch_gradients_h_;
spv::Id var_main_tfetch_gradients_v_;
// float4[register_count()].
spv::Id var_main_registers_;
// VS only - float3 (special exports).
spv::Id var_main_point_size_edge_flag_kill_vertex_;
spv::Block* main_loop_header_;
spv::Block* main_loop_continue_;
spv::Block* main_loop_merge_;
spv::Id main_loop_pc_next_;
spv::Block* main_switch_header_;
std::unique_ptr<spv::Instruction> main_switch_op_;
spv::Block* main_switch_merge_;
std::vector<spv::Id> main_switch_next_pc_phi_operands_;
// If the exec bool constant / predicate conditional is open, block after it
// (not added to the function yet).
spv::Block* cf_exec_conditional_merge_;
// If the instruction-level predicate conditional is open, block after it (not
// added to the function yet).
spv::Block* cf_instruction_predicate_merge_;
// When cf_exec_conditional_merge_ is not null:
// If the current exec conditional is based on a bool constant: the number of
// the bool constant.
// If it's based on the predicate value: kCfExecBoolConstantPredicate.
uint32_t cf_exec_bool_constant_or_predicate_;
static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX;
// When cf_exec_conditional_merge_ is not null, the expected bool constant or
// predicate value for the current exec conditional.
bool cf_exec_condition_;
// When cf_instruction_predicate_merge_ is not null, the expected predicate
// value for the current or the last instruction.
bool cf_instruction_predicate_condition_;
// Whether there was a `setp` in the current exec before the current
// instruction, thus instruction-level predicate value can be different than
// the exec-level predicate value, and can't merge two execs with the same
// predicate condition anymore.
bool cf_exec_predicate_written_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_