diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index 34eb6e8b7..5d5cb9f27 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -166,8 +166,11 @@ struct alignas(uint32_t) BlobHeader { // In order of appearance in a container. kResourceDefinition = MakeFourCC('R', 'D', 'E', 'F'), kInputSignature = MakeFourCC('I', 'S', 'G', 'N'), + kInputSignature11_1 = MakeFourCC('I', 'S', 'G', '1'), kPatchConstantSignature = MakeFourCC('P', 'C', 'S', 'G'), kOutputSignature = MakeFourCC('O', 'S', 'G', 'N'), + kOutputSignatureForGS = MakeFourCC('O', 'S', 'G', '5'), + kOutputSignature11_1 = MakeFourCC('O', 'S', 'G', '1'), kShaderEx = MakeFourCC('S', 'H', 'E', 'X'), kShaderFeatureInfo = MakeFourCC('S', 'F', 'I', '0'), kStatistics = MakeFourCC('S', 'T', 'A', 'T'), @@ -320,6 +323,7 @@ enum RdefInputFlags : uint32_t { enum class RdefShaderModel : uint32_t { kPixelShader5_1 = 0xFFFF0501u, kVertexShader5_1 = 0xFFFE0501u, + kGeometryShader5_1 = 0x47530501u, kDomainShader5_1 = 0x44530501u, kComputeShader5_1 = 0x43530501u, }; @@ -467,8 +471,7 @@ enum class SignatureRegisterComponentType : uint32_t { }; // D3D_MIN_PRECISION -// uint8_t as it's used as one byte in SignatureParameter. -enum class MinPrecision : uint8_t { +enum class MinPrecision : uint32_t { kDefault, kFloat16, kFloat2_8, @@ -478,7 +481,7 @@ enum class MinPrecision : uint8_t { kAny10, }; -// D3D11_INTERNALSHADER_PARAMETER_11_1 +// D3D10_INTERNALSHADER_PARAMETER struct alignas(uint32_t) SignatureParameter { uint32_t semantic_name_ptr; uint32_t semantic_index; @@ -496,10 +499,45 @@ struct alignas(uint32_t) SignatureParameter { // For an input signature. uint8_t always_reads_mask; }; - MinPrecision min_precision; }; static_assert_size(SignatureParameter, sizeof(uint32_t) * 6); +// D3D11_INTERNALSHADER_PARAMETER_FOR_GS +// Extends SignatureParameter, see it for more information. +struct alignas(uint32_t) SignatureParameterForGS { + // Stream index (parameters must appear in non-decreasing stream order). + uint32_t stream; + uint32_t semantic_name_ptr; + uint32_t semantic_index; + Name system_value; + SignatureRegisterComponentType component_type; + uint32_t register_index; + uint8_t mask; + union { + uint8_t never_writes_mask; + uint8_t always_reads_mask; + }; +}; +static_assert_size(SignatureParameterForGS, sizeof(uint32_t) * 7); + +// D3D11_INTERNALSHADER_PARAMETER_11_1 +// Extends SignatureParameterForGS, see it for more information. +struct alignas(uint32_t) SignatureParameter11_1 { + uint32_t stream; + uint32_t semantic_name_ptr; + uint32_t semantic_index; + Name system_value; + SignatureRegisterComponentType component_type; + uint32_t register_index; + uint8_t mask; + union { + uint8_t never_writes_mask; + uint8_t always_reads_mask; + }; + MinPrecision min_precision; +}; +static_assert_size(SignatureParameter11_1, sizeof(uint32_t) * 8); + // D3D10_INTERNALSHADER_SIGNATURE struct alignas(uint32_t) Signature { uint32_t parameter_count; @@ -543,6 +581,62 @@ enum class TessellatorDomain : uint32_t { kQuad, }; +// D3D10_SB_PRIMITIVE_TOPOLOGY +enum class PrimitiveTopology : uint32_t { + kUndefined = 0, + kPointList = 1, + kLineList = 2, + kLineStrip = 3, + kTriangleList = 4, + kTriangleStrip = 5, + kLineListWithAdjacency = 10, + kLineStripWithAdjacency = 11, + kTriangleListWithAdjacency = 12, + kTriangleStripWithAdjacency = 13, +}; + +// D3D10_SB_PRIMITIVE +enum class Primitive : uint32_t { + kUndefined = 0, + kPoint = 1, + kLine = 2, + kTriangle = 3, + kLineWithAdjacency = 6, + kTriangleWithAdjacency = 7, + k1ControlPointPatch = 8, + k2ControlPointPatch = 9, + k3ControlPointPatch = 10, + k4ControlPointPatch = 11, + k5ControlPointPatch = 12, + k6ControlPointPatch = 13, + k7ControlPointPatch = 14, + k8ControlPointPatch = 15, + k9ControlPointPatch = 16, + k10ControlPointPatch = 17, + k11ControlPointPatch = 18, + k12ControlPointPatch = 19, + k13ControlPointPatch = 20, + k14ControlPointPatch = 21, + k15ControlPointPatch = 22, + k16ControlPointPatch = 23, + k17ControlPointPatch = 24, + k18ControlPointPatch = 25, + k19ControlPointPatch = 26, + k20ControlPointPatch = 27, + k21ControlPointPatch = 28, + k22ControlPointPatch = 29, + k23ControlPointPatch = 30, + k24ControlPointPatch = 31, + k25ControlPointPatch = 32, + k26ControlPointPatch = 33, + k27ControlPointPatch = 34, + k28ControlPointPatch = 35, + k29ControlPointPatch = 36, + k30ControlPointPatch = 37, + k31ControlPointPatch = 38, + k32ControlPointPatch = 39, +}; + // The STAT blob (based on Wine d3dcompiler_parse_stat). struct alignas(uint32_t) Statistics { // Not increased by declarations and labels. @@ -576,11 +670,11 @@ struct alignas(uint32_t) Statistics { uint32_t movc_instruction_count; // +50 uint32_t conversion_instruction_count; // +54 // Unknown in Wine. - uint32_t unknown_22; // +58 - uint32_t input_primitive; // +5C - uint32_t gs_output_topology; // +60 - uint32_t gs_max_output_vertex_count; // +64 - uint32_t unknown_26; // +68 + uint32_t unknown_22; // +58 + Primitive input_primitive; // +5C + PrimitiveTopology gs_output_topology; // +60 + uint32_t gs_max_output_vertex_count; // +64 + uint32_t unknown_26; // +68 // Unknown in Wine, but confirmed by testing. uint32_t lod_instructions; // +6C uint32_t unknown_28; // +70 @@ -644,6 +738,7 @@ enum class OperandType : uint32_t { kOutputDepth = 12, kNull = 13, kOutputCoverageMask = 15, + kStream = 16, kInputControlPoint = 25, kInputDomainPoint = 28, kUnorderedAccessView = 30, @@ -669,6 +764,7 @@ constexpr OperandDimension GetOperandDimension(OperandType type, return in_dcl ? OperandDimension::kVector : OperandDimension::kNoData; case OperandType::kLabel: case OperandType::kNull: + case OperandType::kStream: return OperandDimension::kNoData; case OperandType::kInputPrimitiveID: case OperandType::kOutputDepth: @@ -856,6 +952,9 @@ struct Dest : OperandAddress { static Dest ODepth() { return Dest(OperandType::kOutputDepth, 0b0001); } static Dest Null() { return Dest(OperandType::kNull, 0b0000); } static Dest OMask() { return Dest(OperandType::kOutputCoverageMask, 0b0001); } + static Dest M(uint32_t index) { + return Dest(OperandType::kStream, 0b0000, index); + } static Dest VICP(uint32_t control_point_count, uint32_t element, uint32_t read_mask = 0b1111) { return Dest(OperandType::kInputControlPoint, read_mask, control_point_count, @@ -1366,8 +1465,12 @@ enum class Opcode : uint32_t { kDclResource = 88, kDclConstantBuffer = 89, kDclSampler = 90, + kDclOutputTopology = 92, + kDclInputPrimitive = 93, + kDclMaxOutputVertexCount = 94, kDclInput = 95, kDclInputSGV = 96, + kDclInputSIV = 97, kDclInputPS = 98, kDclInputPSSGV = 99, kDclInputPSSIV = 100, @@ -1377,6 +1480,9 @@ enum class Opcode : uint32_t { kDclIndexableTemp = 105, kDclGlobalFlags = 106, kLOD = 108, + kEmitStream = 117, + kCutStream = 118, + kEmitThenCutStream = 119, kDerivRTXCoarse = 122, kDerivRTXFine = 123, kDerivRTYCoarse = 124, @@ -1390,6 +1496,7 @@ enum class Opcode : uint32_t { kIBFE = 139, kBFI = 140, kBFRev = 141, + kDclStream = 143, kDclInputControlPointCount = 147, kDclTessDomain = 149, kDclThreadGroup = 155, @@ -1942,6 +2049,24 @@ class Assembler { operand.Write(code_, false, 0b1111, false, true); code_.push_back(space); } + void OpDclOutputTopology(PrimitiveTopology output_topology) { + code_.push_back(OpcodeToken(Opcode::kDclOutputTopology, 0) | + (uint32_t(output_topology) << 11)); + stat_.gs_output_topology = output_topology; + } + void OpDclInputPrimitive(Primitive input_primitive) { + code_.push_back(OpcodeToken(Opcode::kDclInputPrimitive, 0) | + (uint32_t(input_primitive) << 11)); + stat_.input_primitive = input_primitive; + } + // Returns the index of the count written in the code_ vector. + size_t OpDclMaxOutputVertexCount(uint32_t count) { + code_.reserve(code_.size() + 2); + code_.push_back(OpcodeToken(Opcode::kDclMaxOutputVertexCount, 1)); + code_.push_back(count); + stat_.gs_max_output_vertex_count = count; + return code_.size() - 1; + } void OpDclInput(const Dest& operand) { uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 1 + operands_length); @@ -1957,6 +2082,14 @@ class Assembler { code_.push_back(uint32_t(name)); ++stat_.dcl_count; } + void OpDclInputSIV(const Dest& operand, Name name) { + uint32_t operands_length = operand.GetLength(); + code_.reserve(code_.size() + 2 + operands_length); + code_.push_back(OpcodeToken(Opcode::kDclInputSIV, 1 + operands_length)); + operand.Write(code_, true); + code_.push_back(uint32_t(name)); + ++stat_.dcl_count; + } void OpDclInputPS(InterpolationMode interpolation_mode, const Dest& operand) { uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 1 + operands_length); @@ -2039,6 +2172,31 @@ class Assembler { ++stat_.instruction_count; ++stat_.lod_instructions; } + void OpEmitStream(const Dest& stream) { + uint32_t operands_length = stream.GetLength(); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kEmitStream, operands_length)); + stream.Write(code_); + ++stat_.instruction_count; + ++stat_.emit_instruction_count; + } + void OpCutStream(const Dest& stream) { + uint32_t operands_length = stream.GetLength(); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kCutStream, operands_length)); + stream.Write(code_); + ++stat_.instruction_count; + ++stat_.cut_instruction_count; + } + void OpEmitThenCutStream(const Dest& stream) { + uint32_t operands_length = stream.GetLength(); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kEmitThenCutStream, operands_length)); + stream.Write(code_); + ++stat_.instruction_count; + ++stat_.emit_instruction_count; + ++stat_.cut_instruction_count; + } void OpDerivRTXCoarse(const Dest& dest, const Src& src, bool saturate = false) { EmitAluOp(Opcode::kDerivRTXCoarse, 0b0, dest, src, saturate); @@ -2096,6 +2254,12 @@ class Assembler { EmitAluOp(Opcode::kBFRev, 0b1, dest, src); ++stat_.uint_instruction_count; } + void OpDclStream(const Dest& stream) { + uint32_t operands_length = stream.GetLength(); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kDclStream, operands_length)); + stream.Write(code_, true); + } void OpDclInputControlPointCount(uint32_t count) { code_.push_back(OpcodeToken(Opcode::kDclInputControlPointCount, 0) | (count << 11));