[GPU] Fix scalar c[#+aL], shader docs/refactoring

This commit is contained in:
Triang3l 2022-04-13 23:08:19 +03:00
parent 1f324bebcd
commit fea430f1f9
6 changed files with 395 additions and 201 deletions

View file

@ -1331,12 +1331,12 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
dxbc::Index index(operand.storage_index); dxbc::Index index(operand.storage_index);
switch (operand.storage_addressing_mode) { switch (operand.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: case InstructionStorageAddressingMode::kAbsolute:
break; break;
case InstructionStorageAddressingMode::kAddressAbsolute: case InstructionStorageAddressingMode::kAddressRegisterRelative:
index = dxbc::Index(system_temp_ps_pc_p0_a0_, 3, operand.storage_index); index = dxbc::Index(system_temp_ps_pc_p0_a0_, 3, operand.storage_index);
break; break;
case InstructionStorageAddressingMode::kAddressRelative: case InstructionStorageAddressingMode::kLoopRelative:
index = dxbc::Index(system_temp_aL_, 0, operand.storage_index); index = dxbc::Index(system_temp_aL_, 0, operand.storage_index);
break; break;
} }
@ -1365,7 +1365,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
src = dxbc::Src::R(temp); src = dxbc::Src::R(temp);
} else { } else {
assert_true(operand.storage_addressing_mode == assert_true(operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic); InstructionStorageAddressingMode::kAbsolute);
src = dxbc::Src::R(index.index_); src = dxbc::Src::R(index.index_);
} }
} break; } break;
@ -1376,7 +1376,7 @@ dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand,
const Shader::ConstantRegisterMap& constant_register_map = const Shader::ConstantRegisterMap& constant_register_map =
current_shader().constant_register_map(); current_shader().constant_register_map();
if (operand.storage_addressing_mode == if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
uint32_t float_constant_index = uint32_t float_constant_index =
constant_register_map.GetPackedFloatConstantIndex( constant_register_map.GetPackedFloatConstantIndex(
operand.storage_index); operand.storage_index);
@ -1429,13 +1429,13 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
if (current_shader().uses_register_dynamic_addressing()) { if (current_shader().uses_register_dynamic_addressing()) {
dxbc::Index register_index(result.storage_index); dxbc::Index register_index(result.storage_index);
switch (result.storage_addressing_mode) { switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: case InstructionStorageAddressingMode::kAbsolute:
break; break;
case InstructionStorageAddressingMode::kAddressAbsolute: case InstructionStorageAddressingMode::kAddressRegisterRelative:
register_index = register_index =
dxbc::Index(system_temp_ps_pc_p0_a0_, 3, result.storage_index); dxbc::Index(system_temp_ps_pc_p0_a0_, 3, result.storage_index);
break; break;
case InstructionStorageAddressingMode::kAddressRelative: case InstructionStorageAddressingMode::kLoopRelative:
register_index = register_index =
dxbc::Index(system_temp_aL_, 0, result.storage_index); dxbc::Index(system_temp_aL_, 0, result.storage_index);
break; break;
@ -1443,7 +1443,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
dest = dxbc::Dest::X(0, register_index); dest = dxbc::Dest::X(0, register_index);
} else { } else {
assert_true(result.storage_addressing_mode == assert_true(result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic); InstructionStorageAddressingMode::kAbsolute);
dest = dxbc::Dest::R(result.storage_index); dest = dxbc::Dest::R(result.storage_index);
} }
break; break;

View file

@ -44,7 +44,7 @@ namespace gpu {
enum class InstructionStorageTarget { enum class InstructionStorageTarget {
// Result is not stored. // Result is not stored.
kNone, kNone,
// Result is stored to a temporary register indexed by storage_index [0-31]. // Result is stored to a temporary register indexed by storage_index [0-63].
kRegister, kRegister,
// Result is stored into a vertex shader interpolator export [0-15]. // Result is stored into a vertex shader interpolator export [0-15].
kInterpolator, kInterpolator,
@ -85,11 +85,13 @@ constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
enum class InstructionStorageAddressingMode { enum class InstructionStorageAddressingMode {
// The storage index is not dynamically addressed. // The storage index is not dynamically addressed.
kStatic, kAbsolute,
// The storage index is addressed by a0. // The storage index is addressed by a0.
kAddressAbsolute, // Float constants only.
kAddressRegisterRelative,
// The storage index is addressed by aL. // The storage index is addressed by aL.
kAddressRelative, // Float constants and temporary registers only.
kLoopRelative,
}; };
// Describes the source value of a particular component. // Describes the source value of a particular component.
@ -111,6 +113,12 @@ enum class SwizzleSource {
constexpr SwizzleSource GetSwizzleFromComponentIndex(uint32_t i) { constexpr SwizzleSource GetSwizzleFromComponentIndex(uint32_t i) {
return static_cast<SwizzleSource>(i); return static_cast<SwizzleSource>(i);
} }
// Resolves a single component of an ALU source operand swizzle.
//
// Asks ucode::AluInstruction::GetSwizzledComponentIndex which component index
// `swizzle` selects for `component_index`, then converts that index to the
// matching SwizzleSource via GetSwizzleFromComponentIndex.
constexpr SwizzleSource GetSwizzledAluSourceComponent(
    uint32_t swizzle, uint32_t component_index) {
  const uint32_t swizzled_component_index =
      ucode::AluInstruction::GetSwizzledComponentIndex(swizzle,
                                                       component_index);
  return GetSwizzleFromComponentIndex(swizzled_component_index);
}
inline char GetCharForComponentIndex(uint32_t i) { inline char GetCharForComponentIndex(uint32_t i) {
const static char kChars[] = {'x', 'y', 'z', 'w'}; const static char kChars[] = {'x', 'y', 'z', 'w'};
return kChars[i]; return kChars[i];
@ -127,7 +135,7 @@ struct InstructionResult {
uint32_t storage_index = 0; uint32_t storage_index = 0;
// How the storage index is dynamically addressed, if it is. // How the storage index is dynamically addressed, if it is.
InstructionStorageAddressingMode storage_addressing_mode = InstructionStorageAddressingMode storage_addressing_mode =
InstructionStorageAddressingMode::kStatic; InstructionStorageAddressingMode::kAbsolute;
// True to clamp the result value to [0-1]. // True to clamp the result value to [0-1].
bool is_clamped = false; bool is_clamped = false;
// Defines whether each output component is written, though this is from the // Defines whether each output component is written, though this is from the
@ -191,9 +199,9 @@ struct InstructionResult {
}; };
enum class InstructionStorageSource { enum class InstructionStorageSource {
// Source is stored in a temporary register indexed by storage_index [0-31]. // Source is stored in a temporary register indexed by storage_index [0-63].
kRegister, kRegister,
// Source is stored in a float constant indexed by storage_index [0-511]. // Source is stored in a float constant indexed by storage_index [0-255].
kConstantFloat, kConstantFloat,
// Source is stored in a vertex fetch constant indexed by storage_index // Source is stored in a vertex fetch constant indexed by storage_index
// [0-95]. // [0-95].
@ -210,7 +218,7 @@ struct InstructionOperand {
uint32_t storage_index = 0; uint32_t storage_index = 0;
// How the storage index is dynamically addressed, if it is. // How the storage index is dynamically addressed, if it is.
InstructionStorageAddressingMode storage_addressing_mode = InstructionStorageAddressingMode storage_addressing_mode =
InstructionStorageAddressingMode::kStatic; InstructionStorageAddressingMode::kAbsolute;
// True to negate the operand value. // True to negate the operand value.
bool is_negated = false; bool is_negated = false;
// True to take the absolute value of the source (before any negation). // True to take the absolute value of the source (before any negation).

View file

@ -247,22 +247,18 @@ void Shader::GatherExecInformation(
if (sequence & 0b10) { if (sequence & 0b10) {
ucode_disasm_buffer.Append(" serialize\n "); ucode_disasm_buffer.Append(" serialize\n ");
} }
const uint32_t* op_ptr = ucode_data_.data() + instr_offset * 3;
if (sequence & 0b01) { if (sequence & 0b01) {
auto fetch_opcode = FetchOpcode(ucode_data_[instr_offset * 3] & 0x1F); auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
if (fetch_opcode == FetchOpcode::kVertexFetch) { if (op.opcode() == FetchOpcode::kVertexFetch) {
auto& op = *reinterpret_cast<const VertexFetchInstruction*>( GatherVertexFetchInformation(op.vertex_fetch(), previous_vfetch_full,
ucode_data_.data() + instr_offset * 3);
GatherVertexFetchInformation(op, previous_vfetch_full,
ucode_disasm_buffer); ucode_disasm_buffer);
} else { } else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>( GatherTextureFetchInformation(
ucode_data_.data() + instr_offset * 3); op.texture_fetch(), unique_texture_bindings, ucode_disasm_buffer);
GatherTextureFetchInformation(op, unique_texture_bindings,
ucode_disasm_buffer);
} }
} else { } else {
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_data_.data() + auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
instr_offset * 3);
GatherAluInstructionInformation(op, memexport_alloc_current_count, GatherAluInstructionInformation(op, memexport_alloc_current_count,
memexport_eA_written, memexport_eA_written,
ucode_disasm_buffer); ucode_disasm_buffer);
@ -420,7 +416,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
switch (operand.storage_source) { switch (operand.storage_source) {
case InstructionStorageSource::kRegister: case InstructionStorageSource::kRegister:
if (operand.storage_addressing_mode == if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ = register_static_address_bound_ =
std::max(register_static_address_bound_, std::max(register_static_address_bound_,
operand.storage_index + uint32_t(1)); operand.storage_index + uint32_t(1));
@ -430,7 +426,7 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
break; break;
case InstructionStorageSource::kConstantFloat: case InstructionStorageSource::kConstantFloat:
if (operand.storage_addressing_mode == if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
// Store used float constants before translating so the // Store used float constants before translating so the
// translator can use tightly packed indices if not dynamically // translator can use tightly packed indices if not dynamically
// indexed. // indexed.
@ -457,7 +453,7 @@ void Shader::GatherFetchResultInformation(const InstructionResult& result) {
// operand. // operand.
assert_true(result.storage_target == InstructionStorageTarget::kRegister); assert_true(result.storage_target == InstructionStorageTarget::kRegister);
if (result.storage_addressing_mode == if (result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ = std::max( register_static_address_bound_ = std::max(
register_static_address_bound_, result.storage_index + uint32_t(1)); register_static_address_bound_, result.storage_index + uint32_t(1));
} else { } else {
@ -473,7 +469,7 @@ void Shader::GatherAluResultInformation(
switch (result.storage_target) { switch (result.storage_target) {
case InstructionStorageTarget::kRegister: case InstructionStorageTarget::kRegister:
if (result.storage_addressing_mode == if (result.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
register_static_address_bound_ = std::max( register_static_address_bound_ = std::max(
register_static_address_bound_, result.storage_index + uint32_t(1)); register_static_address_bound_, result.storage_index + uint32_t(1));
} else { } else {
@ -789,28 +785,24 @@ void ShaderTranslator::TranslateExecInstructions(
for (uint32_t instr_offset = instr.instruction_address; for (uint32_t instr_offset = instr.instruction_address;
instr_offset < instr.instruction_address + instr.instruction_count; instr_offset < instr.instruction_address + instr.instruction_count;
++instr_offset, sequence >>= 2) { ++instr_offset, sequence >>= 2) {
const uint32_t* op_ptr = ucode_dwords + instr_offset * 3;
if (sequence & 0b01) { if (sequence & 0b01) {
auto fetch_opcode = auto& op = *reinterpret_cast<const FetchInstruction*>(op_ptr);
static_cast<FetchOpcode>(ucode_dwords[instr_offset * 3] & 0x1F); if (op.opcode() == FetchOpcode::kVertexFetch) {
if (fetch_opcode == FetchOpcode::kVertexFetch) { const VertexFetchInstruction& vfetch_op = op.vertex_fetch();
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
ucode_dwords + instr_offset * 3);
ParsedVertexFetchInstruction vfetch_instr; ParsedVertexFetchInstruction vfetch_instr;
if (ParseVertexFetchInstruction(op, previous_vfetch_full_, if (ParseVertexFetchInstruction(vfetch_op, previous_vfetch_full_,
vfetch_instr)) { vfetch_instr)) {
previous_vfetch_full_ = op; previous_vfetch_full_ = vfetch_op;
} }
ProcessVertexFetchInstruction(vfetch_instr); ProcessVertexFetchInstruction(vfetch_instr);
} else { } else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
ucode_dwords + instr_offset * 3);
ParsedTextureFetchInstruction tfetch_instr; ParsedTextureFetchInstruction tfetch_instr;
ParseTextureFetchInstruction(op, tfetch_instr); ParseTextureFetchInstruction(op.texture_fetch(), tfetch_instr);
ProcessTextureFetchInstruction(tfetch_instr); ProcessTextureFetchInstruction(tfetch_instr);
} }
} else { } else {
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords + auto& op = *reinterpret_cast<const AluInstruction*>(op_ptr);
instr_offset * 3);
ParsedAluInstruction alu_instr; ParsedAluInstruction alu_instr;
ParseAluInstruction(op, current_shader().type(), alu_instr); ParseAluInstruction(op, current_shader().type(), alu_instr);
ProcessAluInstruction(alu_instr); ProcessAluInstruction(alu_instr);
@ -826,25 +818,40 @@ static void ParseFetchInstructionResult(uint32_t dest, uint32_t swizzle,
result.storage_index = dest; result.storage_index = dest;
result.is_clamped = false; result.is_clamped = false;
result.storage_addressing_mode = result.storage_addressing_mode =
is_relative ? InstructionStorageAddressingMode::kAddressRelative is_relative ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kStatic; : InstructionStorageAddressingMode::kAbsolute;
result.original_write_mask = 0b1111; result.original_write_mask = 0b1111;
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
switch (swizzle & 0x7) { SwizzleSource component_source = SwizzleSource::k0;
case 4: ucode::FetchDestinationSwizzle component_swizzle =
case 6: ucode::GetFetchDestinationComponentSwizzle(swizzle, i);
result.components[i] = SwizzleSource::k0; switch (component_swizzle) {
case ucode::FetchDestinationSwizzle::kX:
component_source = SwizzleSource::kX;
break; break;
case 5: case ucode::FetchDestinationSwizzle::kY:
result.components[i] = SwizzleSource::k1; component_source = SwizzleSource::kY;
break; break;
case 7: case ucode::FetchDestinationSwizzle::kZ:
result.original_write_mask &= ~uint32_t(1 << i); component_source = SwizzleSource::kZ;
break;
case ucode::FetchDestinationSwizzle::kW:
component_source = SwizzleSource::kW;
break;
case ucode::FetchDestinationSwizzle::k1:
component_source = SwizzleSource::k1;
break;
case ucode::FetchDestinationSwizzle::kKeep:
result.original_write_mask &= ~(UINT32_C(1) << i);
break; break;
default: default:
result.components[i] = GetSwizzleFromComponentIndex(swizzle & 0x3); // ucode::FetchDestinationSwizzle::k0 or the invalid swizzle 6.
// TODO(Triang3l): Find the correct handling of the invalid swizzle 6.
assert_true(component_swizzle == ucode::FetchDestinationSwizzle::k0);
component_source = SwizzleSource::k0;
break;
} }
swizzle >>= 3; result.components[i] = component_source;
} }
} }
@ -867,8 +874,8 @@ bool ParseVertexFetchInstruction(const VertexFetchInstruction& op,
src_op.storage_index = full_op.src(); src_op.storage_index = full_op.src();
src_op.storage_addressing_mode = src_op.storage_addressing_mode =
full_op.is_src_relative() full_op.is_src_relative()
? InstructionStorageAddressingMode::kAddressRelative ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kStatic; : InstructionStorageAddressingMode::kAbsolute;
src_op.is_negated = false; src_op.is_negated = false;
src_op.is_absolute_value = false; src_op.is_absolute_value = false;
src_op.component_count = 1; src_op.component_count = 1;
@ -962,8 +969,8 @@ void ParseTextureFetchInstruction(const TextureFetchInstruction& op,
src_op.storage_source = InstructionStorageSource::kRegister; src_op.storage_source = InstructionStorageSource::kRegister;
src_op.storage_index = op.src(); src_op.storage_index = op.src();
src_op.storage_addressing_mode = src_op.storage_addressing_mode =
op.is_src_relative() ? InstructionStorageAddressingMode::kAddressRelative op.is_src_relative() ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kStatic; : InstructionStorageAddressingMode::kAbsolute;
src_op.is_negated = false; src_op.is_negated = false;
src_op.is_absolute_value = false; src_op.is_absolute_value = false;
src_op.component_count = src_op.component_count =
@ -1144,91 +1151,51 @@ static const AluOpcodeInfo alu_scalar_opcode_infos[0x40] = {
static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i, static void ParseAluInstructionOperand(const AluInstruction& op, uint32_t i,
uint32_t swizzle_component_count, uint32_t swizzle_component_count,
InstructionOperand& out_op) { InstructionOperand& out_op) {
int const_slot = 0;
switch (i) {
case 2:
const_slot = op.src_is_temp(1) ? 0 : 1;
break;
case 3:
const_slot = op.src_is_temp(1) && op.src_is_temp(2) ? 0 : 1;
break;
}
out_op.is_negated = op.src_negate(i); out_op.is_negated = op.src_negate(i);
uint32_t reg = op.src_reg(i); uint32_t reg = op.src_reg(i);
if (op.src_is_temp(i)) { if (op.src_is_temp(i)) {
out_op.storage_source = InstructionStorageSource::kRegister; out_op.storage_source = InstructionStorageSource::kRegister;
out_op.storage_index = reg & 0x1F; out_op.storage_index = AluInstruction::src_temp_reg(reg);
out_op.is_absolute_value = (reg & 0x80) == 0x80; out_op.is_absolute_value = AluInstruction::is_src_temp_value_absolute(reg);
out_op.storage_addressing_mode = out_op.storage_addressing_mode =
(reg & 0x40) ? InstructionStorageAddressingMode::kAddressRelative AluInstruction::is_src_temp_relative(reg)
: InstructionStorageAddressingMode::kStatic; ? InstructionStorageAddressingMode::kLoopRelative
: InstructionStorageAddressingMode::kAbsolute;
} else { } else {
out_op.storage_source = InstructionStorageSource::kConstantFloat; out_op.storage_source = InstructionStorageSource::kConstantFloat;
out_op.storage_index = reg; out_op.storage_index = reg;
if ((const_slot == 0 && op.is_const_0_addressed()) || if (op.src_const_is_addressed(i)) {
(const_slot == 1 && op.is_const_1_addressed())) { if (op.is_const_address_register_relative()) {
if (op.is_address_relative()) {
out_op.storage_addressing_mode = out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressAbsolute; InstructionStorageAddressingMode::kAddressRegisterRelative;
} else { } else {
out_op.storage_addressing_mode = out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative; InstructionStorageAddressingMode::kLoopRelative;
} }
} else { } else {
out_op.storage_addressing_mode = out_op.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic; InstructionStorageAddressingMode::kAbsolute;
} }
out_op.is_absolute_value = op.abs_constants(); out_op.is_absolute_value = op.abs_constants();
} }
out_op.component_count = swizzle_component_count; out_op.component_count = swizzle_component_count;
uint32_t swizzle = op.src_swizzle(i); uint32_t swizzle = op.src_swizzle(i);
if (swizzle_component_count == 1) { if (swizzle_component_count == 1) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3; // Scalar `a` (W).
out_op.components[0] = GetSwizzleFromComponentIndex(a); out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
} else if (swizzle_component_count == 2) { } else if (swizzle_component_count == 2) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3; // Scalar left-hand `a` (W) and right-hand `b` (X).
uint32_t b = ((swizzle >> 0) + 0) & 0x3; out_op.components[0] = GetSwizzledAluSourceComponent(swizzle, 3);
out_op.components[0] = GetSwizzleFromComponentIndex(a); out_op.components[1] = GetSwizzledAluSourceComponent(swizzle, 0);
out_op.components[1] = GetSwizzleFromComponentIndex(b);
} else if (swizzle_component_count == 3) { } else if (swizzle_component_count == 3) {
assert_always(); assert_always();
} else if (swizzle_component_count == 4) { } else if (swizzle_component_count == 4) {
for (uint32_t j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) { for (uint32_t j = 0; j < swizzle_component_count; ++j) {
out_op.components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3); out_op.components[j] = GetSwizzledAluSourceComponent(swizzle, j);
} }
} }
} }
// Fills `out_op` as a single-component operand read from either a temporary
// register or a float constant.
//
// op:              ALU instruction providing abs_constants() and the constant
//                  addressing flags.
// storage_source:  kRegister selects the register path; any other value takes
//                  the constant path.
// reg:             raw register / constant number.
// negate:          copied to out_op.is_negated.
// const_slot:      which constant slot's "addressed" flag to consult (0 or 1);
//                  any other value results in static addressing.
// component_index: the one component to read, converted with
//                  GetSwizzleFromComponentIndex.
static void ParseAluInstructionOperandSpecial(
    const AluInstruction& op, InstructionStorageSource storage_source,
    uint32_t reg, bool negate, int const_slot, uint32_t component_index,
    InstructionOperand& out_op) {
  // Fields that do not depend on where the operand is stored.
  out_op.storage_source = storage_source;
  out_op.is_negated = negate;
  out_op.is_absolute_value = op.abs_constants();
  out_op.component_count = 1;
  out_op.components[0] = GetSwizzleFromComponentIndex(component_index);

  if (storage_source == InstructionStorageSource::kRegister) {
    // Registers: only the low 7 bits of `reg` are kept, and the index is never
    // dynamically addressed here.
    out_op.storage_index = reg & 0x7F;
    out_op.storage_addressing_mode = InstructionStorageAddressingMode::kStatic;
    return;
  }

  // Constants: the index is used as is; the addressing mode depends on whether
  // the selected constant slot is flagged as addressed.
  out_op.storage_index = reg;
  const bool slot_is_addressed =
      (const_slot == 0 && op.is_const_0_addressed()) ||
      (const_slot == 1 && op.is_const_1_addressed());
  if (!slot_is_addressed) {
    out_op.storage_addressing_mode = InstructionStorageAddressingMode::kStatic;
    return;
  }
  // is_address_relative() is only consulted for addressed slots.
  out_op.storage_addressing_mode =
      op.is_address_relative()
          ? InstructionStorageAddressingMode::kAddressAbsolute
          : InstructionStorageAddressingMode::kAddressRelative;
}
bool ParsedAluInstruction::IsVectorOpDefaultNop() const { bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
if (vector_opcode != ucode::AluVectorOpcode::kMax || if (vector_opcode != ucode::AluVectorOpcode::kMax ||
vector_and_constant_result.original_write_mask || vector_and_constant_result.original_write_mask ||
@ -1237,14 +1204,14 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
InstructionStorageSource::kRegister || InstructionStorageSource::kRegister ||
vector_operands[0].storage_index != 0 || vector_operands[0].storage_index != 0 ||
vector_operands[0].storage_addressing_mode != vector_operands[0].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic || InstructionStorageAddressingMode::kAbsolute ||
vector_operands[0].is_negated || vector_operands[0].is_absolute_value || vector_operands[0].is_negated || vector_operands[0].is_absolute_value ||
!vector_operands[0].IsStandardSwizzle() || !vector_operands[0].IsStandardSwizzle() ||
vector_operands[1].storage_source != vector_operands[1].storage_source !=
InstructionStorageSource::kRegister || InstructionStorageSource::kRegister ||
vector_operands[1].storage_index != 0 || vector_operands[1].storage_index != 0 ||
vector_operands[1].storage_addressing_mode != vector_operands[1].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic || InstructionStorageAddressingMode::kAbsolute ||
vector_operands[1].is_negated || vector_operands[1].is_absolute_value || vector_operands[1].is_negated || vector_operands[1].is_absolute_value ||
!vector_operands[1].IsStandardSwizzle()) { !vector_operands[1].IsStandardSwizzle()) {
return false; return false;
@ -1253,7 +1220,7 @@ bool ParsedAluInstruction::IsVectorOpDefaultNop() const {
InstructionStorageTarget::kRegister) { InstructionStorageTarget::kRegister) {
if (vector_and_constant_result.storage_index != 0 || if (vector_and_constant_result.storage_index != 0 ||
vector_and_constant_result.storage_addressing_mode != vector_and_constant_result.storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
return false; return false;
} }
} else { } else {
@ -1330,14 +1297,14 @@ void ParseAluInstruction(const AluInstruction& op,
instr.vector_and_constant_result.storage_target = storage_target; instr.vector_and_constant_result.storage_target = storage_target;
instr.vector_and_constant_result.storage_addressing_mode = instr.vector_and_constant_result.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic; InstructionStorageAddressingMode::kAbsolute;
if (is_export) { if (is_export) {
instr.vector_and_constant_result.storage_index = storage_index_export; instr.vector_and_constant_result.storage_index = storage_index_export;
} else { } else {
instr.vector_and_constant_result.storage_index = op.vector_dest(); instr.vector_and_constant_result.storage_index = op.vector_dest();
if (op.is_vector_dest_relative()) { if (op.is_vector_dest_relative()) {
instr.vector_and_constant_result.storage_addressing_mode = instr.vector_and_constant_result.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative; InstructionStorageAddressingMode::kLoopRelative;
} }
} }
instr.vector_and_constant_result.is_clamped = op.vector_clamp(); instr.vector_and_constant_result.is_clamped = op.vector_clamp();
@ -1372,14 +1339,14 @@ void ParseAluInstruction(const AluInstruction& op,
instr.scalar_result.storage_target = storage_target; instr.scalar_result.storage_target = storage_target;
instr.scalar_result.storage_addressing_mode = instr.scalar_result.storage_addressing_mode =
InstructionStorageAddressingMode::kStatic; InstructionStorageAddressingMode::kAbsolute;
if (is_export) { if (is_export) {
instr.scalar_result.storage_index = storage_index_export; instr.scalar_result.storage_index = storage_index_export;
} else { } else {
instr.scalar_result.storage_index = op.scalar_dest(); instr.scalar_result.storage_index = op.scalar_dest();
if (op.is_scalar_dest_relative()) { if (op.is_scalar_dest_relative()) {
instr.scalar_result.storage_addressing_mode = instr.scalar_result.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative; InstructionStorageAddressingMode::kLoopRelative;
} }
} }
instr.scalar_result.is_clamped = op.scalar_clamp(); instr.scalar_result.is_clamped = op.scalar_clamp();
@ -1395,20 +1362,42 @@ void ParseAluInstruction(const AluInstruction& op,
scalar_opcode_info.src_swizzle_component_count, scalar_opcode_info.src_swizzle_component_count,
instr.scalar_operands[0]); instr.scalar_operands[0]);
} else { } else {
// Constant and temporary register.
bool src3_negate = op.src_negate(3);
uint32_t src3_swizzle = op.src_swizzle(3); uint32_t src3_swizzle = op.src_swizzle(3);
uint32_t component_a = ((src3_swizzle >> 6) + 3) & 0x3;
uint32_t component_b = ((src3_swizzle >> 0) + 0) & 0x3;
uint32_t reg2 = (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1) |
(static_cast<int>(op.scalar_opcode()) & 1);
int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0;
ParseAluInstructionOperandSpecial( // Left-hand constant operand (`a` - W swizzle).
op, InstructionStorageSource::kConstantFloat, op.src_reg(3), InstructionOperand& const_op = instr.scalar_operands[0];
op.src_negate(3), 0, component_a, instr.scalar_operands[0]); const_op.is_negated = src3_negate;
const_op.is_absolute_value = op.abs_constants();
const_op.storage_source = InstructionStorageSource::kConstantFloat;
const_op.storage_index = op.src_reg(3);
if (op.src_const_is_addressed(3)) {
if (op.is_const_address_register_relative()) {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRegisterRelative;
} else {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kLoopRelative;
}
} else {
const_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAbsolute;
}
const_op.component_count = 1;
const_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 3);
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister, // Right-hand temporary register operand (`b` - X swizzle).
reg2, op.src_negate(3), const_slot, InstructionOperand& temp_op = instr.scalar_operands[1];
component_b, instr.scalar_operands[1]); temp_op.is_negated = src3_negate;
temp_op.is_absolute_value = op.abs_constants();
temp_op.storage_source = InstructionStorageSource::kRegister;
temp_op.storage_index = op.scalar_const_op_src_temp_reg();
temp_op.storage_addressing_mode =
InstructionStorageAddressingMode::kAbsolute;
temp_op.component_count = 1;
temp_op.components[0] = GetSwizzledAluSourceComponent(src3_swizzle, 0);
} }
} }
} }
@ -1421,7 +1410,7 @@ bool ParsedAluInstruction::IsScalarOpDefaultNop() const {
if (scalar_result.storage_target == InstructionStorageTarget::kRegister) { if (scalar_result.storage_target == InstructionStorageTarget::kRegister) {
if (scalar_result.storage_index != 0 || if (scalar_result.storage_index != 0 ||
scalar_result.storage_addressing_mode != scalar_result.storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) { InstructionStorageAddressingMode::kAbsolute) {
return false; return false;
} }
} }
@ -1446,7 +1435,7 @@ uint32_t ParsedAluInstruction::GetMemExportStreamConstant() const {
vector_operands[2].storage_source == vector_operands[2].storage_source ==
InstructionStorageSource::kConstantFloat && InstructionStorageSource::kConstantFloat &&
vector_operands[2].storage_addressing_mode == vector_operands[2].storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic && InstructionStorageAddressingMode::kAbsolute &&
vector_operands[2].IsStandardSwizzle() && vector_operands[2].IsStandardSwizzle() &&
!vector_operands[2].is_negated && !vector_operands[2].is_absolute_value) { !vector_operands[2].is_negated && !vector_operands[2].is_absolute_value) {
return vector_operands[2].storage_index; return vector_operands[2].storage_index;

View file

@ -57,13 +57,13 @@ void DisassembleResultOperand(const InstructionResult& result,
} }
if (uses_storage_index) { if (uses_storage_index) {
switch (result.storage_addressing_mode) { switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: case InstructionStorageAddressingMode::kAbsolute:
out->AppendFormat("{}", result.storage_index); out->AppendFormat("{}", result.storage_index);
break; break;
case InstructionStorageAddressingMode::kAddressAbsolute: case InstructionStorageAddressingMode::kAddressRegisterRelative:
out->AppendFormat("[{}+a0]", result.storage_index); out->AppendFormat("[{}+a0]", result.storage_index);
break; break;
case InstructionStorageAddressingMode::kAddressRelative: case InstructionStorageAddressingMode::kLoopRelative:
out->AppendFormat("[{}+aL]", result.storage_index); out->AppendFormat("[{}+aL]", result.storage_index);
break; break;
} }
@ -109,17 +109,17 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
out->Append("_abs"); out->Append("_abs");
} }
switch (op.storage_addressing_mode) { switch (op.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: case InstructionStorageAddressingMode::kAbsolute:
if (op.is_absolute_value) { if (op.is_absolute_value) {
out->AppendFormat("[{}]", op.storage_index); out->AppendFormat("[{}]", op.storage_index);
} else { } else {
out->AppendFormat("{}", op.storage_index); out->AppendFormat("{}", op.storage_index);
} }
break; break;
case InstructionStorageAddressingMode::kAddressAbsolute: case InstructionStorageAddressingMode::kAddressRegisterRelative:
out->AppendFormat("[{}+a0]", op.storage_index); out->AppendFormat("[{}+a0]", op.storage_index);
break; break;
case InstructionStorageAddressingMode::kAddressRelative: case InstructionStorageAddressingMode::kLoopRelative:
out->AppendFormat("[{}+aL]", op.storage_index); out->AppendFormat("[{}+aL]", op.storage_index);
break; break;
} }

View file

@ -3110,16 +3110,16 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
} }
switch (op.storage_addressing_mode) { switch (op.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: { case InstructionStorageAddressingMode::kAbsolute: {
storage_index = b.makeUintConstant(storage_base + op.storage_index); storage_index = b.makeUintConstant(storage_base + op.storage_index);
} break; } break;
case InstructionStorageAddressingMode::kAddressAbsolute: { case InstructionStorageAddressingMode::kAddressRegisterRelative: {
// storage_index + a0 // storage_index + a0
storage_index = storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(storage_base + op.storage_index)); b.makeUintConstant(storage_base + op.storage_index));
} break; } break;
case InstructionStorageAddressingMode::kAddressRelative: { case InstructionStorageAddressingMode::kLoopRelative: {
// storage_index + aL.x // storage_index + aL.x
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
storage_index = storage_index =
@ -3269,16 +3269,16 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage
switch (result.storage_addressing_mode) { switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: { case InstructionStorageAddressingMode::kAbsolute: {
storage_index = b.makeUintConstant(result.storage_index); storage_index = b.makeUintConstant(result.storage_index);
} break; } break;
case InstructionStorageAddressingMode::kAddressAbsolute: { case InstructionStorageAddressingMode::kAddressRegisterRelative: {
// storage_index + a0 // storage_index + a0
storage_index = storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(result.storage_index)); b.makeUintConstant(result.storage_index));
} break; } break;
case InstructionStorageAddressingMode::kAddressRelative: { case InstructionStorageAddressingMode::kLoopRelative: {
// storage_index + aL.x // storage_index + aL.x
auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx, storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx,

View file

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -16,11 +16,45 @@
#include "xenia/base/platform.h" #include "xenia/base/platform.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
// Closest AMD doc: // The XNA Game Studio 3.1 contains Graphics.ShaderCompiler.AssembleFromSource,
// which, for TargetPlatform.Xbox360, can validate and assemble Xbox 360 shader
// microcode from Xbox 360 and Direct3D 9 shader assembly, returning the binary,
// as well as validation warnings and errors and the disassembly via the warning
// output. It is the primary source of information about the binary encoding of
// the instructions, as well as valid usage of instruction parameters and
// sequences.
// https://www.microsoft.com/en-us/download/details.aspx?id=39
// (XNAGS31_setup.exe)
// Xenia provides a tool, tools/shader-playground, that invokes the assembler,
// displays the binary and the disassembly from the official assembler, and also
// shows the disassembly generated by Xenia, and passes it back to the assembler
// to validate Xenia's microcode parsing and disassembly by checking if
// reassembling the disassembly results in the same binary.
//
// The behavior and the parameters of some of the instructions were previously
// documented on MSDN in the XNA Game Studio programming guide:
// http://web.archive.org/web/20081211005537/http://msdn.microsoft.com/en-us/library/bb313877.aspx
//
// A great amount of documentation, such as the R400 sequencer specification and
// the official emulator code, was made available during the LG Electronics,
// Inc. v. ATI Technologies ULC "Multi-thread Graphics Processing System" patent
// dispute IPR2015-00325, with the motion to seal having been denied due to "a
// strong public policy interest in making all information filed in an inter
// partes review publicly available". Most of the documents attached, however,
// cover early versions - the development process - of the R400 architecture, so
// there are some differences from the final Xenos GPU (DOT2ADDv is defined
// differently, for example, and MUL/ADD/SUB_CONST are missing).
// https://portal.unifiedpatents.com/ptab/case/IPR2015-00325
//
// Also, the R600, while having a different 5-scalar, as opposed to vec4|scalar,
// parallelism model and instruction encodings and targeting Direct3D 10 rather
// than 9, inherits a lot of instructions and architectural concepts from the
// R400.
// https://www.x.org/docs/AMD/old/r600isa.pdf
// https://developer.amd.com/wordpress/media/2012/10/r600isa.pdf
// https://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf // https://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
// Microcode format differs, but most fields/enums are the same.
// This code comes from the freedreno project: // Parts of this code also come from the freedreno project:
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h // https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
/* /*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com> * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
@ -156,7 +190,8 @@ struct ControlFlowExecInstruction {
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
uint32_t count() const { return count_; } uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch. // Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; } uint32_t sequence() const { return serialize_; }
// Whether to reset the current predicate. // Whether to reset the current predicate.
bool clean() const { return clean_ == 1; } bool clean() const { return clean_ == 1; }
@ -189,7 +224,8 @@ struct ControlFlowCondExecInstruction {
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
uint32_t count() const { return count_; } uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch. // Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; } uint32_t sequence() const { return serialize_; }
// Constant index used as the conditional. // Constant index used as the conditional.
uint32_t bool_address() const { return bool_address_; } uint32_t bool_address() const { return bool_address_; }
@ -224,7 +260,8 @@ struct ControlFlowCondExecPredInstruction {
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
uint32_t count() const { return count_; } uint32_t count() const { return count_; }
// Sequence bits, 2 per instruction, indicating whether ALU or fetch. // Sequence bits, 2 per instruction.
// [0] - ALU (0) or fetch (1), [1] - serialize.
uint32_t sequence() const { return serialize_; } uint32_t sequence() const { return serialize_; }
// Whether to reset the current predicate. // Whether to reset the current predicate.
bool clean() const { return clean_ == 1; } bool clean() const { return clean_ == 1; }
@ -591,6 +628,24 @@ enum class FetchOpcode : uint32_t {
kSetTextureGradientsVert = 26, kSetTextureGradientsVert = 26,
}; };
// Selector for one component of a fetch instruction's destination swizzle.
// Each destination component has its own 3-bit selector (see how
// GetFetchDestinationComponentSwizzle extracts `(swizzle >> (3 * component)) &
// 0b111`).
enum class FetchDestinationSwizzle {
// The component indices are absolute (not relative to the component itself,
// unlike in ALU operation sources).
kX = 0,
kY = 1,
kZ = 2,
kW = 3,
// Constant selectors - presumably write the literal 0.0 / 1.0 rather than
// fetched data (older code in this file labeled encodings 4 and 5 as "0.0" and
// "1.0").
k0 = 4,
k1 = 5,
// NOTE(review): encoding 6 has no enumerator here; its meaning is not
// established by this file.
// Keep the current value of the destination register (don't write).
kKeep = 7,
};
constexpr FetchDestinationSwizzle GetFetchDestinationComponentSwizzle(
    uint32_t swizzle, uint32_t component) {
  // Every destination component owns a 3-bit selector inside the packed
  // swizzle; component 0 occupies the lowest bits.
  const uint32_t selector_shift = component * 3;
  const uint32_t selector_bits = (swizzle >> selector_shift) & 0x7;
  return static_cast<FetchDestinationSwizzle>(selector_bits);
}
struct alignas(uint32_t) VertexFetchInstruction { struct alignas(uint32_t) VertexFetchInstruction {
FetchOpcode opcode() const { return data_.opcode_value; } FetchOpcode opcode() const { return data_.opcode_value; }
@ -614,29 +669,6 @@ struct alignas(uint32_t) VertexFetchInstruction {
uint32_t src_swizzle() const { return data_.src_swiz; } uint32_t src_swizzle() const { return data_.src_swiz; }
bool is_src_relative() const { return data_.src_reg_am; } bool is_src_relative() const { return data_.src_reg_am; }
// Returns true if the fetch actually fetches data.
// This may be false if it's used only to populate constants.
bool fetches_any_data() const {
  // Walk the four 3-bit destination selectors, lowest component first.
  // Selectors 0-3 pick a fetched component; 4 (0.0), 5 (1.0), 6 (?) and
  // 7 (previous register value) do not read fetched data.
  uint32_t remaining_selectors = data_.dst_swiz;
  for (int component = 0; component < 4; ++component) {
    const uint32_t selector = remaining_selectors & 0x7;
    if (selector < 4) {
      return true;
    }
    remaining_selectors >>= 3;
  }
  return false;
}
uint32_t prefetch_count() const { return data_.prefetch_count; } uint32_t prefetch_count() const { return data_.prefetch_count; }
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; } bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
@ -676,6 +708,7 @@ struct alignas(uint32_t) VertexFetchInstruction {
uint32_t const_index_sel : 2; uint32_t const_index_sel : 2;
// Prefetch count minus 1. // Prefetch count minus 1.
uint32_t prefetch_count : 3; uint32_t prefetch_count : 3;
// Absolute, one component.
uint32_t src_swiz : 2; uint32_t src_swiz : 2;
}; };
struct { struct {
@ -769,10 +802,11 @@ struct alignas(uint32_t) TextureFetchInstruction {
uint32_t fetch_valid_only : 1; uint32_t fetch_valid_only : 1;
uint32_t const_index : 5; uint32_t const_index : 5;
uint32_t tx_coord_denorm : 1; uint32_t tx_coord_denorm : 1;
uint32_t src_swiz : 6; // xyz // Absolute, three components.
uint32_t src_swiz : 6;
}; };
struct { struct {
uint32_t dst_swiz : 12; // xyzw uint32_t dst_swiz : 12;
xenos::TextureFilter mag_filter : 2; xenos::TextureFilter mag_filter : 2;
xenos::TextureFilter min_filter : 2; xenos::TextureFilter min_filter : 2;
xenos::TextureFilter mip_filter : 2; xenos::TextureFilter mip_filter : 2;
@ -801,21 +835,96 @@ struct alignas(uint32_t) TextureFetchInstruction {
}; };
static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3); static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
// View of a fetch instruction that exposes the fields shared by vertex and
// texture fetches (through Data), alongside the full vertex-specific and
// texture-specific layouts overlaid on the same storage. Which specific view
// is meaningful is determined by opcode().
union alignas(uint32_t) FetchInstruction {
public:
FetchOpcode opcode() const { return data_.opcode_value; }
// Whether the fetch is predicated (or conditional).
bool is_predicated() const { return data_.is_predicated; }
// Required condition value of the comparison (true or false).
bool predicate_condition() const { return data_.pred_condition == 1; }
// Destination register index field (6 bits).
uint32_t dest() const { return data_.dst_reg; }
// Raw 12-bit dst_swiz field.
uint32_t dest_swizzle() const { return data_.dst_swiz; }
// dst_reg_am bit - the addressing mode flag of the destination register.
bool is_dest_relative() const { return data_.dst_reg_am; }
// Source register index field (6 bits).
uint32_t src() const { return data_.src_reg; }
// src_reg_am bit - the addressing mode flag of the source register.
bool is_src_relative() const { return data_.src_reg_am; }
// For FetchOpcode::kVertexFetch.
const VertexFetchInstruction& vertex_fetch() const { return vertex_fetch_; }
// For operations other than FetchOpcode::kVertexFetch.
const TextureFetchInstruction& texture_fetch() const {
return texture_fetch_;
}
private:
// Only the fields common to both fetch kinds are named; the unnamed
// bit-fields pad over the bits whose meaning depends on the fetch kind.
struct Data {
// First group of bit-fields (5 + 6 + 1 + 6 + 1 + 1 + 5 + 7 = 32 bits).
struct {
FetchOpcode opcode_value : 5;
uint32_t src_reg : 6;
uint32_t src_reg_am : 1;
uint32_t dst_reg : 6;
uint32_t dst_reg_am : 1;
// Specific to vertex or texture fetch.
uint32_t : 1;
// [0-31], points to one tf# or three vf# constants.
uint32_t const_index : 5;
// Specific to vertex or texture fetch.
uint32_t : 7;
};
// Second group of bit-fields (12 + 19 + 1 = 32 bits).
struct {
uint32_t dst_swiz : 12;
// Specific to vertex or texture fetch.
uint32_t : 19;
uint32_t is_predicated : 1;
};
// Third group of bit-fields (31 + 1 = 32 bits).
struct {
// Specific to vertex or texture fetch.
uint32_t : 31;
uint32_t pred_condition : 1;
};
};
Data data_;
VertexFetchInstruction vertex_fetch_;
TextureFetchInstruction texture_fetch_;
};
static_assert_size(FetchInstruction, sizeof(uint32_t) * 3);
// What follows is largely a mash up of the microcode assembly naming and the // What follows is largely a mash up of the microcode assembly naming and the
// R600 docs that have a near 1:1 with the instructions available in the xenos // R600 docs that have a near 1:1 with the instructions available in the Xenos
// GPU, and Adreno 2xx instruction names found in Freedreno. Some of the // GPU, and Adreno 2xx instruction names found in Freedreno. Some of the
// behavior has been experimentally verified. Some has been guessed. // behavior has been experimentally verified. Some has been guessed. Some
// Docs: https://www.x.org/docs/AMD/old/r600isa.pdf // instructions are implemented in the Exhibit 2092 - sq_alu of IPR2015-00325,
// however, the code provided there is early and incomplete.
// //
// Conventions: // Conventions:
// - All temporary registers are vec4s. // - All temporary registers are vec4s.
// - Scalar ops swizzle out a single component of their source registers denoted // - Most scalar ALU operations work with one or two components of the source
// by 'a' or 'b'. src0.a means 'the first component specified for src0' and // register passed as the third operand of the whole co-issued ALU operation,
// src0.ab means 'two components specified for src0, in order'. // denoted by `a` (the left-hand operand) and `b` (the right-hand operand).
// - Scalar ops write the result to the entire destination register. // `a` is the [(3 + src3_swizzle[6:7]) & 3] component (W - alpha).
// - pv and ps are the previous results of a vector or scalar ALU operation. // `b` is the [(0 + src3_swizzle[0:1]) & 3] component (X - red).
// Both are valid only within the current ALU clause. They are not modified // - mulsc, addsc, subsc scalar ALU operations accept two operands - a float
// when the instruction that would write them fails its predication check. // constant with the `a` (W) swizzle (addressed by the third operand index and
// addressing mode) being the left-hand operand, and a temporary register with
// the `b` (X) swizzle with the index constructed from:
// - [0:0] = scalar_opcode[0:0]
// - [1:1] = src3_sel[0:0]
// - [2:5] = src3_swizzle[2:5]
// abs_constants and third source's negation are applied to both the constant
// and the temporary register.
// - Some scalar ALU instructions don't have operands.
// - Scalar ALU operations replicate the result into all masked components.
// - Overall, the WXYZ order is pretty commonly used in the Exhibit 2092 -
// sq_alu of IPR2015-00325; this is likely where the AB = WX order of scalar
// operands comes from. Vector predicate instructions also involve the W and X
// components, and in IPR2015-00325 sq_alu, individual components in the
// emulated vector instructions are handled in the WXYZ order. However, max4's
// "greater than the rest" check order is RGBA (XYZW) there. dp4, though, sums
// the products in WXYZ order in IPR2015-00325 sq_alu (but in XYZW order on
// MSDN).
// - ps is the previous result of a scalar ALU operation. It is not modified
// when the instruction that would write it fails its predication check.
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for // - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
// multiplication (+-0 or denormal * anything = +0) wherever it's present // multiplication (+-0 or denormal * anything = +0) wherever it's present
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect // (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
@ -1137,6 +1246,9 @@ enum class AluScalarOpcode : uint32_t {
// dest.xyzw = sqrt(src0.a); // dest.xyzw = sqrt(src0.a);
kSqrt = 40, kSqrt = 40,
// 0 and 1 are the same instruction - one bit of the register index is stored
// in the opcode field.
// mulsc/MUL_CONST_0 dest, src0.a, src1.a // mulsc/MUL_CONST_0 dest, src0.a, src1.a
kMulsc0 = 42, kMulsc0 = 42,
// mulsc/MUL_CONST_1 dest, src0.a, src1.a // mulsc/MUL_CONST_1 dest, src0.a, src1.a
@ -1303,19 +1415,24 @@ enum class AluVectorOpcode : uint32_t {
// dp4/DOT4v dest, src0, src1 // dp4/DOT4v dest, src0, src1
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + // dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z +
// src0.w * src1.w; // src0.w * src1.w;
// Note: only pv.x contains the value.
kDp4 = 15, kDp4 = 15,
// Three-Element Dot Product // Three-Element Dot Product
// dp3/DOT3v dest, src0, src1 // dp3/DOT3v dest, src0, src1
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z; // dest.xyzw = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z;
// Note: only pv.x contains the value.
kDp3 = 16, kDp3 = 16,
// Two-Element Dot Product and Add // Two-Element Dot Product and Add
// dp2add/DOT2ADDv dest, src0, src1, src2 // dp2add/DOT2ADDv dest, src0, src1, src2
// dest.xyzw = src0.x * src1.x + src0.y * src1.y + src2.x; // dest.xyzw = src0.x * src1.x + src0.y * src1.y + src2.x;
// Note: only pv.x contains the value. // IPR2015-00325 sq_alu may be an outdated and unreliable reference (Sequencer
// Parts Development folder history lists a few changes regarding the swizzle
// in dot2add, sq_alu though implements the instruction as
// src0.x * src1.x + src0.z * src1.z + src2.y, but MSDN specifies the correct
// order as provided in the beginning of this comment, further proven by
// assembling PC shader assembly using XNA, with Shader Model 2 dp2add being
// translated directly into Xenos dp2add without additional swizzling).
// http://web.archive.org/web/20100705150552/http://msdn.microsoft.com/en-us/library/bb313922.aspx
kDp2Add = 17, kDp2Add = 17,
// Cube Map // Cube Map
@ -1363,8 +1480,16 @@ enum class AluVectorOpcode : uint32_t {
// Four-Element Maximum // Four-Element Maximum
// max4/MAX4v dest, src0 // max4/MAX4v dest, src0
// dest.xyzw = max(src0.x, src0.y, src0.z, src0.w); // According to IPR2015-00325 sq_alu:
// Note: only pv.x contains the value. // if (src0.x > src0.y && src0.x > src0.z && src0.x > src0.w) {
// dest.xyzw = src0.x;
// } else if (src0.y > src0.z && src0.y > src0.w) {
// dest.xyzw = src0.y;
// } else if (src0.z > src0.w) {
// dest.xyzw = src0.z;
// } else {
// dest.xyzw = src0.w;
// }
kMax4 = 19, kMax4 = 19,
// Floating-Point Predicate Counter Increment If Equal // Floating-Point Predicate Counter Increment If Equal
@ -1672,7 +1797,9 @@ struct alignas(uint32_t) AluInstruction {
bool abs_constants() const { return data_.abs_constants == 1; } bool abs_constants() const { return data_.abs_constants == 1; }
bool is_const_0_addressed() const { return data_.const_0_rel_abs == 1; } bool is_const_0_addressed() const { return data_.const_0_rel_abs == 1; }
bool is_const_1_addressed() const { return data_.const_1_rel_abs == 1; } bool is_const_1_addressed() const { return data_.const_1_rel_abs == 1; }
bool is_address_relative() const { return data_.address_absolute == 1; } bool is_const_address_register_relative() const {
return data_.const_address_register_relative == 1;
}
AluVectorOpcode vector_opcode() const { return data_.vector_opc; } AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
uint32_t vector_write_mask() const { return data_.vector_write_mask; } uint32_t vector_write_mask() const { return data_.vector_write_mask; }
@ -1686,6 +1813,18 @@ struct alignas(uint32_t) AluInstruction {
bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; } bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; }
bool scalar_clamp() const { return data_.scalar_clamp == 1; } bool scalar_clamp() const { return data_.scalar_clamp == 1; }
static constexpr uint32_t src_temp_reg(uint32_t src_reg) {
  // The temporary register index lives in the low 6 bits of the packed
  // source register field.
  constexpr uint32_t kTempIndexMask = (uint32_t(1) << 6) - 1;
  return src_reg & kTempIndexMask;
}
static constexpr bool is_src_temp_relative(uint32_t src_reg) {
  // Bit 6 of the packed source register field is the relative addressing
  // flag of the temporary register.
  constexpr uint32_t kRelativeBit = 6;
  return ((src_reg >> kRelativeBit) & uint32_t(1)) == uint32_t(1);
}
static constexpr bool is_src_temp_value_absolute(uint32_t src_reg) {
  // Bit 7 of the packed source register field is the absolute value flag
  // for the value read from the temporary register.
  constexpr uint32_t kAbsoluteValueBit = 7;
  return ((src_reg >> kAbsoluteValueBit) & uint32_t(1)) == uint32_t(1);
}
// Full register index for constants, packed structure for temporary
// registers (unpack using src_temp_reg, is_src_temp_relative,
// is_src_temp_value_absolute).
uint32_t src_reg(size_t i) const { uint32_t src_reg(size_t i) const {
switch (i) { switch (i) {
case 1: case 1:
@ -1702,16 +1841,59 @@ struct alignas(uint32_t) AluInstruction {
bool src_is_temp(size_t i) const { bool src_is_temp(size_t i) const {
switch (i) { switch (i) {
case 1: case 1:
return data_.src1_sel == 1; return bool(data_.src1_sel);
case 2: case 2:
return data_.src2_sel == 1; return bool(data_.src2_sel);
case 3: case 3:
return data_.src3_sel == 1; return bool(data_.src3_sel);
default: default:
assert_unhandled_case(i); assert_unhandled_case(i);
return 0; return 0;
} }
} }
// Whether the specified operand is actually a constant is disregarded in this
// function so its scope is limited to just parsing the structure's layout -
// to decide whether to use relative addressing for the operand as a whole,
// check externally whether the operand is actually a constant first.
//
// For the constant operand in mulsc, addsc, subsc, this should be called for
// the operand index 3. Note that the XNA disassembler takes the addressing
// mode for the constant scalar operand unconditionally from const_1_rel_abs,
// and ignores the +aL for it unless the scalar operation is co-issued with a
// vector operation reading from a constant. However, the XNA assembler treats
// the constant scalar operand as a constant in the third operand, and places
// the addressing mode for it in const_0_rel_abs if no other constants are
// used in the whole ALU instruction. The validator also doesn't report
// anything if +aL is used when the constant scalar operand is the only
// constant in the instruction (and explicitly calls it the third constant in
// the error message in case both vector operands are constants, and different
// addressing modes are used for the second vector operand and the constant
// scalar operand). Passing the disassembly produced by XNA back to the
// assembler results in different microcode in this case. This indicates that
// most likely there's a bug in the XNA disassembler, and that the addressing
// mode for the constant scalar operand should actually be taken the same way
// as for the third vector operand - from const_0_rel_abs if there are no
// constant vector operands, or from const_1_rel_abs if there is at least one.
bool src_const_is_addressed(size_t i) const {
  // Only operand indices 1 to 3 exist.
  if (i < 1 || i > 3) {
    assert_unhandled_case(i);
    return false;
  }
  // "error X7100: When three constants are used in one instruction, the
  // second and third constant must either both be non-relative, or both be
  // relative."
  // The first constant in the instruction takes its addressing mode from
  // const_0_rel_abs, every later one from const_1_rel_abs - so the choice
  // depends only on whether any of the preceding operands is a constant.
  bool constant_precedes = false;
  for (size_t previous = 1; previous < i; ++previous) {
    if (!src_is_temp(previous)) {
      constant_precedes = true;
      break;
    }
  }
  if (constant_precedes) {
    return bool(data_.const_1_rel_abs);
  }
  return bool(data_.const_0_rel_abs);
}
uint32_t src_swizzle(size_t i) const { uint32_t src_swizzle(size_t i) const {
switch (i) { switch (i) {
case 1: case 1:
@ -1739,8 +1921,20 @@ struct alignas(uint32_t) AluInstruction {
} }
} }
uint32_t scalar_const_op_src_temp_reg() const {
  // The temporary register index of mulsc/addsc/subsc is scattered across
  // three fields of the instruction:
  // [0:0] - the lowest bit of the scalar opcode.
  const uint32_t index_bit_0 = uint32_t(data_.scalar_opc) & 1;
  // [1:1] - the src3_sel bit.
  const uint32_t index_bit_1 = uint32_t(data_.src3_sel) << 1;
  // [2:5] - bits 2 to 5 of src3_swiz, already in place.
  const uint32_t index_bits_2_to_5 = data_.src3_swiz & 0x3C;
  return index_bit_0 | index_bit_1 | index_bits_2_to_5;
}
// Helpers. // Helpers.
// Returns the absolute component index calculated from the relative swizzle
// in an ALU instruction.
static constexpr uint32_t GetSwizzledComponentIndex(
    uint32_t swizzle, uint32_t component_index) {
  // Each component has a 2-bit offset in the swizzle, stored relative to the
  // component's own index; the sum wraps around within the four components.
  const uint32_t offset_shift = component_index * 2;
  const uint32_t relative_offset = swizzle >> offset_shift;
  return (relative_offset + component_index) & 3;
}
// Note that even if the export component is unused (like W of the vertex // Note that even if the export component is unused (like W of the vertex
// shader misc register, YZW of pixel shader depth), it must still not be // shader misc register, YZW of pixel shader depth), it must still not be
// excluded - that may make disassembly not reassemblable if there are // excluded - that may make disassembly not reassemblable if there are
@ -1803,6 +1997,7 @@ struct alignas(uint32_t) AluInstruction {
AluScalarOpcode scalar_opc : 6; AluScalarOpcode scalar_opc : 6;
}; };
struct { struct {
// Swizzles are component-relative.
uint32_t src3_swiz : 8; uint32_t src3_swiz : 8;
uint32_t src2_swiz : 8; uint32_t src2_swiz : 8;
uint32_t src1_swiz : 8; uint32_t src1_swiz : 8;
@ -1811,7 +2006,9 @@ struct alignas(uint32_t) AluInstruction {
uint32_t src1_reg_negate : 1; uint32_t src1_reg_negate : 1;
uint32_t pred_condition : 1; uint32_t pred_condition : 1;
uint32_t is_predicated : 1; uint32_t is_predicated : 1;
uint32_t address_absolute : 1; // Temporary registers can have only absolute and aL-relative indices, not
// a0-relative.
uint32_t const_address_register_relative : 1;
uint32_t const_1_rel_abs : 1; uint32_t const_1_rel_abs : 1;
uint32_t const_0_rel_abs : 1; uint32_t const_0_rel_abs : 1;
}; };