#include "shader_translator.h"
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2015 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
#include "xenia/gpu/shader_translator.h"
#include <cstdarg>
#include <set>
#include <string>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
namespace xe {
namespace gpu {
using namespace ucode;
// The Xbox 360 GPU is effectively an Adreno A200:
// https://github.com/freedreno/freedreno/wiki/A2XX-Shader-Instruction-Set-Architecture
//
// A lot of this information is derived from the freedreno drivers, AMD's
// documentation, publicly available Xbox presentations (from GDC/etc), and
// other reverse engineering.
//
// Naming has been matched as closely as possible to the real thing by using the
// publicly available XNA Game Studio shader assembler.
// You can find a tool for exploring this under tools/shader-playground/,
// allowing interactive assembling/disassembling of shader code.
//
// Though the 360's GPU is similar to the Adreno r200, the microcode format is
// slightly different. Though freedreno's disassembler is a great guide, it
// cannot be assumed to match the 360 in all areas:
// https://github.com/freedreno/freedreno/blob/master/util/disasm-a2xx.c
//
// Lots of naming comes from the disassembly spit out by the XNA GS compiler
// and dumps of d3dcompiler and games: https://pastebin.com/i4kAv7bB
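//
// A note on layout, as this translator consumes it: the ucode begins with
// the control flow program, two cf instructions packed into each group of
// three dwords, followed by the fetch/ALU instructions they reference.
// Every fetch/ALU instruction also spans three dwords, which is why exec
// addresses are multiplied by 3 throughout this file.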
ShaderTranslator::ShaderTranslator() = default;
ShaderTranslator::~ShaderTranslator() = default;
void ShaderTranslator::Reset() {
errors_.clear();
ucode_disasm_buffer_.Reset();
ucode_disasm_line_number_ = 0;
previous_ucode_disasm_scan_offset_ = 0;
register_count_ = 64;
total_attrib_count_ = 0;
vertex_bindings_.clear();
unique_vertex_bindings_ = 0;
texture_bindings_.clear();
unique_texture_bindings_ = 0;
std::memset(&constant_register_map_, 0, sizeof(constant_register_map_));
uses_register_dynamic_addressing_ = false;
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
writes_color_targets_[i] = false;
}
writes_depth_ = false;
}
bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
// DEPRECATED: remove this codepath when GL4 goes away.
Reset();
shader_type_ = shader->type();
ucode_dwords_ = shader->ucode_dwords();
ucode_dword_count_ = shader->ucode_dword_count();
uint32_t max_cf_dword_index = static_cast<uint32_t>(ucode_dword_count_);
for (uint32_t i = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a;
ControlFlowInstruction cf_b;
UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b);
if (IsControlFlowOpcodeExec(cf_a.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_a.exec.address() * 3);
}
if (IsControlFlowOpcodeExec(cf_b.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_b.exec.address() * 3);
}
GatherInstructionInformation(cf_a);
GatherInstructionInformation(cf_b);
}
shader->vertex_bindings_ = std::move(vertex_bindings_);
shader->texture_bindings_ = std::move(texture_bindings_);
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
shader->writes_color_targets_[i] = writes_color_targets_[i];
}
return true;
}
bool ShaderTranslator::Translate(Shader* shader,
xenos::xe_gpu_program_cntl_t cntl) {
Reset();
register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1
: cntl.ps_regs + 1;
return TranslateInternal(shader);
}
bool ShaderTranslator::Translate(Shader* shader) {
Reset();
return TranslateInternal(shader);
}
bool ShaderTranslator::TranslateInternal(Shader* shader) {
shader_type_ = shader->type();
ucode_dwords_ = shader->ucode_dwords();
ucode_dword_count_ = shader->ucode_dword_count();
// Run through and gather all binding information, and check whether
// registers are dynamically addressed.
// Translators may need this before they start codegen.
uint32_t max_cf_dword_index = static_cast<uint32_t>(ucode_dword_count_);
for (uint32_t i = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a;
ControlFlowInstruction cf_b;
UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b);
if (IsControlFlowOpcodeExec(cf_a.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_a.exec.address() * 3);
}
if (IsControlFlowOpcodeExec(cf_b.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_b.exec.address() * 3);
}
GatherInstructionInformation(cf_a);
GatherInstructionInformation(cf_b);
}
StartTranslation();
TranslateBlocks();
// Compute total number of float registers and total bytes used by the
// register map. This saves us work later when we need to pack them.
constant_register_map_.packed_byte_length = 0;
constant_register_map_.float_count = 0;
for (int i = 0; i < 4; ++i) {
// Each bit indicates a vec4 (4 floats).
constant_register_map_.float_count +=
xe::bit_count(constant_register_map_.float_bitmap[i]);
}
constant_register_map_.packed_byte_length +=
4 * 4 * constant_register_map_.float_count;
// Each bit indicates a single word.
constant_register_map_.packed_byte_length +=
4 * xe::bit_count(constant_register_map_.int_bitmap);
// Direct map between words and words we upload.
for (int i = 0; i < 8; ++i) {
if (constant_register_map_.bool_bitmap[i]) {
constant_register_map_.packed_byte_length += 4;
}
}
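// For example, 10 float vec4s, 2 loop constants, and set bits in a single
// bool word pack to 10*16 + 2*4 + 4 = 172 bytes.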
shader->errors_ = std::move(errors_);
shader->translated_binary_ = CompleteTranslation();
shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string();
shader->vertex_bindings_ = std::move(vertex_bindings_);
shader->texture_bindings_ = std::move(texture_bindings_);
shader->constant_register_map_ = std::move(constant_register_map_);
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
shader->writes_color_targets_[i] = writes_color_targets_[i];
}
shader->is_valid_ = true;
shader->is_translated_ = true;
for (const auto& error : shader->errors_) {
if (error.is_fatal) {
shader->is_valid_ = false;
break;
}
}
PostTranslation(shader);
return shader->is_valid_;
}
void ShaderTranslator::MarkUcodeInstruction(uint32_t dword_offset) {
auto disasm = ucode_disasm_buffer_.GetString();
size_t current_offset = ucode_disasm_buffer_.length();
for (size_t i = previous_ucode_disasm_scan_offset_; i < current_offset; ++i) {
if (disasm[i] == '\n') {
++ucode_disasm_line_number_;
}
}
previous_ucode_disasm_scan_offset_ = current_offset;
}
void ShaderTranslator::AppendUcodeDisasm(char c) {
ucode_disasm_buffer_.Append(c);
}
void ShaderTranslator::AppendUcodeDisasm(const char* value) {
ucode_disasm_buffer_.Append(value);
}
void ShaderTranslator::AppendUcodeDisasmFormat(const char* format, ...) {
va_list va;
va_start(va, format);
ucode_disasm_buffer_.AppendVarargs(format, va);
va_end(va);
}
void ShaderTranslator::EmitTranslationError(const char* message) {
Shader::Error error;
error.is_fatal = true;
error.message = message;
// TODO(benvanik): location information.
errors_.push_back(std::move(error));
}
void ShaderTranslator::EmitUnimplementedTranslationError() {
Shader::Error error;
error.is_fatal = false;
error.message = "Unimplemented translation";
// TODO(benvanik): location information.
errors_.push_back(std::move(error));
}
void ShaderTranslator::GatherInstructionInformation(
const ControlFlowInstruction& cf) {
switch (cf.opcode()) {
case ControlFlowOpcode::kExec:
case ControlFlowOpcode::kExecEnd:
case ControlFlowOpcode::kCondExec:
case ControlFlowOpcode::kCondExecEnd:
case ControlFlowOpcode::kCondExecPred:
case ControlFlowOpcode::kCondExecPredEnd:
case ControlFlowOpcode::kCondExecPredClean:
case ControlFlowOpcode::kCondExecPredCleanEnd: {
uint32_t sequence = cf.exec.sequence();
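// Each instruction in the exec block gets two sequence bits: bit 0 set
// means the slot is a fetch (vfetch/tfetch) rather than an ALU op, and
// bit 1 requests serialization (see TranslateExecInstructions).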
for (uint32_t instr_offset = cf.exec.address();
instr_offset < cf.exec.address() + cf.exec.count();
++instr_offset, sequence >>= 2) {
bool is_fetch = (sequence & 0x1) == 0x1;
if (is_fetch) {
// Gather vertex and texture fetches.
auto fetch_opcode =
static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F);
if (fetch_opcode == FetchOpcode::kVertexFetch) {
assert_true(is_vertex_shader());
GatherVertexFetchInformation(
*reinterpret_cast<const VertexFetchInstruction*>(
ucode_dwords_ + instr_offset * 3));
} else {
GatherTextureFetchInformation(
*reinterpret_cast<const TextureFetchInstruction*>(
ucode_dwords_ + instr_offset * 3));
}
} else {
// Gather up color targets written to, and check if using dynamic
// register indices.
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords_ +
instr_offset * 3);
if (op.has_vector_op()) {
const auto& opcode_info =
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
for (size_t i = 0; i < opcode_info.argument_count; ++i) {
if (op.src_is_temp(i + 1) && (op.src_reg(i + 1) & 0x40)) {
uses_register_dynamic_addressing_ = true;
}
}
if (op.is_export()) {
if (is_pixel_shader()) {
if (op.vector_dest() <= 3) {
writes_color_targets_[op.vector_dest()] = true;
} else if (op.vector_dest() == 61) {
writes_depth_ = true;
}
}
} else {
if (op.is_vector_dest_relative()) {
uses_register_dynamic_addressing_ = true;
}
}
}
if (op.has_scalar_op()) {
const auto& opcode_info =
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
if (opcode_info.argument_count == 1 && op.src_is_temp(3) &&
(op.src_reg(3) & 0x40)) {
uses_register_dynamic_addressing_ = true;
}
if (op.is_export()) {
if (is_pixel_shader()) {
if (op.scalar_dest() <= 3) {
writes_color_targets_[op.scalar_dest()] = true;
} else if (op.scalar_dest() == 61) {
writes_depth_ = true;
}
}
} else {
if (op.is_scalar_dest_relative()) {
uses_register_dynamic_addressing_ = true;
}
}
}
}
}
} break;
default:
break;
}
}
void ShaderTranslator::GatherVertexFetchInformation(
const VertexFetchInstruction& op) {
ParsedVertexFetchInstruction fetch_instr;
ParseVertexFetchInstruction(op, &fetch_instr);
// Don't bother setting up a binding for an instruction that fetches nothing.
if (!op.fetches_any_data()) {
return;
}
// Check if using dynamic register indices.
if (op.is_dest_relative() || op.is_src_relative()) {
uses_register_dynamic_addressing_ = true;
}
// Try to allocate an attribute on an existing binding.
// If no binding exists for this fetch slot, create one.
using VertexBinding = Shader::VertexBinding;
VertexBinding::Attribute* attrib = nullptr;
for (auto& vertex_binding : vertex_bindings_) {
if (vertex_binding.fetch_constant == op.fetch_constant_index()) {
// It may not hold that all strides are equal, but I hope it does.
assert_true(!fetch_instr.attributes.stride ||
vertex_binding.stride_words == fetch_instr.attributes.stride);
vertex_binding.attributes.push_back({});
attrib = &vertex_binding.attributes.back();
break;
}
}
if (!attrib) {
assert_not_zero(fetch_instr.attributes.stride);
VertexBinding vertex_binding;
vertex_binding.binding_index = int(vertex_bindings_.size());
vertex_binding.fetch_constant = op.fetch_constant_index();
vertex_binding.stride_words = fetch_instr.attributes.stride;
vertex_binding.attributes.push_back({});
vertex_bindings_.emplace_back(std::move(vertex_binding));
attrib = &vertex_bindings_.back().attributes.back();
}
// Populate attribute.
attrib->attrib_index = total_attrib_count_++;
attrib->fetch_instr = fetch_instr;
attrib->size_words =
GetVertexFormatSizeInWords(attrib->fetch_instr.attributes.data_format);
}
void ShaderTranslator::GatherTextureFetchInformation(
const TextureFetchInstruction& op) {
// Check if using dynamic register indices.
if (op.is_dest_relative() || op.is_src_relative()) {
uses_register_dynamic_addressing_ = true;
}
switch (op.opcode()) {
case FetchOpcode::kSetTextureLod:
case FetchOpcode::kSetTextureGradientsHorz:
case FetchOpcode::kSetTextureGradientsVert:
// Doesn't use bindings.
return;
default:
// Continue.
break;
}
Shader::TextureBinding binding;
binding.binding_index = -1;
ParseTextureFetchInstruction(op, &binding.fetch_instr);
binding.fetch_constant = binding.fetch_instr.operands[1].storage_index;
// Check whether this fetch constant was used by a previous binding...
for (auto& tex_binding : texture_bindings_) {
if (tex_binding.fetch_constant == binding.fetch_constant) {
binding.binding_index = tex_binding.binding_index;
break;
}
}
if (binding.binding_index == -1) {
// Assign a unique binding index.
binding.binding_index = unique_texture_bindings_++;
}
texture_bindings_.emplace_back(std::move(binding));
}
void AddControlFlowTargetLabel(const ControlFlowInstruction& cf,
std::set<uint32_t>* label_addresses) {
switch (cf.opcode()) {
case ControlFlowOpcode::kLoopStart:
label_addresses->insert(cf.loop_start.address());
break;
case ControlFlowOpcode::kLoopEnd:
label_addresses->insert(cf.loop_end.address());
break;
case ControlFlowOpcode::kCondCall:
label_addresses->insert(cf.cond_call.address());
break;
case ControlFlowOpcode::kCondJmp:
label_addresses->insert(cf.cond_jmp.address());
break;
default:
// Ignored.
break;
}
}
bool ShaderTranslator::TranslateBlocks() {
// Control flow instructions come paired in blocks of 3 dwords and all are
// listed at the top of the ucode.
// Each control flow instruction is executed sequentially until the final
// ending instruction.
// Guess how long the control flow program is by scanning for the first
// kExec-ish instruction and using its address as the upper bound.
// This is what freedreno does.
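// For example, if the first exec encountered targets address 4, the cf
// program is at most 4 * 3 = 12 dwords long (8 cf instructions).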
uint32_t max_cf_dword_index = static_cast<uint32_t>(ucode_dword_count_);
std::set<uint32_t> label_addresses;
std::vector<ControlFlowInstruction> cf_instructions;
for (uint32_t i = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a;
ControlFlowInstruction cf_b;
UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b);
if (IsControlFlowOpcodeExec(cf_a.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_a.exec.address() * 3);
}
if (IsControlFlowOpcodeExec(cf_b.opcode())) {
max_cf_dword_index =
std::min(max_cf_dword_index, cf_b.exec.address() * 3);
}
AddControlFlowTargetLabel(cf_a, &label_addresses);
AddControlFlowTargetLabel(cf_b, &label_addresses);
cf_instructions.push_back(cf_a);
cf_instructions.push_back(cf_b);
}
PreProcessControlFlowInstructions(cf_instructions);
// Translate all instructions.
for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a;
ControlFlowInstruction cf_b;
UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b);
cf_index_ = cf_index;
MarkUcodeInstruction(i);
if (label_addresses.count(cf_index)) {
AppendUcodeDisasmFormat(" label L%u\n", cf_index);
ProcessLabel(cf_index);
}
AppendUcodeDisasmFormat("/* %4u.0 */ ", cf_index / 2);
ProcessControlFlowInstructionBegin(cf_index);
TranslateControlFlowInstruction(cf_a);
ProcessControlFlowInstructionEnd(cf_index);
++cf_index;
cf_index_ = cf_index;
MarkUcodeInstruction(i);
if (label_addresses.count(cf_index)) {
AppendUcodeDisasmFormat(" label L%u\n", cf_index);
ProcessLabel(cf_index);
}
AppendUcodeDisasmFormat("/* %4u.1 */ ", cf_index / 2);
ProcessControlFlowInstructionBegin(cf_index);
TranslateControlFlowInstruction(cf_b);
ProcessControlFlowInstructionEnd(cf_index);
++cf_index;
}
return true;
}
std::vector<uint8_t> UcodeShaderTranslator::CompleteTranslation() {
return ucode_disasm_buffer().ToBytes();
}
void ShaderTranslator::TranslateControlFlowInstruction(
const ControlFlowInstruction& cf) {
switch (cf.opcode()) {
case ControlFlowOpcode::kNop:
TranslateControlFlowNop(cf);
break;
case ControlFlowOpcode::kExec:
case ControlFlowOpcode::kExecEnd:
TranslateControlFlowExec(cf.exec);
break;
case ControlFlowOpcode::kCondExec:
case ControlFlowOpcode::kCondExecEnd:
case ControlFlowOpcode::kCondExecPredClean:
case ControlFlowOpcode::kCondExecPredCleanEnd:
TranslateControlFlowCondExec(cf.cond_exec);
break;
case ControlFlowOpcode::kCondExecPred:
case ControlFlowOpcode::kCondExecPredEnd:
TranslateControlFlowCondExecPred(cf.cond_exec_pred);
break;
case ControlFlowOpcode::kLoopStart:
TranslateControlFlowLoopStart(cf.loop_start);
break;
case ControlFlowOpcode::kLoopEnd:
TranslateControlFlowLoopEnd(cf.loop_end);
break;
case ControlFlowOpcode::kCondCall:
TranslateControlFlowCondCall(cf.cond_call);
break;
case ControlFlowOpcode::kReturn:
TranslateControlFlowReturn(cf.ret);
break;
case ControlFlowOpcode::kCondJmp:
TranslateControlFlowCondJmp(cf.cond_jmp);
break;
case ControlFlowOpcode::kAlloc:
TranslateControlFlowAlloc(cf.alloc);
break;
case ControlFlowOpcode::kMarkVsFetchDone:
break;
default:
assert_unhandled_case(cf.opcode);
break;
}
bool ends_shader = DoesControlFlowOpcodeEndShader(cf.opcode());
if (ends_shader) {
// TODO(benvanik): return?
}
}
void ShaderTranslator::TranslateControlFlowNop(
const ControlFlowInstruction& cf) {
ucode_disasm_buffer_.Append(" cnop\n");
ProcessControlFlowNopInstruction(cf_index_);
}
void ShaderTranslator::TranslateControlFlowExec(
const ControlFlowExecInstruction& cf) {
ParsedExecInstruction i;
i.dword_index = cf_index_;
i.opcode = cf.opcode();
i.opcode_name = cf.opcode() == ControlFlowOpcode::kExecEnd ? "exece" : "exec";
i.instruction_address = cf.address();
i.instruction_count = cf.count();
i.type = ParsedExecInstruction::Type::kUnconditional;
i.is_end = cf.opcode() == ControlFlowOpcode::kExecEnd;
i.clean = cf.clean();
i.is_yield = cf.is_yield();
i.sequence = cf.sequence();
TranslateExecInstructions(i);
}
void ShaderTranslator::TranslateControlFlowCondExec(
const ControlFlowCondExecInstruction& cf) {
ParsedExecInstruction i;
i.dword_index = cf_index_;
i.opcode = cf.opcode();
i.opcode_name = "cexec";
switch (cf.opcode()) {
case ControlFlowOpcode::kCondExecEnd:
case ControlFlowOpcode::kCondExecPredCleanEnd:
i.opcode_name = "cexece";
i.is_end = true;
break;
default:
break;
}
i.instruction_address = cf.address();
i.instruction_count = cf.count();
i.type = ParsedExecInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
switch (cf.opcode()) {
case ControlFlowOpcode::kCondExec:
case ControlFlowOpcode::kCondExecEnd:
i.clean = false;
break;
default:
break;
}
i.is_yield = cf.is_yield();
i.sequence = cf.sequence();
TranslateExecInstructions(i);
}
void ShaderTranslator::TranslateControlFlowCondExecPred(
const ControlFlowCondExecPredInstruction& cf) {
ParsedExecInstruction i;
i.dword_index = cf_index_;
i.opcode = cf.opcode();
i.opcode_name =
cf.opcode() == ControlFlowOpcode::kCondExecPredEnd ? "exece" : "exec";
i.instruction_address = cf.address();
i.instruction_count = cf.count();
i.type = ParsedExecInstruction::Type::kPredicated;
i.condition = cf.condition();
i.is_end = cf.opcode() == ControlFlowOpcode::kCondExecPredEnd;
i.clean = cf.clean();
i.is_yield = cf.is_yield();
i.sequence = cf.sequence();
TranslateExecInstructions(i);
}
void ShaderTranslator::TranslateControlFlowLoopStart(
const ControlFlowLoopStartInstruction& cf) {
ParsedLoopStartInstruction i;
i.dword_index = cf_index_;
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
i.is_repeat = cf.is_repeat();
i.loop_skip_address = cf.address();
i.Disassemble(&ucode_disasm_buffer_);
ProcessLoopStartInstruction(i);
}
void ShaderTranslator::TranslateControlFlowLoopEnd(
const ControlFlowLoopEndInstruction& cf) {
ParsedLoopEndInstruction i;
i.dword_index = cf_index_;
i.is_predicated_break = cf.is_predicated_break();
i.predicate_condition = cf.condition();
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
i.loop_body_address = cf.address();
i.Disassemble(&ucode_disasm_buffer_);
ProcessLoopEndInstruction(i);
}
void ShaderTranslator::TranslateControlFlowCondCall(
const ControlFlowCondCallInstruction& cf) {
ParsedCallInstruction i;
i.dword_index = cf_index_;
i.target_address = cf.address();
if (cf.is_unconditional()) {
i.type = ParsedCallInstruction::Type::kUnconditional;
} else if (cf.is_predicated()) {
i.type = ParsedCallInstruction::Type::kPredicated;
i.condition = cf.condition();
} else {
i.type = ParsedCallInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
}
i.Disassemble(&ucode_disasm_buffer_);
ProcessCallInstruction(i);
}
void ShaderTranslator::TranslateControlFlowReturn(
const ControlFlowReturnInstruction& cf) {
ParsedReturnInstruction i;
i.dword_index = cf_index_;
i.Disassemble(&ucode_disasm_buffer_);
ProcessReturnInstruction(i);
}
void ShaderTranslator::TranslateControlFlowCondJmp(
const ControlFlowCondJmpInstruction& cf) {
ParsedJumpInstruction i;
i.dword_index = cf_index_;
i.target_address = cf.address();
if (cf.is_unconditional()) {
i.type = ParsedJumpInstruction::Type::kUnconditional;
} else if (cf.is_predicated()) {
i.type = ParsedJumpInstruction::Type::kPredicated;
i.condition = cf.condition();
} else {
i.type = ParsedJumpInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
}
i.Disassemble(&ucode_disasm_buffer_);
ProcessJumpInstruction(i);
}
void ShaderTranslator::TranslateControlFlowAlloc(
const ControlFlowAllocInstruction& cf) {
ParsedAllocInstruction i;
i.dword_index = cf_index_;
i.type = cf.alloc_type();
i.count = cf.size();
i.is_vertex_shader = is_vertex_shader();
i.Disassemble(&ucode_disasm_buffer_);
ProcessAllocInstruction(i);
}
void ShaderTranslator::TranslateExecInstructions(
const ParsedExecInstruction& instr) {
instr.Disassemble(&ucode_disasm_buffer_);
ProcessExecInstructionBegin(instr);
uint32_t sequence = instr.sequence;
for (uint32_t instr_offset = instr.instruction_address;
instr_offset < instr.instruction_address + instr.instruction_count;
++instr_offset, sequence >>= 2) {
MarkUcodeInstruction(instr_offset);
AppendUcodeDisasmFormat("/* %4u */ ", instr_offset);
bool is_sync = (sequence & 0x2) == 0x2;
bool is_fetch = (sequence & 0x1) == 0x1;
if (is_sync) {
AppendUcodeDisasm(" serialize\n ");
}
if (is_fetch) {
auto fetch_opcode =
static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F);
if (fetch_opcode == FetchOpcode::kVertexFetch) {
auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
ucode_dwords_ + instr_offset * 3);
TranslateVertexFetchInstruction(op);
} else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
ucode_dwords_ + instr_offset * 3);
TranslateTextureFetchInstruction(op);
}
} else {
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords_ +
instr_offset * 3);
TranslateAluInstruction(op);
}
}
ProcessExecInstructionEnd(instr);
}
void ParseFetchInstructionResult(uint32_t dest, uint32_t swizzle,
bool is_relative,
InstructionResult* out_result) {
out_result->storage_target = InstructionStorageTarget::kRegister;
out_result->storage_index = dest;
out_result->is_export = false;
out_result->is_clamped = false;
out_result->storage_addressing_mode =
is_relative ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
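// Each dest component takes 3 swizzle bits: 0-3 select a source component
// (x/y/z/w), 4 writes constant 0, 5 writes constant 1, 6 seemingly also
// yields 0, and 7 leaves the component unwritten.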
for (int i = 0; i < 4; ++i) {
out_result->write_mask[i] = true;
if ((swizzle & 0x7) == 4) {
out_result->components[i] = SwizzleSource::k0;
} else if ((swizzle & 0x7) == 5) {
out_result->components[i] = SwizzleSource::k1;
} else if ((swizzle & 0x7) == 6) {
out_result->components[i] = SwizzleSource::k0;
} else if ((swizzle & 0x7) == 7) {
out_result->write_mask[i] = false;
} else {
out_result->components[i] = GetSwizzleFromComponentIndex(swizzle & 0x3);
}
swizzle >>= 3;
}
}
void ShaderTranslator::TranslateVertexFetchInstruction(
const VertexFetchInstruction& op) {
ParsedVertexFetchInstruction instr;
ParseVertexFetchInstruction(op, &instr);
instr.Disassemble(&ucode_disasm_buffer_);
ProcessVertexFetchInstruction(instr);
}
void ShaderTranslator::ParseVertexFetchInstruction(
const VertexFetchInstruction& op, ParsedVertexFetchInstruction* out_instr) {
auto& i = *out_instr;
i.dword_index = 0;
i.opcode = FetchOpcode::kVertexFetch;
i.opcode_name = op.is_mini_fetch() ? "vfetch_mini" : "vfetch_full";
i.is_mini_fetch = op.is_mini_fetch();
i.is_predicated = op.is_predicated();
i.predicate_condition = op.predicate_condition();
ParseFetchInstructionResult(op.dest(), op.dest_swizzle(),
op.is_dest_relative(), &i.result);
// Reuse previous vfetch_full if this is a mini.
const auto& full_op = op.is_mini_fetch() ? previous_vfetch_full_ : op;
auto& src_op = i.operands[i.operand_count++];
src_op.storage_source = InstructionStorageSource::kRegister;
src_op.storage_index = full_op.src();
src_op.storage_addressing_mode =
full_op.is_src_relative()
? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
src_op.is_negated = false;
src_op.is_absolute_value = false;
src_op.component_count = 1;
uint32_t swizzle = full_op.src_swizzle();
for (int j = 0; j < src_op.component_count; ++j, swizzle >>= 2) {
src_op.components[j] = GetSwizzleFromComponentIndex(swizzle & 0x3);
}
auto& const_op = i.operands[i.operand_count++];
const_op.storage_source = InstructionStorageSource::kVertexFetchConstant;
const_op.storage_index = full_op.fetch_constant_index();
i.attributes.data_format = op.data_format();
i.attributes.offset = op.offset();
i.attributes.stride = full_op.stride();
i.attributes.exp_adjust = op.exp_adjust();
i.attributes.is_index_rounded = op.is_index_rounded();
i.attributes.is_signed = op.is_signed();
i.attributes.is_integer = !op.is_normalized();
i.attributes.prefetch_count = op.prefetch_count();
// Store for later use by mini fetches.
if (!op.is_mini_fetch()) {
previous_vfetch_full_ = op;
}
}
void ShaderTranslator::TranslateTextureFetchInstruction(
const TextureFetchInstruction& op) {
ParsedTextureFetchInstruction instr;
ParseTextureFetchInstruction(op, &instr);
instr.Disassemble(&ucode_disasm_buffer_);
ProcessTextureFetchInstruction(instr);
}
void ShaderTranslator::ParseTextureFetchInstruction(
const TextureFetchInstruction& op,
ParsedTextureFetchInstruction* out_instr) {
struct TextureFetchOpcodeInfo {
const char* name;
bool has_dest;
bool has_const;
bool has_attributes;
int override_component_count;
} opcode_info;
switch (op.opcode()) {
case FetchOpcode::kTextureFetch: {
static const char* kNames[] = {"tfetch1D", "tfetch2D", "tfetch3D",
"tfetchCube"};
opcode_info = {kNames[static_cast<int>(op.dimension())], true, true, true,
0};
} break;
case FetchOpcode::kGetTextureBorderColorFrac: {
static const char* kNames[] = {"getBCF1D", "getBCF2D", "getBCF3D",
"getBCFCube"};
opcode_info = {kNames[static_cast<int>(op.dimension())], true, true, true,
0};
} break;
case FetchOpcode::kGetTextureComputedLod: {
static const char* kNames[] = {"getCompTexLOD1D", "getCompTexLOD2D",
"getCompTexLOD3D", "getCompTexLODCube"};
opcode_info = {kNames[static_cast<int>(op.dimension())], true, true, true,
0};
} break;
case FetchOpcode::kGetTextureGradients:
opcode_info = {"getGradients", true, true, true, 2};
break;
case FetchOpcode::kGetTextureWeights: {
static const char* kNames[] = {"getWeights1D", "getWeights2D",
"getWeights3D", "getWeightsCube"};
opcode_info = {kNames[static_cast<int>(op.dimension())], true, true, true,
0};
} break;
case FetchOpcode::kSetTextureLod:
opcode_info = {"setTexLOD", false, false, false, 1};
break;
case FetchOpcode::kSetTextureGradientsHorz:
opcode_info = {"setGradientH", false, false, false, 3};
break;
case FetchOpcode::kSetTextureGradientsVert:
opcode_info = {"setGradientV", false, false, false, 3};
break;
default:
case FetchOpcode::kUnknownTextureOp:
assert_unhandled_case(fetch_opcode);
return;
}
auto& i = *out_instr;
i.dword_index = 0;
i.opcode = op.opcode();
i.opcode_name = opcode_info.name;
i.dimension = op.dimension();
i.is_predicated = op.is_predicated();
i.predicate_condition = op.predicate_condition();
if (opcode_info.has_dest) {
ParseFetchInstructionResult(op.dest(), op.dest_swizzle(),
op.is_dest_relative(), &i.result);
} else {
i.result.storage_target = InstructionStorageTarget::kNone;
}
auto& src_op = i.operands[i.operand_count++];
src_op.storage_source = InstructionStorageSource::kRegister;
src_op.storage_index = op.src();
src_op.storage_addressing_mode =
op.is_src_relative() ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
src_op.is_negated = false;
src_op.is_absolute_value = false;
src_op.component_count =
opcode_info.override_component_count
? opcode_info.override_component_count
: GetTextureDimensionComponentCount(op.dimension());
uint32_t swizzle = op.src_swizzle();
for (int j = 0; j < src_op.component_count; ++j, swizzle >>= 2) {
src_op.components[j] = GetSwizzleFromComponentIndex(swizzle & 0x3);
}
if (opcode_info.has_const) {
auto& const_op = i.operands[i.operand_count++];
const_op.storage_source = InstructionStorageSource::kTextureFetchConstant;
const_op.storage_index = op.fetch_constant_index();
}
if (opcode_info.has_attributes) {
i.attributes.fetch_valid_only = op.fetch_valid_only();
i.attributes.unnormalized_coordinates = op.unnormalized_coordinates();
i.attributes.mag_filter = op.mag_filter();
i.attributes.min_filter = op.min_filter();
i.attributes.mip_filter = op.mip_filter();
i.attributes.aniso_filter = op.aniso_filter();
i.attributes.use_computed_lod = op.use_computed_lod();
i.attributes.use_register_lod = op.use_register_lod();
i.attributes.use_register_gradients = op.use_register_gradients();
i.attributes.lod_bias = op.lod_bias();
i.attributes.offset_x = op.offset_x();
i.attributes.offset_y = op.offset_y();
i.attributes.offset_z = op.offset_z();
}
}
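// Each entry below is {name, argument_count, src_swizzle_component_count},
// matching the field order of AluOpcodeInfo in shader_translator.h.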
const ShaderTranslator::AluOpcodeInfo
ShaderTranslator::alu_vector_opcode_infos_[0x20] = {
{"add", 2, 4}, // 0
{"mul", 2, 4}, // 1
{"max", 2, 4}, // 2
{"min", 2, 4}, // 3
{"seq", 2, 4}, // 4
{"sgt", 2, 4}, // 5
{"sge", 2, 4}, // 6
{"sne", 2, 4}, // 7
{"frc", 1, 4}, // 8
{"trunc", 1, 4}, // 9
{"floor", 1, 4}, // 10
{"mad", 3, 4}, // 11
{"cndeq", 3, 4}, // 12
{"cndge", 3, 4}, // 13
{"cndgt", 3, 4}, // 14
{"dp4", 2, 4}, // 15
{"dp3", 2, 4}, // 16
{"dp2add", 3, 4}, // 17
{"cube", 2, 4}, // 18
{"max4", 1, 4}, // 19
{"setp_eq_push", 2, 4}, // 20
{"setp_ne_push", 2, 4}, // 21
{"setp_gt_push", 2, 4}, // 22
{"setp_ge_push", 2, 4}, // 23
{"kill_eq", 2, 4}, // 24
{"kill_gt", 2, 4}, // 25
{"kill_ge", 2, 4}, // 26
{"kill_ne", 2, 4}, // 27
{"dst", 2, 4}, // 28
{"maxa", 2, 4}, // 29
};
const ShaderTranslator::AluOpcodeInfo
ShaderTranslator::alu_scalar_opcode_infos_[0x40] = {
{"adds", 1, 2}, // 0
{"adds_prev", 1, 1}, // 1
{"muls", 1, 2}, // 2
{"muls_prev", 1, 1}, // 3
{"muls_prev2", 1, 2}, // 4
{"maxs", 1, 2}, // 5
{"mins", 1, 2}, // 6
{"seqs", 1, 1}, // 7
{"sgts", 1, 1}, // 8
{"sges", 1, 1}, // 9
{"snes", 1, 1}, // 10
{"frcs", 1, 1}, // 11
{"truncs", 1, 1}, // 12
{"floors", 1, 1}, // 13
{"exp", 1, 1}, // 14
{"logc", 1, 1}, // 15
{"log", 1, 1}, // 16
{"rcpc", 1, 1}, // 17
{"rcpf", 1, 1}, // 18
{"rcp", 1, 1}, // 19
{"rsqc", 1, 1}, // 20
{"rsqf", 1, 1}, // 21
{"rsq", 1, 1}, // 22
{"maxas", 1, 2}, // 23
{"maxasf", 1, 2}, // 24
{"subs", 1, 2}, // 25
{"subs_prev", 1, 1}, // 26
{"setp_eq", 1, 1}, // 27
{"setp_ne", 1, 1}, // 28
{"setp_gt", 1, 1}, // 29
{"setp_ge", 1, 1}, // 30
{"setp_inv", 1, 1}, // 31
{"setp_pop", 1, 1}, // 32
{"setp_clr", 1, 1}, // 33
{"setp_rstr", 1, 1}, // 34
{"kills_eq", 1, 1}, // 35
{"kills_gt", 1, 1}, // 36
{"kills_ge", 1, 1}, // 37
{"kills_ne", 1, 1}, // 38
{"kills_one", 1, 1}, // 39
{"sqrt", 1, 1}, // 40
{"UNKNOWN", 0, 0}, // 41
{"mulsc", 2, 1}, // 42
{"mulsc", 2, 1}, // 43
{"addsc", 2, 1}, // 44
{"addsc", 2, 1}, // 45
{"subsc", 2, 1}, // 46
{"subsc", 2, 1}, // 47
{"sin", 1, 1}, // 48
{"cos", 1, 1}, // 49
{"retain_prev", 1, 1}, // 50
};
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
if (!op.has_vector_op() && !op.has_scalar_op()) {
ParsedAluInstruction instr;
instr.type = ParsedAluInstruction::Type::kNop;
instr.Disassemble(&ucode_disasm_buffer_);
ProcessAluInstruction(instr);
return;
}
ParsedAluInstruction instr;
if (op.has_vector_op()) {
const auto& opcode_info =
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
ParseAluVectorInstruction(op, opcode_info, instr);
ProcessAluInstruction(instr);
}
if (op.has_scalar_op()) {
const auto& opcode_info =
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
ParseAluScalarInstruction(op, opcode_info, instr);
ProcessAluInstruction(instr);
}
}
void ParseAluInstructionOperand(const AluInstruction& op, int i,
int swizzle_component_count,
InstructionOperand* out_op) {
int const_slot = 0;
switch (i) {
case 2:
const_slot = op.src_is_temp(1) ? 0 : 1;
break;
case 3:
const_slot = op.src_is_temp(1) && op.src_is_temp(2) ? 0 : 1;
break;
}
out_op->is_negated = op.src_negate(i);
uint32_t reg = op.src_reg(i);
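// For temps the operand encodes more than an index: bits 0-4 are the
// register number, bit 6 selects a0-relative addressing, and bit 7 takes
// the absolute value of the source.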
if (op.src_is_temp(i)) {
out_op->storage_source = InstructionStorageSource::kRegister;
out_op->storage_index = reg & 0x1F;
out_op->is_absolute_value = (reg & 0x80) == 0x80;
out_op->storage_addressing_mode =
(reg & 0x40) ? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
} else {
out_op->storage_source = InstructionStorageSource::kConstantFloat;
out_op->storage_index = reg;
if ((const_slot == 0 && op.is_const_0_addressed()) ||
(const_slot == 1 && op.is_const_1_addressed())) {
if (op.is_address_relative()) {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kAddressAbsolute;
} else {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
}
} else {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
}
out_op->is_absolute_value = op.abs_constants();
}
out_op->component_count = swizzle_component_count;
uint32_t swizzle = op.src_swizzle(i);
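// The swizzle field packs 2 bits per component. Single-component reads
// take bits 6-7 (offset by 3), two-component reads add bits 0-1, and full
// vec4 reads rotate each 2-bit field by its component index.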
if (swizzle_component_count == 1) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a);
} else if (swizzle_component_count == 2) {
uint32_t a = ((swizzle >> 6) + 3) & 0x3;
uint32_t b = ((swizzle >> 0) + 0) & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a);
out_op->components[1] = GetSwizzleFromComponentIndex(b);
} else if (swizzle_component_count == 3) {
assert_always();
} else if (swizzle_component_count == 4) {
for (int j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) {
out_op->components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3);
}
}
}
void ParseAluInstructionOperandSpecial(const AluInstruction& op,
InstructionStorageSource storage_source,
uint32_t reg, bool negate,
int const_slot, uint32_t swizzle,
InstructionOperand* out_op) {
out_op->is_negated = negate;
out_op->is_absolute_value = op.abs_constants();
out_op->storage_source = storage_source;
if (storage_source == InstructionStorageSource::kRegister) {
out_op->storage_index = reg & 0x7F;
} else {
out_op->storage_index = reg;
if ((const_slot == 0 && op.is_const_0_addressed()) ||
(const_slot == 1 && op.is_const_1_addressed())) {
if (op.is_address_relative()) {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kAddressAbsolute;
} else {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kAddressRelative;
}
} else {
out_op->storage_addressing_mode =
InstructionStorageAddressingMode::kStatic;
}
}
out_op->component_count = 1;
uint32_t a = swizzle & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a);
}
void ShaderTranslator::ParseAluVectorInstruction(
const AluInstruction& op, const AluOpcodeInfo& opcode_info,
ParsedAluInstruction& i) {
i.dword_index = 0;
i.type = ParsedAluInstruction::Type::kVector;
i.vector_opcode = op.vector_opcode();
i.opcode_name = opcode_info.name;
i.is_paired = op.has_scalar_op();
i.is_predicated = op.is_predicated();
i.predicate_condition = op.predicate_condition();
i.result.is_export = op.is_export();
i.result.is_clamped = op.vector_clamp();
i.result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = 0;
uint32_t dest_num = op.vector_dest();
if (!op.is_export()) {
assert_true(dest_num < 32);
i.result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = dest_num;
i.result.storage_addressing_mode =
op.is_vector_dest_relative()
? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
} else if (is_vertex_shader()) {
switch (dest_num) {
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 62:
i.result.storage_target = InstructionStorageTarget::kPosition;
break;
case 63:
i.result.storage_target = InstructionStorageTarget::kPointSize;
break;
default:
if (dest_num < 16) {
i.result.storage_target = InstructionStorageTarget::kInterpolant;
i.result.storage_index = dest_num;
} else {
// Unimplemented.
// assert_always();
XELOGE(
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write "
"to export %d",
dest_num);
i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0;
}
break;
}
} else if (is_pixel_shader()) {
switch (dest_num) {
case 0:
case 63: // ? masked?
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 0;
break;
case 1:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 1;
break;
case 2:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 2;
break;
case 3:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 3;
break;
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 61:
i.result.storage_target = InstructionStorageTarget::kDepth;
break;
default:
XELOGE(
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write "
"to export %d",
dest_num);
i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0;
}
}
if (op.is_export()) {
uint32_t write_mask = op.vector_write_mask();
uint32_t const_1_mask = op.scalar_write_mask();
if (!write_mask) {
for (int j = 0; j < 4; ++j) {
i.result.write_mask[j] = false;
}
} else {
for (int j = 0; j < 4; ++j, write_mask >>= 1, const_1_mask >>= 1) {
i.result.write_mask[j] = true;
if (write_mask & 0x1) {
if (const_1_mask & 0x1) {
i.result.components[j] = SwizzleSource::k1;
} else {
i.result.components[j] = GetSwizzleFromComponentIndex(j);
}
} else {
if (op.is_scalar_dest_relative()) {
i.result.components[j] = SwizzleSource::k0;
} else {
i.result.write_mask[j] = false;
}
}
}
}
} else {
uint32_t write_mask = op.vector_write_mask();
for (int j = 0; j < 4; ++j, write_mask >>= 1) {
i.result.write_mask[j] = (write_mask & 0x1) == 0x1;
i.result.components[j] = GetSwizzleFromComponentIndex(j);
}
}
i.operand_count = opcode_info.argument_count;
for (int j = 0; j < i.operand_count; ++j) {
ParseAluInstructionOperand(
op, j + 1, opcode_info.src_swizzle_component_count, &i.operands[j]);
// Track constant float register loads.
if (i.operands[j].storage_source ==
InstructionStorageSource::kConstantFloat) {
if (i.operands[j].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) {
// Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF,
sizeof(constant_register_map_.float_bitmap));
} else {
auto register_index = i.operands[j].storage_index;
constant_register_map_.float_bitmap[register_index / 64] |=
1ull << (register_index % 64);
}
}
}
i.Disassemble(&ucode_disasm_buffer_);
}
void ShaderTranslator::ParseAluScalarInstruction(
const AluInstruction& op, const AluOpcodeInfo& opcode_info,
ParsedAluInstruction& i) {
i.dword_index = 0;
i.type = ParsedAluInstruction::Type::kScalar;
i.scalar_opcode = op.scalar_opcode();
i.opcode_name = opcode_info.name;
i.is_paired = op.has_vector_op();
i.is_predicated = op.is_predicated();
i.predicate_condition = op.predicate_condition();
uint32_t dest_num;
uint32_t write_mask;
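// When exporting, the vector and scalar pipes share a single export
// destination: the scalar op may only write the components that the
// vector write mask leaves unclaimed.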
if (op.is_export()) {
dest_num = op.vector_dest();
write_mask = op.scalar_write_mask() & ~op.vector_write_mask();
} else {
dest_num = op.scalar_dest();
write_mask = op.scalar_write_mask();
}
i.result.is_export = op.is_export();
i.result.is_clamped = op.scalar_clamp();
i.result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = 0;
if (!op.is_export()) {
assert_true(dest_num < 32);
i.result.storage_target = InstructionStorageTarget::kRegister;
i.result.storage_index = dest_num;
i.result.storage_addressing_mode =
op.is_scalar_dest_relative()
? InstructionStorageAddressingMode::kAddressRelative
: InstructionStorageAddressingMode::kStatic;
} else if (is_vertex_shader()) {
switch (dest_num) {
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 62:
i.result.storage_target = InstructionStorageTarget::kPosition;
break;
case 63:
i.result.storage_target = InstructionStorageTarget::kPointSize;
break;
default:
if (dest_num < 16) {
i.result.storage_target = InstructionStorageTarget::kInterpolant;
i.result.storage_index = dest_num;
} else {
// Unimplemented.
// assert_always();
XELOGE(
"ShaderTranslator::ParseAluScalarInstruction: Unsupported write "
"to export %d",
dest_num);
i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0;
}
break;
}
} else if (is_pixel_shader()) {
switch (dest_num) {
case 0:
case 63: // ? masked?
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 0;
break;
case 1:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 1;
break;
case 2:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 2;
break;
case 3:
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 3;
break;
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 61:
i.result.storage_target = InstructionStorageTarget::kDepth;
break;
}
}
for (int j = 0; j < 4; ++j, write_mask >>= 1) {
i.result.write_mask[j] = (write_mask & 0x1) == 0x1;
i.result.components[j] = GetSwizzleFromComponentIndex(j);
}
i.operand_count = opcode_info.argument_count;
if (opcode_info.argument_count == 1) {
ParseAluInstructionOperand(op, 3, opcode_info.src_swizzle_component_count,
&i.operands[0]);
} else {
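// Two-argument scalar ops (mulsc/addsc/subsc) read a float constant via
// src3 and a second temp-register operand whose register number is
// reassembled from the src3 swizzle bits, the src3 temp flag, and the
// low bit of the opcode.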
uint32_t src3_swizzle = op.src_swizzle(3);
uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3;
uint32_t swiz_b = ((src3_swizzle >> 0) + 0) & 0x3;
uint32_t reg2 = (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1) |
(static_cast<int>(op.scalar_opcode()) & 1);
int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0;
ParseAluInstructionOperandSpecial(
op, InstructionStorageSource::kConstantFloat, op.src_reg(3),
op.src_negate(3), 0, swiz_a, &i.operands[0]);
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
reg2, op.src_negate(3), const_slot,
swiz_b, &i.operands[1]);
}
// Track constant float register loads - in either case, a float constant may
// be used in operand 0.
if (i.operands[0].storage_source ==
InstructionStorageSource::kConstantFloat) {
auto register_index = i.operands[0].storage_index;
if (i.operands[0].storage_addressing_mode !=
InstructionStorageAddressingMode::kStatic) {
// Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF,
sizeof(constant_register_map_.float_bitmap));
} else {
constant_register_map_.float_bitmap[register_index / 64] |=
1ull << (register_index % 64);
}
}
i.Disassemble(&ucode_disasm_buffer_);
}
} // namespace gpu
} // namespace xe