diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 8e9bc6067..cc8e80690 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -18,6 +18,7 @@ #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/base/ring_buffer.h" #include "xenia/gpu/gpu_flags.h" @@ -334,7 +335,8 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { return; } - regs.values[index].u32 = value; + // Volatile for the WAIT_REG_MEM loop. + const_cast(regs.values[index]) = value; if (!regs.GetRegisterInfo(index)) { XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value); } @@ -342,19 +344,20 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { // Scratch register writeback. if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; - if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) { + if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) { // Enabled - write to address. - uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32; + uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR]; uint32_t mem_addr = scratch_addr + (scratch_reg * 4); xe::store_and_swap(memory_->TranslatePhysical(mem_addr), value); } } else { switch (index) { // If this is a COHER register, set the dirty flag. - // This will block the command processor the next time it WAIT_MEM_REGs + // This will block the command processor the next time it WAIT_REG_MEMs // and allow us to synchronize the memory. 
case XE_GPU_REG_COHER_STATUS_HOST: { - regs.values[index].u32 |= UINT32_C(0x80000000); + const_cast(regs.values[index]) |= + UINT32_C(0x80000000); } break; case XE_GPU_REG_DC_LUT_RW_INDEX: { @@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { case XE_GPU_REG_DC_LUT_SEQ_COLOR: { // Should be in the 256-entry table writing mode. - assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); auto& gamma_ramp_rw_index = regs.Get(); // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write // enable mask is blue, green, red. bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = @@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { case XE_GPU_REG_DC_LUT_PWL_DATA: { // Should be in the PWL writing mode. - assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); + assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); auto& gamma_ramp_rw_index = regs.Get(); // Bit 7 of the index is ignored for PWL. uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F; // DC_LUT_PWL_DATA is likely in the red, green, blue order because // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red. bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_PWL_DATA& gamma_ramp_entry = @@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { case XE_GPU_REG_DC_LUT_30_COLOR: { // Should be in the 256-entry table writing mode. 
- assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); auto& gamma_ramp_rw_index = regs.Get(); uint32_t gamma_ramp_write_enable_mask = - regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111; + regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111; if (gamma_ramp_write_enable_mask) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index]; @@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() { // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 - RegisterFile* regs = register_file_; - auto& status_host = regs->Get(); - auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + // Volatile because this may be called from the WAIT_REG_MEM loop. + volatile uint32_t* regs_volatile = register_file_->values; + auto status_host = xe::memory::Reinterpret( + uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST])); + uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST]; + uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST]; if (!status_host.status) { return; @@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() { base_host + size_host, size_host, action); // Mark coherent. 
- status_host.status = 0; + regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0; } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } @@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader, SCOPE_profile_cpu_f("gpu"); // wait until a register or memory location is a specific value + uint32_t wait_info = reader->ReadAndSwap(); uint32_t poll_reg_addr = reader->ReadAndSwap(); uint32_t ref = reader->ReadAndSwap(); uint32_t mask = reader->ReadAndSwap(); uint32_t wait = reader->ReadAndSwap(); + + bool is_memory = (wait_info & 0x10) != 0; + + assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount); + const volatile uint32_t& value_ref = + is_memory ? *reinterpret_cast(memory_->TranslatePhysical( + poll_reg_addr & ~uint32_t(0x3))) + : register_file_->values[poll_reg_addr]; + bool matched = false; do { - uint32_t value; - if (wait_info & 0x10) { - // Memory. - auto endianness = static_cast(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); - value = GpuSwap(value, endianness); - trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); + uint32_t value = value_ref; + if (is_memory) { + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)), + sizeof(uint32_t)); + value = xenos::GpuSwap(value, + static_cast(poll_reg_addr & 0x3)); } else { - // Register. 
- assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { MakeCoherent(); - value = register_file_->values[poll_reg_addr].u32; + value = value_ref; } } switch (wait_info & 0x7) { @@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader, uint32_t rmw_info = reader->ReadAndSwap(); uint32_t and_mask = reader->ReadAndSwap(); uint32_t or_mask = reader->ReadAndSwap(); - uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; + uint32_t value = register_file_->values[rmw_info & 0x1FFF]; if ((rmw_info >> 31) & 0x1) { // & reg - value &= register_file_->values[and_mask & 0x1FFF].u32; + value &= register_file_->values[and_mask & 0x1FFF]; } else { // & imm value &= and_mask; } if ((rmw_info >> 30) & 0x1) { // | reg - value |= register_file_->values[or_mask & 0x1FFF].u32; + value |= register_file_->values[or_mask & 0x1FFF]; } else { // | imm value |= or_mask; @@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader, uint32_t reg_val; assert_true(reg_addr < RegisterFile::kRegisterCount); - reg_val = register_file_->values[reg_addr].u32; + reg_val = register_file_->values[reg_addr]; auto endianness = static_cast(mem_addr & 0x3); mem_addr &= ~0x3; @@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader, } else { // Register. 
assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; + value = register_file_->values[poll_reg_addr]; } bool matched = false; switch (wait_info & 0x7) { @@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader, if (fake_sample_count >= 0) { auto* pSampleCounts = memory_->TranslatePhysical( - register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); + register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]); // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END // and used to detect a finished query. bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && @@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader, // The scan converter writes the internal result back to the register here. // We just fake it and say it was visible in case it is read back. if (id < 32) { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |= - uint32_t(1) << id; + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1) + << id; } else { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |= + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |= uint32_t(1) << (id - 32); } } @@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() { // Write the initial register values, to be loaded directly into the // RegisterFile since all registers, including those that may have side // effects on setting, will be saved. 
- trace_writer_.WriteRegisters( - 0, reinterpret_cast(register_file_->values), - RegisterFile::kRegisterCount, false); + trace_writer_.WriteRegisters(0, register_file_->values, + RegisterFile::kRegisterCount, false); trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(), gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 814a74a7c..93589cf7b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -17,6 +17,7 @@ #include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h" @@ -2306,8 +2307,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { vfetch_bits_remaining &= ~(uint32_t(1) << j); uint32_t vfetch_index = i * 32 + j; - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -3050,10 +3051,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState( // Blend factor. float blend_factor[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; // std::memcmp instead of != so in case of NaN, every draw won't be // invalidating it. 
@@ -3100,7 +3101,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3241,9 +3242,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Tessellation factor range, plus 1.0 according to the images in // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 float tessellation_factor_min = - regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f; + regs.Get(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f; float tessellation_factor_max = - regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f; + regs.Get(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f; dirty |= system_constants_.tessellation_factor_range_min != tessellation_factor_min; system_constants_.tessellation_factor_range_min = tessellation_factor_min; @@ -3280,12 +3281,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( while (xe::bit_scan_forward(user_clip_planes_remaining, &user_clip_plane_index)) { user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index); - const float* user_clip_plane = - ®s[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32; - if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane, + const void* user_clip_plane_regs = + ®s[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4]; + if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float))) { dirty = true; - std::memcpy(user_clip_plane_write_ptr, user_clip_plane, + std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float)); } user_clip_plane_write_ptr += 4; @@ -3423,9 +3424,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( color_exp_bias -= 5; } } - float color_exp_bias_scale; - 
*reinterpret_cast(&color_exp_bias_scale) = - 0x3F800000 + (color_exp_bias << 23); + auto color_exp_bias_scale = xe::memory::Reinterpret( + int32_t(0x3F800000 + (color_exp_bias << 23))); dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; system_constants_.color_exp_bias[i] = color_exp_bias_scale; if (edram_rov_used) { @@ -3454,7 +3454,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)); uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -3477,22 +3477,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ 
-3567,21 +3567,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } dirty |= system_constants_.edram_blend_constant[0] != - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); system_constants_.edram_blend_constant[0] = - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); dirty |= system_constants_.edram_blend_constant[1] != - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); dirty |= system_constants_.edram_blend_constant[2] != - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); dirty |= system_constants_.edram_blend_constant[3] != - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } cbuffer_binding_system_.up_to_date &= !dirty; @@ -3638,10 +3638,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). - assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. 
const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -3715,8 +3715,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -3746,8 +3745,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -3767,7 +3765,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, return false; } std::memcpy(bool_loop_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], kBoolLoopConstantsSize); cbuffer_binding_bool_loop_.up_to_date = true; current_graphics_root_up_to_date_ &= @@ -3782,8 +3780,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, if (fetch_constants == nullptr) { return false; } - std::memcpy(fetch_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + std::memcpy(fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0], kFetchConstantsSize); cbuffer_binding_fetch_.up_to_date = true; current_graphics_root_up_to_date_ &= diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index 24904c7e8..9e3b794d1 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -960,8 +960,8 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( const 
D3D12Shader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -1441,8 +1441,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture( D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/draw_extent_estimator.cc b/src/xenia/gpu/draw_extent_estimator.cc index fb65fb96b..20c6086ee 100644 --- a/src/xenia/gpu/draw_extent_estimator.cc +++ b/src/xenia/gpu/draw_extent_estimator.cc @@ -15,6 +15,7 @@ #include "xenia/base/assert.h" #include "xenia/base/cvar.h" +#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/gpu/registers.h" #include "xenia/gpu/ucode.h" @@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export( point_size_ = value[0]; } if (value_mask & 0b0100) { - vertex_kill_ = *reinterpret_cast(&value[2]); + vertex_kill_ = xe::memory::Reinterpret(value[2]); } } } @@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { xenos::Endian index_endian = vgt_dma_size.swap_mode; if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) { xenos::IndexFormat index_format = vgt_draw_initiator.index_size; - uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32; + uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE]; uint32_t index_buffer_read_count = std::min(uint32_t(vgt_draw_initiator.num_indices), uint32_t(vgt_dma_size.num_words)); @@ -145,21 +146,22 @@ uint32_t 
DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { auto pa_cl_vte_cntl = regs.Get(); float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) : 1.0f; - float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : 0.0f; + float viewport_y_offset = + pa_cl_vte_cntl.vport_y_offset_ena + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f; int32_t point_vertex_min_diameter_float = 0; int32_t point_vertex_max_diameter_float = 0; float point_constant_radius_y = 0.0f; if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { auto pa_su_point_minmax = regs.Get(); - *reinterpret_cast(&point_vertex_min_diameter_float) = - float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); - *reinterpret_cast(&point_vertex_max_diameter_float) = - float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); + point_vertex_min_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.min_size) * (2.0f / 16.0f)); + point_vertex_max_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.max_size) * (2.0f / 16.0f)); point_constant_radius_y = float(regs.Get().height) * (1.0f / 16.0f); } @@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { // Vertex-specified diameter. Clamped effectively as a signed integer in // the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN // to the maximum. 
- point_radius_y = position_y_export_sink.point_size().value(); - *reinterpret_cast(&point_radius_y) = std::min( - point_vertex_max_diameter_float, - std::max(point_vertex_min_diameter_float, - *reinterpret_cast(&point_radius_y))); - point_radius_y *= 0.5f; + point_radius_y = + 0.5f * + xe::memory::Reinterpret(std::min( + point_vertex_max_diameter_float, + std::max(point_vertex_min_diameter_float, + xe::memory::Reinterpret( + position_y_export_sink.point_size().value())))); } else { // Constant radius. point_radius_y = point_constant_radius_y; @@ -331,11 +334,12 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, } // Then apply the floating-point viewport offset. if (pa_cl_vte_cntl.vport_y_offset_ena) { - viewport_bottom += regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; + viewport_bottom += regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET); } - viewport_bottom += pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) - : 1.0f; + viewport_bottom += + pa_cl_vte_cntl.vport_y_scale_ena + ? std::abs(regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE)) + : 1.0f; // Using floor, or, rather, truncation (because maxing with zero anyway) // similar to how viewport scissoring behaves on real AMD, Intel and Nvidia // GPUs on Direct3D 12 (but not WARP), also like in diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index eb61c39cb..73494a7f2 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -11,7 +11,6 @@ #include #include -#include #include "xenia/base/assert.h" #include "xenia/base/cvar.h" @@ -100,20 +99,20 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs, // ones that are rendered (except for shadow volumes). 
if (pa_su_sc_mode_cntl.poly_offset_front_enable && !pa_su_sc_mode_cntl.cull_front) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable && !pa_su_sc_mode_cntl.cull_back && !scale && !offset) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { // Non-triangle primitives use the front offset, but it's toggled via // poly_offset_para_enable. if (pa_su_sc_mode_cntl.poly_offset_para_enable) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } } scale_out = scale; @@ -148,7 +147,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader, } // Check if a color target is actually written. - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t rts_remaining = shader.writes_color_targets(); uint32_t rt_index; while (xe::bit_scan_forward(rts_remaining, &rt_index)) { @@ -311,24 +310,26 @@ void GetHostViewportInfo(const RegisterFile& regs, // Obtain the original viewport values in a normalized way. float scale_xy[] = { - pa_cl_vte_cntl.vport_x_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 - : 1.0f, - pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 - : 1.0f, + pa_cl_vte_cntl.vport_x_scale_ena + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE) + : 1.0f, + pa_cl_vte_cntl.vport_y_scale_ena + ? 
regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) + : 1.0f, }; float scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE) : 1.0f; float offset_base_xy[] = { pa_cl_vte_cntl.vport_x_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET) : 0.0f, pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) : 0.0f, }; float offset_z = pa_cl_vte_cntl.vport_z_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET) : 0.0f; // Calculate all the integer.0 or integer.5 offsetting exactly at full // precision, separately so it can be used in other integer calculations @@ -615,7 +616,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs, return 0; } uint32_t normalized_color_mask = 0; - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { // Exclude the render targets not statically written to by the pixel shader. // If the shader doesn't write to a render target, it shouldn't be written @@ -661,9 +662,8 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, ? regs.Get().base : regs.Get().base; for (uint32_t constant_index : shader.memexport_stream_constants()) { - const auto& stream = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_000_X + - (float_constants_base + constant_index) * 4); + xenos::xe_gpu_memexport_stream_t stream = + regs.GetMemExportStream(float_constants_base + constant_index); if (!stream.index_count) { continue; } @@ -705,7 +705,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, } // Add a new range if haven't expanded an existing one. 
if (!range_reused) { - ranges_out.emplace_back(stream.base_address, stream_size_bytes); + ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes); } } } @@ -824,8 +824,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, // Get the extent of pixels covered by the resolve rectangle, according to the // top-left rasterization rule. // D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU. - auto fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0); if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) { XELOGE("Unsupported resolve vertex buffer format"); assert_always(); @@ -994,7 +993,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // Calculate the destination memory extent. - uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; uint32_t copy_dest_base_adjusted = rb_copy_dest_base; uint32_t copy_dest_extent_start, copy_dest_extent_end; auto rb_copy_dest_pitch = regs.Get(); @@ -1164,9 +1163,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, info_out.copy_dest_info.copy_dest_swap = false; } - info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; + info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR]; + info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR]; + info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO]; XELOGD( "Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially " diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index 57b8511c6..d2bef4458 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -17,6 +17,7 @@ #include "xenia/base/assert.h" #include "xenia/base/math.h" +#include 
"xenia/base/memory.h" namespace xe { namespace gpu { @@ -1102,10 +1103,10 @@ struct Src : OperandAddress { } static Src LI(int32_t x) { return LI(x, x, x, x); } static Src LF(float x, float y, float z, float w) { - return LU(*reinterpret_cast(&x), - *reinterpret_cast(&y), - *reinterpret_cast(&z), - *reinterpret_cast(&w)); + return LU(xe::memory::Reinterpret(x), + xe::memory::Reinterpret(y), + xe::memory::Reinterpret(z), + xe::memory::Reinterpret(w)); } static Src LF(float x) { return LF(x, x, x, x); } static Src LP(const uint32_t* xyzw) { @@ -1222,12 +1223,10 @@ struct Src : OperandAddress { bool negate) { if (is_integer) { if (absolute) { - *reinterpret_cast(&value) = - std::abs(*reinterpret_cast(&value)); + value = uint32_t(std::abs(int32_t(value))); } if (negate) { - *reinterpret_cast(&value) = - -*reinterpret_cast(&value); + value = uint32_t(-int32_t(value)); } } else { if (absolute) { diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index b5470fd0a..3c04e0fff 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -201,7 +201,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) { } assert_true(r < RegisterFile::kRegisterCount); - return register_file_.values[r].u32; + return register_file_.values[r]; } void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { @@ -219,7 +219,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { } assert_true(r < RegisterFile::kRegisterCount); - register_file_.values[r].u32 = value; + register_file_.values[r] = value; } void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) { diff --git a/src/xenia/gpu/packet_disassembler.h b/src/xenia/gpu/packet_disassembler.h index 942a88409..c4572b928 100644 --- a/src/xenia/gpu/packet_disassembler.h +++ b/src/xenia/gpu/packet_disassembler.h @@ -42,7 +42,7 @@ struct PacketAction { union { struct { uint32_t index; - RegisterFile::RegisterValue value; + uint32_t value; } 
register_write; struct { uint64_t value; @@ -56,7 +56,7 @@ struct PacketAction { PacketAction action; action.type = Type::kRegisterWrite; action.register_write.index = index; - action.register_write.value.u32 = value; + action.register_write.value = value; return action; } diff --git a/src/xenia/gpu/primitive_processor.cc b/src/xenia/gpu/primitive_processor.cc index 827fb7b4e..9e20be2c4 100644 --- a/src/xenia/gpu/primitive_processor.cc +++ b/src/xenia/gpu/primitive_processor.cc @@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. - guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || @@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. 
- guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index e9a4f1137..40870810f 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -12,8 +12,12 @@ #include #include +#include +#include "xenia/base/assert.h" +#include "xenia/base/memory.h" #include "xenia/gpu/registers.h" +#include "xenia/gpu/xenos.h" namespace xe { namespace gpu { @@ -34,39 +38,53 @@ class RegisterFile { static const RegisterInfo* GetRegisterInfo(uint32_t index); static constexpr size_t kRegisterCount = 0x5003; - union RegisterValue { - uint32_t u32; - float f32; - }; - RegisterValue values[kRegisterCount]; + uint32_t values[kRegisterCount]; + + const uint32_t& operator[](uint32_t reg) const { return values[reg]; } + uint32_t& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](uint32_t reg) const { return values[reg]; } - RegisterValue& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](Register reg) const { return values[reg]; } - RegisterValue& operator[](Register reg) { return values[reg]; } template - const T& Get(uint32_t reg) const { - return *reinterpret_cast(&values[reg]); + T Get(uint32_t reg) const { + return xe::memory::Reinterpret(values[reg]); } template - T& Get(uint32_t reg) { - return *reinterpret_cast(&values[reg]); + T Get(Register reg) const { + return Get(static_cast(reg)); } template - const T& Get(Register reg) const { - return *reinterpret_cast(&values[reg]); + T Get() const { + return Get(T::register_index); } - template - T& Get(Register reg) { - return *reinterpret_cast(&values[reg]); + + xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t 
index) const { + assert_true(index < 96); + xenos::xe_gpu_vertex_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - const T& Get() const { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const { + assert_true(index < 32); + xenos::xe_gpu_texture_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - T& Get() { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_memexport_stream_t GetMemExportStream( + uint32_t float_constant_index) const { + assert_true(float_constant_index < 512); + xenos::xe_gpu_memexport_stream_t stream; + std::memcpy( + &stream, + &values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index], + sizeof(stream)); + return stream; } }; diff --git a/src/xenia/gpu/shader_interpreter.cc b/src/xenia/gpu/shader_interpreter.cc index 9e1084397..9a1342aca 100644 --- a/src/xenia/gpu/shader_interpreter.cc +++ b/src/xenia/gpu/shader_interpreter.cc @@ -28,10 +28,7 @@ void ShaderInterpreter::Execute() { state_.Reset(); const uint32_t* bool_constants = - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; - const xenos::LoopConstant* loop_constants = - reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32); + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]; bool exec_ended = false; uint32_t cf_index_next = 1; @@ -140,8 +137,8 @@ void ShaderInterpreter::Execute() { cf_index_next = cf_loop_start.address(); continue; } - xenos::LoopConstant loop_constant = - loop_constants[cf_loop_start.loop_id()]; + auto loop_constant = register_file_.Get( + XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id()); state_.loop_constants[state_.loop_stack_depth] = loop_constant; uint32_t& 
loop_iterator_ref = state_.loop_iterators[state_.loop_stack_depth]; @@ -170,8 +167,11 @@ void ShaderInterpreter::Execute() { &cf_instr); xenos::LoopConstant loop_constant = state_.loop_constants[state_.loop_stack_depth - 1]; - assert_true(loop_constant.value == - loop_constants[cf_loop_end.loop_id()].value); + assert_zero( + std::memcmp(&loop_constant, + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + + cf_loop_end.loop_id()], + sizeof(loop_constant))); uint32_t loop_iterator = ++state_.loop_iterators[state_.loop_stack_depth - 1]; if (loop_iterator < loop_constant.count && @@ -257,28 +257,31 @@ void ShaderInterpreter::Execute() { } } -const float* ShaderInterpreter::GetFloatConstant( +const std::array ShaderInterpreter::GetFloatConstant( uint32_t address, bool is_relative, bool relative_address_is_a0) const { - static const float zero[4] = {}; int32_t index = int32_t(address); if (is_relative) { index += relative_address_is_a0 ? state_.address_register : state_.GetLoopAddress(); } if (index < 0) { - return zero; + return std::array(); } auto base_and_size_minus_1 = register_file_.Get( shader_type_ == xenos::ShaderType::kVertex ? 
XE_GPU_REG_SQ_VS_CONST : XE_GPU_REG_SQ_PS_CONST); if (uint32_t(index) > base_and_size_minus_1.size) { - return zero; + return std::array(); } index += base_and_size_minus_1.base; if (index >= 512) { - return zero; + return std::array(); } - return ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32; + std::array value; + std::memcpy(value.data(), + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index], + sizeof(float) * 4); + return value; } void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { @@ -297,6 +300,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { const float* vector_src_ptr; uint32_t vector_src_register = instr.src_reg(1 + i); bool vector_src_absolute = false; + std::array vector_src_float_constant; if (instr.src_is_temp(1 + i)) { vector_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(vector_src_register), @@ -304,9 +308,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( vector_src_register); } else { - vector_src_ptr = GetFloatConstant( + vector_src_float_constant = GetFloatConstant( vector_src_register, instr.src_const_is_addressed(1 + i), instr.is_const_address_register_relative()); + vector_src_ptr = vector_src_float_constant.data(); } uint32_t vector_src_absolute_mask = ~(uint32_t(vector_src_absolute) << 31); @@ -618,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { // r#/c#.w or r#/c#.wx. 
const float* scalar_src_ptr; uint32_t scalar_src_register = instr.src_reg(3); + std::array scalar_src_float_constant; if (instr.src_is_temp(3)) { scalar_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(scalar_src_register), @@ -625,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( scalar_src_register); } else { - scalar_src_ptr = GetFloatConstant( + scalar_src_float_constant = GetFloatConstant( scalar_src_register, instr.src_const_is_addressed(3), instr.is_const_address_register_relative()); + scalar_src_ptr = scalar_src_float_constant.data(); } uint32_t scalar_src_swizzle = instr.src_swizzle(3); scalar_operand_component_count = @@ -984,10 +991,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction( state_.vfetch_full_last = instr; } - xenos::xe_gpu_vertex_fetch_t fetch_constant = - *reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - state_.vfetch_full_last.fetch_constant_index()]); + xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch( + state_.vfetch_full_last.fetch_constant_index()); if (!instr.is_mini_fetch()) { // Get the part of the address that depends on vfetch_full data. 
diff --git a/src/xenia/gpu/shader_interpreter.h b/src/xenia/gpu/shader_interpreter.h index dca530221..47b3d957a 100644 --- a/src/xenia/gpu/shader_interpreter.h +++ b/src/xenia/gpu/shader_interpreter.h @@ -11,6 +11,7 @@ #define XENIA_GPU_SHADER_INTERPRETER_H_ #include +#include #include #include @@ -120,8 +121,8 @@ class ShaderInterpreter { float* GetTempRegister(uint32_t address, bool is_relative) { return temp_registers_[GetTempRegisterIndex(address, is_relative)]; } - const float* GetFloatConstant(uint32_t address, bool is_relative, - bool relative_address_is_a0) const; + const std::array GetFloatConstant( + uint32_t address, bool is_relative, bool relative_address_is_a0) const; void ExecuteAluInstruction(ucode::AluInstruction instr); void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle, diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 18fac01d9..7ba729b89 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -333,8 +333,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { uint32_t index_bit = UINT32_C(1) << index; textures_remaining &= ~index_bit; TextureBinding& binding = texture_bindings_[index]; - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index); TextureKey old_key = binding.key; uint8_t old_swizzled_signs = binding.swizzled_signs; BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs); diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index be614eda3..178c30fc9 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -19,6 +19,7 @@ #include "xenia/base/filesystem.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" #include "xenia/base/platform.h" #include "xenia/base/string.h" #include "xenia/base/system.h" @@ -357,9 +358,10 @@ void 
TraceViewer::DrawPacketDisassemblerUI() { ImGui::NextColumn(); if (!register_info || register_info->type == RegisterInfo::Type::kDword) { - ImGui::Text("%.8X", action.register_write.value.u32); + ImGui::Text("%.8X", action.register_write.value); } else { - ImGui::Text("%8f", action.register_write.value.f32); + ImGui::Text("%8f", xe::memory::Reinterpret( + action.register_write.value)); } ImGui::Columns(1); break; @@ -709,10 +711,8 @@ void TraceViewer::DrawTextureInfo( const Shader::TextureBinding& texture_binding) { auto& regs = *graphics_system_->register_file(); - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - texture_binding.fetch_constant * 6; - auto group = reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(texture_binding.fetch_constant); if (fetch.type != xenos::FetchConstantType::kTexture && (!cvars::gpu_allow_invalid_fetch_constants || fetch.type != xenos::FetchConstantType::kInvalidTexture)) { @@ -780,9 +780,9 @@ void TraceViewer::DrawFailedTextureInfo( void TraceViewer::DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xe_gpu_vertex_fetch_t* fetch) { - const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); - uint32_t vertex_count = fetch->size / vertex_binding.stride_words; + const xe_gpu_vertex_fetch_t& fetch) { + const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2); + uint32_t vertex_count = fetch.size / vertex_binding.stride_words; int column_count = 0; for (const auto& attrib : vertex_binding.attributes) { switch (attrib.fetch_instr.attributes.data_format) { @@ -883,7 +883,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, #define LOADEL(type, wo) \ GpuSwap(xe::load(vstart + \ (attrib.fetch_instr.attributes.offset + wo) * 4), \ - fetch->endian) + fetch.endian) switch (attrib.fetch_instr.attributes.data_format) { case xenos::VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); @@ 
-1187,7 +1187,7 @@ void TraceViewer::DrawStateUI() { } auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + static_cast(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7); const char* mode_name = "Unknown"; switch (enable_mode) { @@ -1210,7 +1210,7 @@ void TraceViewer::DrawStateUI() { break; } case ModeControl::kCopy: { - uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), copy_dest_base); break; @@ -1221,9 +1221,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Viewport State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if ((pa_su_sc_mode_cntl >> 16) & 1) { - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET]; int16_t window_offset_x = window_offset & 0x7FFF; int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; if (window_offset_x & 0x4000) { @@ -1237,8 +1237,8 @@ void TraceViewer::DrawStateUI() { } else { ImGui::BulletText("Window Offset: disabled"); } - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL]; + uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR]; ImGui::BulletText( "Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF, @@ -1246,7 +1246,7 @@ void TraceViewer::DrawStateUI() { (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF), ((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF)); - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t 
surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = (surface_info >> 16) & 0x3; @@ -1258,7 +1258,7 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Surface Pitch: %d", surface_pitch); ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); - uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL]; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; @@ -1273,14 +1273,20 @@ void TraceViewer::DrawStateUI() { } ImGui::BulletText( "Viewport Offset: %f, %f, %f", - vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, - vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, - vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); + vport_xoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET) + : 0.0f, + vport_yoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f, + vport_zoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET) + : 0.0f); ImGui::BulletText( "Viewport Scale: %f, %f, %f", - vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, - vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, - vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); + vport_xscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE) + : 1.0f, + vport_yscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) + : 1.0f, + vport_zscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE) + : 1.0f); if (!vport_xscale_enable) { ImGui::PopStyleColor(); } @@ -1290,7 +1296,7 @@ void TraceViewer::DrawStateUI() { ((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 9) & 0x1) ? "z/w0" : "z", ((vte_control >> 10) & 0x1) ? 
"w0" : "1/w0"); - uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL]; bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; ImGui::BulletText("Clip Enabled: %s, DX Clip: %s", @@ -1302,11 +1308,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Rasterizer State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; - uint32_t pa_sc_screen_scissor_tl = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; - uint32_t pa_sc_screen_scissor_br = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; + uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL]; + uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR]; if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) { int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF; @@ -1361,7 +1365,7 @@ void TraceViewer::DrawStateUI() { } ImGui::Columns(1); - auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_pitch = rb_surface_info & 0x3FFF; auto surface_msaa = static_cast((rb_surface_info >> 16) & 0x3); @@ -1370,39 +1374,39 @@ void TraceViewer::DrawStateUI() { if (enable_mode != ModeControl::kDepth) { // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; - uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; + uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL]; if ((color_control & 0x8) != 0) { ImGui::BulletText("Alpha Test: %s %.2f", kCompareFuncNames[color_control & 0x7], - regs[XE_GPU_REG_RB_ALPHA_REF].f32); + regs.Get(XE_GPU_REG_RB_ALPHA_REF)); } else { 
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); ImGui::BulletText("Alpha Test: disabled"); ImGui::PopStyleColor(); } - auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); + auto blend_color = ImVec4(regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA)); ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x, blend_color.y, blend_color.z, blend_color.w); ImGui::SameLine(); // TODO small_height (was true) parameter was removed ImGui::ColorButton(nullptr, blend_color); - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, - regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + regs[XE_GPU_REG_RB_COLOR_INFO], + regs[XE_GPU_REG_RB_COLOR1_INFO], + regs[XE_GPU_REG_RB_COLOR2_INFO], + regs[XE_GPU_REG_RB_COLOR3_INFO], }; uint32_t rb_blendcontrol[4] = { - regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL0], + regs[XE_GPU_REG_RB_BLENDCONTROL1], + regs[XE_GPU_REG_RB_BLENDCONTROL2], + regs[XE_GPU_REG_RB_BLENDCONTROL3], }; ImGui::Columns(2); for (int i = 0; i < xe::countof(color_info); ++i) { @@ -1511,9 +1515,9 @@ void TraceViewer::DrawStateUI() { } if (ImGui::CollapsingHeader("Depth/Stencil Target")) { - auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL]; + auto rb_stencilrefmask = 
regs[XE_GPU_REG_RB_STENCILREFMASK]; + auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO]; bool uses_depth = (rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004); uint32_t stencil_ref = (rb_stencilrefmask & 0xFF); @@ -1697,10 +1701,9 @@ void TraceViewer::DrawStateUI() { draw_info.index_buffer_size, kIndexFormatNames[int(draw_info.index_format)], kEndiannessNames[int(draw_info.index_endianness)]); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if (pa_su_sc_mode_cntl & (1 << 21)) { - uint32_t reset_index = - regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32; + uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX]; if (draw_info.index_format == xenos::IndexFormat::kInt16) { ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF); } else { @@ -1760,30 +1763,16 @@ void TraceViewer::DrawStateUI() { auto shader = command_processor->active_vertex_shader(); if (shader) { for (const auto& vertex_binding : shader->vertex_bindings()) { - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = - reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (vertex_binding.fetch_constant % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - assert_true(fetch->endian == xenos::Endian::k8in32); + xe_gpu_vertex_fetch_t fetch = + regs.GetVertexFetch(vertex_binding.fetch_constant); + assert_true(fetch.endian == xenos::Endian::k8in32); char tree_root_id[32]; sprintf(tree_root_id, "#vertices_root_%d", vertex_binding.fetch_constant); if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", - vertex_binding.fetch_constant, fetch->address << 2, - fetch->size * 4, - kEndiannessNames[int(fetch->endian)])) { + vertex_binding.fetch_constant, fetch.address << 2, + fetch.size 
* 4, + kEndiannessNames[int(fetch.endian)])) { ImGui::BeginChild("#vertices", ImVec2(0, 300)); DrawVertexFetcher(shader, vertex_binding, fetch); ImGui::EndChild(); @@ -1831,7 +1820,7 @@ void TraceViewer::DrawStateUI() { ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6, (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1842,8 +1831,9 @@ void TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) { ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4); ImGui::NextColumn(); - ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32, - regs[i + 2].f32, regs[i + 3].f32); + ImGui::Text("%f, %f, %f, %f", regs.Get(i + 0), + regs.Get(i + 1), regs.Get(i + 2), + regs.Get(i + 3)); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1856,7 +1846,7 @@ void TraceViewer::DrawStateUI() { (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32, (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1867,7 +1857,7 @@ void TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) { ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 58ab16e4e..b9a988eef 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -122,7 +122,7 @@ class TraceViewer : public xe::ui::WindowedApp { void DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xenos::xe_gpu_vertex_fetch_t* fetch); + const xenos::xe_gpu_vertex_fetch_t& fetch); TraceViewerWindowListener window_listener_; diff --git 
a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 58336c901..806382e00 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2486,8 +2486,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, (uint64_t(1) << (vfetch_index & 63))) { continue; } - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -3285,10 +3285,10 @@ void VulkanCommandProcessor::UpdateDynamicState( // Blend constants. float blend_constants[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; dynamic_blend_constants_update_needed_ |= std::memcmp(dynamic_blend_constants_, blend_constants, @@ -3434,7 +3434,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3442,7 +3442,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( regs.Get(XE_GPU_REG_RB_STENCILREFMASK_BF); auto rb_surface_info = regs.Get(); auto vgt_draw_initiator = regs.Get(); - int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + auto vgt_indx_offset = regs.Get(XE_GPU_REG_VGT_INDX_OFFSET); bool edram_fragment_shader_interlock = 
render_target_cache_->GetPath() == @@ -3755,7 +3755,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -3784,22 +3784,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ -3862,21 +3862,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } dirty |= system_constants_.edram_blend_constant[0] != - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); system_constants_.edram_blend_constant[0] = - 
regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); dirty |= system_constants_.edram_blend_constant[1] != - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); dirty |= system_constants_.edram_blend_constant[2] != - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); dirty |= system_constants_.edram_blend_constant[3] != - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } if (dirty) { @@ -3903,10 +3903,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). - assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. 
const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -4001,8 +4001,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4033,8 +4032,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4055,7 +4053,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); - std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], kBoolLoopConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; @@ -4073,7 +4071,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kFetchConstantsSize); - std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0], kFetchConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index f91cc4e6b..eb2ee9b21 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -718,7 +718,7 @@ bool
VulkanPipelineCache::GetCurrentStateDescription( [common_blend_rt_index]), (((normalized_color_mask & ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) - ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + ? regs[XE_GPU_REG_RB_COLOR_MASK] : normalized_color_mask) >> (4 * common_blend_rt_index)) & 0b1111, diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index bff490b9d..1f3ccaf24 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( const VulkanShader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture( uint32_t& width_scaled_out, uint32_t& height_scaled_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/xenos.cc b/src/xenia/gpu/xenos.cc index f15c621cd..ce7f6177d 100644 --- a/src/xenia/gpu/xenos.cc +++ b/src/xenia/gpu/xenos.cc @@ -12,6 +12,7 @@ #include #include "xenia/base/math.h" +#include "xenia/base/memory.h" namespace xe { namespace gpu { @@ -118,8 +119,8 @@ float Float7e3To32(uint32_t f10) { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0x7F; } - uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return 
xe::memory::Reinterpret( + uint32_t(((exponent + 124) << 23) | (mantissa << 3))); } // Based on CFloat24 from d3dref9.dll and the 6e4 code from: @@ -131,7 +132,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) { // Positive only, and not -0 or NaN. return 0; } - uint32_t f32u32 = *reinterpret_cast(&f32); + auto f32u32 = xe::memory::Reinterpret(f32); if (f32u32 >= 0x3FFFFFF8) { // Saturate. return 0xFFFFFF; @@ -165,8 +166,8 @@ float Float20e4To32(uint32_t f24) { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF; } - uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return xe::memory::Reinterpret( + uint32_t(((exponent + 112) << 23) | (mantissa << 3))); } const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {