From ffef59aaa57d59579ef33454831b7b4ed4cbaf88 Mon Sep 17 00:00:00 2001 From: DrChat Date: Sun, 18 Feb 2018 12:18:20 -0600 Subject: [PATCH] [Base] RingBuffer: Add ReadAndSwap instead of piggybacking off of Read --- src/xenia/base/ring_buffer.h | 20 +++-- src/xenia/gpu/command_processor.cc | 128 ++++++++++++++--------------- 2 files changed, 79 insertions(+), 69 deletions(-) diff --git a/src/xenia/base/ring_buffer.h b/src/xenia/base/ring_buffer.h index b52f6afd0..3165d6b7d 100644 --- a/src/xenia/base/ring_buffer.h +++ b/src/xenia/base/ring_buffer.h @@ -12,6 +12,7 @@ #include #include +#include #include #include "xenia/base/assert.h" @@ -72,16 +73,25 @@ class RingBuffer { } template - T Read(bool swap = false) { - static_assert(sizeof(T) <= 8, "Immediate read only supports basic types!"); + T Read() { + static_assert(std::is_fundamental::value, + "Immediate read only supports basic types!"); T imm; size_t read = Read(reinterpret_cast(&imm), sizeof(T)); assert_true(read == sizeof(T)); - if (swap) { - imm = xe::byte_swap(imm); - } + return imm; + } + template + T ReadAndSwap() { + static_assert(std::is_fundamental::value, + "Immediate read only supports basic types!"); + + T imm; + size_t read = Read(reinterpret_cast(&imm), sizeof(T)); + assert_true(read == sizeof(T)); + imm = xe::byte_swap(imm); return imm; } diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 59ceb60f4..d957f432c 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -443,7 +443,7 @@ void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) { } bool CommandProcessor::ExecutePacket(RingBuffer* reader) { - const uint32_t packet = reader->Read(true); + const uint32_t packet = reader->ReadAndSwap(); const uint32_t packet_type = packet >> 30; if (packet == 0) { trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 1); @@ -483,7 +483,7 @@ bool CommandProcessor::ExecutePacketType0(RingBuffer* reader, uint32_t packet) { uint32_t base_index = (packet & 0x7FFF); uint32_t write_one_reg = (packet >> 15) & 0x1; for (uint32_t m = 0; m < count; m++) { - uint32_t reg_data = reader->Read(true); + uint32_t reg_data = reader->ReadAndSwap(); uint32_t target_index = write_one_reg ? base_index : base_index + m; WriteRegister(target_index, reg_data); } @@ -498,8 +498,8 @@ bool CommandProcessor::ExecutePacketType1(RingBuffer* reader, uint32_t packet) { trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 3); uint32_t reg_index_1 = packet & 0x7FF; uint32_t reg_index_2 = (packet >> 11) & 0x7FF; - uint32_t reg_data_1 = reader->Read(true); - uint32_t reg_data_2 = reader->Read(true); + uint32_t reg_data_1 = reader->ReadAndSwap(); + uint32_t reg_data_2 = reader->ReadAndSwap(); WriteRegister(reg_index_1, reg_data_1); WriteRegister(reg_index_2, reg_data_2); trace_writer_.WritePacketEnd(); @@ -622,38 +622,38 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) { break; case PM4_SET_BIN_MASK_LO: { - uint32_t value = reader->Read(true); + uint32_t value = reader->ReadAndSwap(); bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; result = true; } break; case PM4_SET_BIN_MASK_HI: { - uint32_t value = reader->Read(true); + uint32_t value = reader->ReadAndSwap(); bin_mask_ = (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); result = true; } break; case PM4_SET_BIN_SELECT_LO: { - uint32_t value = reader->Read(true); + uint32_t value = reader->ReadAndSwap(); bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; result = true; } break; case PM4_SET_BIN_SELECT_HI: { - uint32_t value = reader->Read(true); + uint32_t value = reader->ReadAndSwap(); bin_select_ = (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << 32); result = true; } break; case PM4_SET_BIN_MASK: { assert_true(count == 2); - uint64_t val_hi = reader->Read(true); - uint64_t val_lo = reader->Read(true); + uint64_t val_hi = reader->ReadAndSwap(); + uint64_t val_lo = reader->ReadAndSwap(); bin_mask_ = (val_hi << 32) | val_lo; result = true; } break; case PM4_SET_BIN_SELECT: { assert_true(count == 2); - uint64_t val_hi = reader->Read(true); - uint64_t val_lo = reader->Read(true); + uint64_t val_hi = reader->ReadAndSwap(); + uint64_t val_lo = reader->ReadAndSwap(); bin_select_ = (val_hi << 32) | val_lo; result = true; } break; @@ -713,7 +713,7 @@ bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingBuffer* reader, SCOPE_profile_cpu_f("gpu"); // generate interrupt from the command stream - uint32_t cpu_mask = reader->Read(true); + uint32_t cpu_mask = reader->ReadAndSwap(); for (int n = 0; n < 6; n++) { if (cpu_mask & (1 << n)) { graphics_system_->DispatchInterruptCallback(1, n); @@ -735,13 +735,13 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingBuffer* reader, // VdSwap will post this to tell us we need to swap the screen/fire an // interrupt. // 63 words here, but only the first has any data. - uint32_t magic = reader->Read(true); + uint32_t magic = reader->ReadAndSwap(); assert_true(magic == 'SWAP'); // TODO(benvanik): only swap frontbuffer ptr. - uint32_t frontbuffer_ptr = reader->Read(true); - uint32_t frontbuffer_width = reader->Read(true); - uint32_t frontbuffer_height = reader->Read(true); + uint32_t frontbuffer_ptr = reader->ReadAndSwap(); + uint32_t frontbuffer_width = reader->ReadAndSwap(); + uint32_t frontbuffer_height = reader->ReadAndSwap(); reader->AdvanceRead((count - 4) * sizeof(uint32_t)); if (swap_mode_ == SwapMode::kNormal) { @@ -756,8 +756,8 @@ bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER(RingBuffer* reader, uint32_t packet, uint32_t count) { // indirect buffer dispatch - uint32_t list_ptr = CpuToGpu(reader->Read(true)); - uint32_t list_length = reader->Read(true); + uint32_t list_ptr = CpuToGpu(reader->ReadAndSwap()); + uint32_t list_length = reader->ReadAndSwap(); assert_zero(list_length & ~0xFFFFF); list_length &= 0xFFFFF; ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); @@ -770,11 +770,11 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader, SCOPE_profile_cpu_f("gpu"); // wait until a register or memory location is a specific value - uint32_t wait_info = reader->Read(true); - uint32_t poll_reg_addr = reader->Read(true); - uint32_t ref = reader->Read(true); - uint32_t mask = reader->Read(true); - uint32_t wait = reader->Read(true); + uint32_t wait_info = reader->ReadAndSwap(); + uint32_t poll_reg_addr = reader->ReadAndSwap(); + uint32_t ref = reader->ReadAndSwap(); + uint32_t mask = reader->ReadAndSwap(); + uint32_t wait = reader->ReadAndSwap(); bool matched = false; do { uint32_t value; @@ -851,9 +851,9 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader, uint32_t count) { // register read/modify/write // ? (used during shader upload and edram setup) - uint32_t rmw_info = reader->Read(true); - uint32_t and_mask = reader->Read(true); - uint32_t or_mask = reader->Read(true); + uint32_t rmw_info = reader->ReadAndSwap(); + uint32_t and_mask = reader->ReadAndSwap(); + uint32_t or_mask = reader->ReadAndSwap(); uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; if ((rmw_info >> 31) & 0x1) { // & reg @@ -879,8 +879,8 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader, // Copy Register to Memory (?) // Count is 2, assuming a Register Addr and a Memory Addr. - uint32_t reg_addr = reader->Read(true); - uint32_t mem_addr = reader->Read(true); + uint32_t reg_addr = reader->ReadAndSwap(); + uint32_t mem_addr = reader->ReadAndSwap(); uint32_t reg_val; @@ -899,9 +899,9 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader, bool CommandProcessor::ExecutePacketType3_MEM_WRITE(RingBuffer* reader, uint32_t packet, uint32_t count) { - uint32_t write_addr = reader->Read(true); + uint32_t write_addr = reader->ReadAndSwap(); for (uint32_t i = 0; i < count - 1; i++) { - uint32_t write_data = reader->Read(true); + uint32_t write_data = reader->ReadAndSwap(); auto endianness = static_cast(write_addr & 0x3); auto addr = write_addr & ~0x3; @@ -918,12 +918,12 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader, uint32_t packet, uint32_t count) { // conditional write to memory or register - uint32_t wait_info = reader->Read(true); - uint32_t poll_reg_addr = reader->Read(true); - uint32_t ref = reader->Read(true); - uint32_t mask = reader->Read(true); - uint32_t write_reg_addr = reader->Read(true); - uint32_t write_data = reader->Read(true); + uint32_t wait_info = reader->ReadAndSwap(); + uint32_t poll_reg_addr = reader->ReadAndSwap(); + uint32_t ref = reader->ReadAndSwap(); + uint32_t mask = reader->ReadAndSwap(); + uint32_t write_reg_addr = reader->ReadAndSwap(); + uint32_t write_data = reader->ReadAndSwap(); uint32_t value; if (wait_info & 0x10) { // Memory. @@ -985,7 +985,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingBuffer* reader, uint32_t packet, uint32_t count) { // generate an event that creates a write to memory when completed - uint32_t initiator = reader->Read(true); + uint32_t initiator = reader->ReadAndSwap(); // Writeback initiator. WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); if (count == 1) { @@ -1002,9 +1002,9 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD(RingBuffer* reader, uint32_t packet, uint32_t count) { // generate a VS|PS_done event - uint32_t initiator = reader->Read(true); - uint32_t address = reader->Read(true); - uint32_t value = reader->Read(true); + uint32_t initiator = reader->ReadAndSwap(); + uint32_t address = reader->ReadAndSwap(); + uint32_t value = reader->ReadAndSwap(); // Writeback initiator. WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); uint32_t data_value; @@ -1027,8 +1027,8 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader, uint32_t packet, uint32_t count) { // generate a screen extent event - uint32_t initiator = reader->Read(true); - uint32_t address = reader->Read(true); + uint32_t initiator = reader->ReadAndSwap(); + uint32_t address = reader->ReadAndSwap(); // Writeback initiator. WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); auto endianness = static_cast(address & 0x3); @@ -1057,7 +1057,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader, uint32_t packet, uint32_t count) { assert_true(count == 1); - uint32_t initiator = reader->Read(true); + uint32_t initiator = reader->ReadAndSwap(); // Writeback initiator. WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); @@ -1076,8 +1076,8 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingBuffer* reader, // This ID matches the one issued in PM4_VIZ_QUERY // ID = dword0 & 0x3F; // use = dword0 & 0x40; - uint32_t dword0 = reader->Read(true); // viz query info - uint32_t dword1 = reader->Read(true); + uint32_t dword0 = reader->ReadAndSwap(); // viz query info + uint32_t dword1 = reader->ReadAndSwap(); uint32_t index_count = dword1 >> 16; auto prim_type = static_cast(dword1 & 0x3F); bool is_indexed = false; @@ -1087,8 +1087,8 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingBuffer* reader, // DI_SRC_SEL_DMA // Indexed draw. is_indexed = true; - index_buffer_info.guest_base = reader->Read(true); - uint32_t index_size = reader->Read(true); + index_buffer_info.guest_base = reader->ReadAndSwap(); + uint32_t index_size = reader->ReadAndSwap(); index_buffer_info.endianness = static_cast(index_size >> 30); index_size &= 0x00FFFFFF; bool index_32bit = (dword1 >> 11) & 0x1; @@ -1124,7 +1124,7 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingBuffer* reader, uint32_t packet, uint32_t count) { // draw using supplied indices in packet - uint32_t dword0 = reader->Read(true); + uint32_t dword0 = reader->ReadAndSwap(); uint32_t index_count = dword0 >> 16; auto prim_type = static_cast(dword0 & 0x3F); uint32_t src_sel = (dword0 >> 6) & 0x3; @@ -1150,7 +1150,7 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingBuffer* reader, // load constant into chip and to memory // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) // reg - 0x2000 - uint32_t offset_type = reader->Read(true); + uint32_t offset_type = reader->ReadAndSwap(); uint32_t index = offset_type & 0x7FF; uint32_t type = (offset_type >> 16) & 0xFF; switch (type) { @@ -1175,7 +1175,7 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingBuffer* reader, return true; } for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(true); + uint32_t data = reader->ReadAndSwap(); WriteRegister(index, data); } return true; @@ -1184,10 +1184,10 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingBuffer* reader, bool CommandProcessor::ExecutePacketType3_SET_CONSTANT2(RingBuffer* reader, uint32_t packet, uint32_t count) { - uint32_t offset_type = reader->Read(true); + uint32_t offset_type = reader->ReadAndSwap(); uint32_t index = offset_type & 0xFFFF; for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(true); + uint32_t data = reader->ReadAndSwap(); WriteRegister(index, data); } return true; @@ -1197,11 +1197,11 @@ bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(RingBuffer* reader, uint32_t packet, uint32_t count) { // load constants from memory - uint32_t address = reader->Read(true); + uint32_t address = reader->ReadAndSwap(); address &= 0x3FFFFFFF; - uint32_t offset_type = reader->Read(true); + uint32_t offset_type = reader->ReadAndSwap(); uint32_t index = offset_type & 0x7FF; - uint32_t size_dwords = reader->Read(true); + uint32_t size_dwords = reader->ReadAndSwap(); size_dwords &= 0xFFF; uint32_t type = (offset_type >> 16) & 0xFF; switch (type) { @@ -1235,10 +1235,10 @@ bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(RingBuffer* reader, bool CommandProcessor::ExecutePacketType3_SET_SHADER_CONSTANTS( RingBuffer* reader, uint32_t packet, uint32_t count) { - uint32_t offset_type = reader->Read(true); + uint32_t offset_type = reader->ReadAndSwap(); uint32_t index = offset_type & 0xFFFF; for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(true); + uint32_t data = reader->ReadAndSwap(); WriteRegister(index, data); } return true; @@ -1250,10 +1250,10 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingBuffer* reader, SCOPE_profile_cpu_f("gpu"); // load sequencer instruction memory (pointer-based) - uint32_t addr_type = reader->Read(true); + uint32_t addr_type = reader->ReadAndSwap(); auto shader_type = static_cast(addr_type & 0x3); uint32_t addr = addr_type & ~0x3; - uint32_t start_size = reader->Read(true); + uint32_t start_size = reader->ReadAndSwap(); uint32_t start = start_size >> 16; uint32_t size_dwords = start_size & 0xFFFF; // dwords assert_true(start == 0); @@ -1281,8 +1281,8 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(RingBuffer* reader, SCOPE_profile_cpu_f("gpu"); // load sequencer instruction memory (code embedded in packet) - uint32_t dword0 = reader->Read(true); - uint32_t dword1 = reader->Read(true); + uint32_t dword0 = reader->ReadAndSwap(); + uint32_t dword1 = reader->ReadAndSwap(); auto shader_type = static_cast(dword0); uint32_t start_size = dword1; uint32_t start = start_size >> 16; @@ -1312,7 +1312,7 @@ bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE(RingBuffer* reader, uint32_t packet, uint32_t count) { // selective invalidation of state pointers - /*uint32_t mask =*/reader->Read(true); + /*uint32_t mask =*/reader->ReadAndSwap(); // driver_->InvalidateState(mask); return true; } @@ -1324,7 +1324,7 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader, // http://www.google.com/patents/US20050195186 assert_true(count == 1); - uint32_t dword0 = reader->Read(true); + uint32_t dword0 = reader->ReadAndSwap(); uint32_t id = dword0 & 0x3F; uint32_t end = dword0 & 0x80;