From 90bc6ad1a8cf16629a5e974e3a8385512cb11a1c Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 19 Oct 2013 11:50:01 -0700 Subject: [PATCH] Interrupts fire on the right 'thread', ringbuffer work, --- src/xenia/cpu/processor.cc | 15 +++- src/xenia/cpu/processor.h | 4 +- src/xenia/cpu/thread_state.cc | 24 +++--- src/xenia/cpu/thread_state.h | 8 +- src/xenia/gpu/graphics_system.cc | 11 ++- src/xenia/gpu/graphics_system.h | 2 +- src/xenia/gpu/ring_buffer_worker.cc | 107 ++++++++++++++++++++----- src/xenia/gpu/ring_buffer_worker.h | 11 ++- src/xenia/gpu/xenos/packets.h | 9 ++- src/xenia/gpu/xenos/register_table.inc | 2 + 10 files changed, 145 insertions(+), 48 deletions(-) diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index c3c7fda97..372414598 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -11,6 +11,7 @@ #include #include +#include #include @@ -67,6 +68,7 @@ Processor::~Processor() { } modules_.clear(); + xe_memory_heap_free(memory_, interrupt_thread_block_, 2048); DeallocThread(interrupt_thread_state_); xe_mutex_free(interrupt_thread_lock_); @@ -107,6 +109,9 @@ int Processor::Setup() { interrupt_thread_lock_ = xe_mutex_alloc(10000); interrupt_thread_state_ = AllocThread(16 * 1024, 0, 0); + interrupt_thread_block_ = xe_memory_heap_alloc( + memory_, 0, 2048, 0); + interrupt_thread_state_->ppc_state()->r[13] = interrupt_thread_block_; sym_table_ = new SymbolTable(); @@ -259,11 +264,19 @@ uint64_t Processor::Execute(ThreadState* thread_state, uint32_t address, return ppc_state->r[3]; } -uint64_t Processor::ExecuteInterrupt(uint32_t address, +uint64_t Processor::ExecuteInterrupt(uint32_t cpu, + uint32_t address, uint64_t arg0, uint64_t arg1) { // Acquire lock on interrupt thread (we can only dispatch one at a time). xe_mutex_lock(interrupt_thread_lock_); + + // Set 0x10C(r13) to the current CPU ID. 
+ uint8_t* p = xe_memory_addr(memory_, 0); + XESETUINT8BE(p + interrupt_thread_block_ + 0x10C, cpu); + + // Execute interrupt. uint64_t result = Execute(interrupt_thread_state_, address, arg0, arg1); + xe_mutex_unlock(interrupt_thread_lock_); return result; } diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index 53e49cb2d..fda16a509 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -62,7 +62,8 @@ public: uint64_t Execute(ThreadState* thread_state, uint32_t address, uint64_t arg0, uint64_t arg1); - uint64_t ExecuteInterrupt(uint32_t address, uint64_t arg0, uint64_t arg1); + uint64_t ExecuteInterrupt( + uint32_t cpu, uint32_t address, uint64_t arg0, uint64_t arg1); sdb::FunctionSymbol* GetFunction(uint32_t address); void* GetFunctionPointer(uint32_t address); @@ -80,6 +81,7 @@ private: xe_mutex_t* interrupt_thread_lock_; ThreadState* interrupt_thread_state_; + uint32_t interrupt_thread_block_; }; diff --git a/src/xenia/cpu/thread_state.cc b/src/xenia/cpu/thread_state.cc index 9f51e5e0e..9151ddf74 100644 --- a/src/xenia/cpu/thread_state.cc +++ b/src/xenia/cpu/thread_state.cc @@ -26,27 +26,23 @@ ThreadState::ThreadState( stack_address_ = xe_memory_heap_alloc(memory_, 0, stack_size, 0); - xe_zero_struct(&ppc_state_, sizeof(ppc_state_)); + // Allocate aligned storage; the assert below checks 16-byte alignment. + ppc_state_ = (xe_ppc_state_t*)xe_malloc_aligned(sizeof(xe_ppc_state_t)); + XEASSERT(((uint64_t)ppc_state_ & 0xF) == 0); + xe_zero_struct(ppc_state_, sizeof(xe_ppc_state_t)); // Stash pointers to common structures that callbacks may need. - ppc_state_.membase = xe_memory_addr(memory_, 0); - ppc_state_.processor = processor; - ppc_state_.thread_state = this; + ppc_state_->membase = xe_memory_addr(memory_, 0); + ppc_state_->processor = processor; + ppc_state_->thread_state = this; // Set initial registers. 
- ppc_state_.r[1] = stack_address_ + stack_size; - ppc_state_.r[13] = thread_state_address_; + ppc_state_->r[1] = stack_address_ + stack_size; + ppc_state_->r[13] = thread_state_address_; } ThreadState::~ThreadState() { + xe_free_aligned(ppc_state_); xe_memory_heap_free(memory_, stack_address_, 0); xe_memory_release(memory_); } - -uint32_t ThreadState::thread_id() const { - return thread_id_; -} - -xe_ppc_state_t* ThreadState::ppc_state() { - return &ppc_state_; -} diff --git a/src/xenia/cpu/thread_state.h b/src/xenia/cpu/thread_state.h index 10571c9a9..192f7fcc7 100644 --- a/src/xenia/cpu/thread_state.h +++ b/src/xenia/cpu/thread_state.h @@ -29,9 +29,8 @@ public: uint32_t thread_id); ~ThreadState(); - uint32_t thread_id() const; - - xe_ppc_state_t* ppc_state(); + uint32_t thread_id() const { return thread_id_; } + xe_ppc_state_t* ppc_state() const { return ppc_state_; } private: uint32_t stack_size_; @@ -42,7 +41,8 @@ private: uint32_t thread_state_address_; uint32_t thread_id_; - xe_ppc_state_t ppc_state_; + // NOTE: must be 16-byte aligned for SSE ops. + xe_ppc_state_t* ppc_state_; }; diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index e4e3932ff..d16634b8e 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -25,7 +25,7 @@ GraphicsSystem::GraphicsSystem(const CreationParams* params) : last_interrupt_time_(0), swap_pending_(false) { memory_ = xe_memory_retain(params->memory); - worker_ = new RingBufferWorker(memory_); + worker_ = new RingBufferWorker(this, memory_); // Set during Initialize(); driver_ = 0; @@ -160,12 +160,17 @@ void GraphicsSystem::WriteRegister(uint32_t r, uint64_t value) { regs->values[r].u32 = (uint32_t)value; } -void GraphicsSystem::DispatchInterruptCallback() { +void GraphicsSystem::DispatchInterruptCallback(uint32_t cpu) { + // Pick a CPU if the caller passed 0xFFFFFFFF. CPU 2 is an arbitrary guess. 
+ if (cpu == 0xFFFFFFFF) { + cpu = 2; + } + // NOTE: we may be executing in some random thread. last_interrupt_time_ = xe_pal_now(); if (!interrupt_callback_) { return; } processor_->ExecuteInterrupt( - interrupt_callback_, 0, interrupt_callback_data_); + cpu, interrupt_callback_, 1, interrupt_callback_data_); } diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index 670676523..47cce8211 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -52,7 +52,7 @@ public: virtual uint64_t ReadRegister(uint32_t r); virtual void WriteRegister(uint32_t r, uint64_t value); - void DispatchInterruptCallback(); + void DispatchInterruptCallback(uint32_t cpu = 0xFFFFFFFF); bool swap_pending() const { return swap_pending_; } void set_swap_pending(bool value) { swap_pending_ = value; } diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc index ec08e0dd4..470ca06b1 100644 --- a/src/xenia/gpu/ring_buffer_worker.cc +++ b/src/xenia/gpu/ring_buffer_worker.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -19,8 +20,9 @@ using namespace xe::gpu; using namespace xe::gpu::xenos; -RingBufferWorker::RingBufferWorker(xe_memory_ref memory) : - memory_(memory), driver_(0) { +RingBufferWorker::RingBufferWorker( + GraphicsSystem* graphics_system, xe_memory_ref memory) : + graphics_system_(graphics_system), memory_(memory), driver_(0) { write_ptr_index_event_ = CreateEvent( NULL, FALSE, FALSE, NULL); @@ -31,6 +33,10 @@ RingBufferWorker::RingBufferWorker(xe_memory_ref memory) : read_ptr_writeback_ptr_ = 0; write_ptr_index_ = 0; write_ptr_max_index_ = 0; + + LARGE_INTEGER perf_counter; + QueryPerformanceCounter(&perf_counter); + counter_base_ = perf_counter.QuadPart; } RingBufferWorker::~RingBufferWorker() { @@ -38,6 +44,12 @@ RingBufferWorker::~RingBufferWorker() { CloseHandle(write_ptr_index_event_); } +uint64_t RingBufferWorker::GetCounter() { + LARGE_INTEGER perf_counter; + 
QueryPerformanceCounter(&perf_counter); + return perf_counter.QuadPart - counter_base_; +} + void RingBufferWorker::Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count) { driver_ = driver; @@ -268,6 +280,21 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { ADVANCE_PTR(count); break; + case PM4_INTERRUPT: + // generate interrupt from the command stream + { + XELOGGPU("[%.8X] Packet(%.8X): PM4_INTERRUPT", + packet_ptr, packet); + LOG_DATA(count); + uint32_t cpu_mask = READ_AND_ADVANCE_PTR(); + for (int n = 0; n < 6; n++) { + if (cpu_mask & (1 << n)) { + graphics_system_->DispatchInterruptCallback(n); + } + } + } + break; + case PM4_INDIRECT_BUFFER: // indirect buffer dispatch { @@ -302,8 +329,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { value = regs->values[poll_reg_addr].u32; } switch (wait_info & 0x7) { - case 0x0: // Always. - matched = true; + case 0x0: // Never. + matched = false; break; case 0x1: // Less than reference. matched = (value & mask) < ref; @@ -323,14 +350,17 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { case 0x6: // Greater than reference. matched = (value & mask) > ref; break; - default: - XELOGE("Unsupported wait comparison type!"); - XEASSERTALWAYS(); + case 0x7: // Always + matched = true; break; } if (!matched) { // Wait. - SwitchToThread(); + if (wait >= 0x100) { + Sleep(wait / 0x100); + } else { + SwitchToThread(); + } } } while (!matched); } @@ -375,7 +405,6 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { uint32_t poll_reg_addr = READ_AND_ADVANCE_PTR(); uint32_t ref = READ_AND_ADVANCE_PTR(); uint32_t mask = READ_AND_ADVANCE_PTR(); - uint32_t wait = READ_AND_ADVANCE_PTR(); uint32_t write_reg_addr = READ_AND_ADVANCE_PTR(); uint32_t write_data = READ_AND_ADVANCE_PTR(); uint32_t value; @@ -389,8 +418,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { } bool matched = false; switch (wait_info & 0x7) { - case 0x0: // Always. 
- matched = true; + case 0x0: // Never. + matched = false; break; case 0x1: // Less than reference. matched = (value & mask) < ref; @@ -410,9 +439,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { case 0x6: // Greater than reference. matched = (value & mask) > ref; break; - default: - XELOGE("Unsupported wait comparison type!"); - XEASSERTALWAYS(); + case 0x7: // Always + matched = true; break; } if (matched) { @@ -441,12 +469,22 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", packet_ptr, packet); LOG_DATA(count); - uint32_t d0 = READ_AND_ADVANCE_PTR(); // 3? - XEASSERT(d0 == 0x3); - uint32_t d1 = READ_AND_ADVANCE_PTR(); // ptr - uint32_t d2 = READ_AND_ADVANCE_PTR(); // value? - if (!(d1 & 0xC0000000)) { - XESETUINT32BE(p + TRANSLATE_ADDR(d1), d2); + uint32_t initiator = READ_AND_ADVANCE_PTR(); + uint32_t address = READ_AND_ADVANCE_PTR(); + uint32_t value = READ_AND_ADVANCE_PTR(); + // Writeback initiator. + WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x1F); + uint32_t data_value; + if ((initiator >> 31) & 0x1) { + // Write counter (GPU clock counter?). + // TODO(benvanik): 64-bit write? + data_value = (uint32_t)GetCounter(); + } else { + // Write value. + data_value = value; + } + if (!(address & 0xC0000000)) { + XESETUINT32BE(p + TRANSLATE_ADDR(address), data_value); } else { // TODO(benvanik): read up on PM4_EVENT_WRITE_SHD. // No clue. Maybe relative write based on a register base? 
@@ -543,6 +581,35 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { } break; + case PM4_SET_BIN_MASK_LO: + { + uint32_t value = READ_AND_ADVANCE_PTR(); + XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_MASK_HI: + { + uint32_t value = READ_AND_ADVANCE_PTR(); + XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_SELECT_LO: + { + uint32_t value = READ_AND_ADVANCE_PTR(); + XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_SELECT_HI: + { + uint32_t value = READ_AND_ADVANCE_PTR(); + XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", + packet_ptr, packet, value); + } + break; + default: XELOGGPU("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet); diff --git a/src/xenia/gpu/ring_buffer_worker.h b/src/xenia/gpu/ring_buffer_worker.h index 4048d7224..14c602970 100644 --- a/src/xenia/gpu/ring_buffer_worker.h +++ b/src/xenia/gpu/ring_buffer_worker.h @@ -19,14 +19,17 @@ namespace xe { namespace gpu { class GraphicsDriver; +class GraphicsSystem; class RingBufferWorker { public: - RingBufferWorker(xe_memory_ref memory); + RingBufferWorker(GraphicsSystem* graphics_system, xe_memory_ref memory); virtual ~RingBufferWorker(); xe_memory_ref memory(); + uint64_t GetCounter(); + void Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); @@ -49,9 +52,11 @@ private: void WriteRegister(uint32_t index, uint32_t value); protected: - xe_memory_ref memory_; + xe_memory_ref memory_; + GraphicsSystem* graphics_system_; + GraphicsDriver* driver_; - GraphicsDriver* driver_; + uint64_t counter_base_; uint32_t primary_buffer_ptr_; uint32_t primary_buffer_size_; diff --git a/src/xenia/gpu/xenos/packets.h b/src/xenia/gpu/xenos/packets.h index 39fa75043..2625f8900 100644 --- 
a/src/xenia/gpu/xenos/packets.h +++ b/src/xenia/gpu/xenos/packets.h @@ -67,9 +67,16 @@ enum Type3Opcode { PM4_SET_BIN_SELECT = 0x51, // sets the 64-bit BIN_SELECT register in the PFP PM4_CONTEXT_UPDATE = 0x5e, // updates the current context, if needed - PM4_INTERRUPT = 0x40, // generate interrupt from the command stream + PM4_INTERRUPT = 0x54, // generate interrupt from the command stream PM4_IM_STORE = 0x2c, // copy sequencer instruction memory to system memory + + // Tiled rendering: + // https://www.google.com/patents/US20060055701 + PM4_SET_BIN_MASK_LO = 0x60, + PM4_SET_BIN_MASK_HI = 0x61, + PM4_SET_BIN_SELECT_LO = 0x62, + PM4_SET_BIN_SELECT_HI = 0x63, }; diff --git a/src/xenia/gpu/xenos/register_table.inc b/src/xenia/gpu/xenos/register_table.inc index a440627af..c549e39a6 100644 --- a/src/xenia/gpu/xenos/register_table.inc +++ b/src/xenia/gpu/xenos/register_table.inc @@ -98,6 +98,8 @@ XE_GPU_REGISTER(0x2182, dword, SQ_INTERPOLATOR_CNTL) XE_GPU_REGISTER(0x2183, dword, SQ_WRAPPING_0) XE_GPU_REGISTER(0x2184, dword, SQ_WRAPPING_1) +XE_GPU_REGISTER(0x21F9, dword, VGT_EVENT_INITIATOR) + XE_GPU_REGISTER(0x2200, dword, RB_DEPTHCONTROL) XE_GPU_REGISTER(0x2201, dword, RB_BLENDCONTROL_0) XE_GPU_REGISTER(0x2202, dword, RB_COLORCONTROL)