/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/command_processor.h"

#include <algorithm>
#include <cinttypes>
#include <cmath>

#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/base/ring_buffer.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/graphics_system.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"

namespace xe {
namespace gpu {

using namespace xe::gpu::xenos;

CommandProcessor::CommandProcessor(GraphicsSystem* graphics_system,
                                   kernel::KernelState* kernel_state)
    : memory_(graphics_system->memory()),
      kernel_state_(kernel_state),
      graphics_system_(graphics_system),
      register_file_(graphics_system_->register_file()),
      trace_writer_(graphics_system->memory()->physical_membase()),
      worker_running_(true),
      write_ptr_index_event_(xe::threading::Event::CreateAutoResetEvent(false)),
      write_ptr_index_(0) {}

CommandProcessor::~CommandProcessor() = default;

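// Note: the 256-entry table ramp packs three 10-bit channels into each 32-bit
// word (hence value | value << 10 | value << 20 below), while the 128-entry
// PWL ramp keeps a 16-bit base in the low halfword and what appears to be the
// step to the next control point in the high halfword; both are reset here to
// an identity curve matching what titles program at startup.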
bool CommandProcessor::Initialize() {
  // Initialize the gamma ramps to their default (linear) values - taken from
  // what games set when starting.
  for (uint32_t i = 0; i < 256; ++i) {
    uint32_t value = i * 1023 / 255;
    gamma_ramp_.table[i].value = value | (value << 10) | (value << 20);
  }
  for (uint32_t i = 0; i < 128; ++i) {
    uint32_t value = (i * 65535 / 127) & ~63;
    if (i < 127) {
      value |= 0x200 << 16;
    }
    for (uint32_t j = 0; j < 3; ++j) {
      gamma_ramp_.pwl[i].values[j].value = value;
    }
  }
  dirty_gamma_ramp_table_ = true;
  dirty_gamma_ramp_pwl_ = true;

  worker_running_ = true;
  worker_thread_ = kernel::object_ref<kernel::XHostThread>(
      new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() {
        WorkerThreadMain();
        return 0;
      }));
  worker_thread_->set_name("GPU Commands");
  worker_thread_->Create();

  return true;
}

void CommandProcessor::Shutdown() {
  EndTracing();

  worker_running_ = false;
  write_ptr_index_event_->Set();
  worker_thread_->Wait(0, 0, 0, nullptr);
  worker_thread_.reset();
}

void CommandProcessor::InitializeShaderStorage(
    const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
}

void CommandProcessor::RequestFrameTrace(
    const std::filesystem::path& root_path) {
  if (trace_state_ == TraceState::kStreaming) {
    XELOGE("Streaming trace; cannot also trace frame.");
    return;
  }
  if (trace_state_ == TraceState::kSingleFrame) {
    XELOGE("Frame trace already pending; ignoring.");
    return;
  }
  trace_state_ = TraceState::kSingleFrame;
  trace_frame_path_ = root_path;
}

void CommandProcessor::BeginTracing(const std::filesystem::path& root_path) {
  if (trace_state_ == TraceState::kStreaming) {
    XELOGE("Streaming already active; ignoring request.");
    return;
  }
  if (trace_state_ == TraceState::kSingleFrame) {
    XELOGE("Frame trace pending; ignoring streaming request.");
    return;
  }
  // Streaming starts on the next primary buffer execute.
  trace_state_ = TraceState::kStreaming;
  trace_stream_path_ = root_path;
}

void CommandProcessor::EndTracing() {
  if (!trace_writer_.is_open()) {
    return;
  }
  assert_true(trace_state_ == TraceState::kStreaming);
  trace_state_ = TraceState::kDisabled;
  trace_writer_.Close();
}

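// Runs the given function on the command processor worker thread: executed
// immediately when already called from that thread with nothing queued,
// otherwise queued and drained at the top of WorkerThreadMain's loop.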
void CommandProcessor::CallInThread(std::function<void()> fn) {
  if (pending_fns_.empty() &&
      kernel::XThread::IsInThread(worker_thread_.get())) {
    fn();
  } else {
    pending_fns_.push(std::move(fn));
  }
}

void CommandProcessor::ClearCaches() {}

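// swap_post_effect_desired_ tracks the latest request immediately; the value
// actually applied when presenting is updated on the worker thread via
// CallInThread so it only changes between frames.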
void CommandProcessor::SetDesiredSwapPostEffect(
    SwapPostEffect swap_post_effect) {
  if (swap_post_effect_desired_ == swap_post_effect) {
    return;
  }
  swap_post_effect_desired_ = swap_post_effect;
  CallInThread([this, swap_post_effect]() {
    swap_post_effect_actual_ = swap_post_effect;
  });
}

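// Main loop of the GPU command processor thread: drains queued host functions,
// then spins on the guest write pointer (falling back to a 5 ms event wait
// after ~500 idle iterations) and executes the primary ring buffer whenever
// read_ptr_index_ lags behind it, writing the read pointer back to guest
// memory if writeback has been enabled.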
void CommandProcessor::WorkerThreadMain() {
  if (!SetupContext()) {
    xe::FatalError("Unable to setup command processor internal state");
    return;
  }

  while (worker_running_) {
    while (!pending_fns_.empty()) {
      auto fn = std::move(pending_fns_.front());
      pending_fns_.pop();
      fn();
    }

    uint32_t write_ptr_index = write_ptr_index_.load();
    if (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) {
      SCOPE_profile_cpu_i("gpu", "xe::gpu::CommandProcessor::Stall");
      // We've run out of commands to execute.
      // We spin here waiting for new ones, as the overhead of waiting on our
      // event is too high.
      PrepareForWait();
      uint32_t loop_count = 0;
      do {
        // If we spin around too much, revert to a "low-power" state.
        if (loop_count > 500) {
          const int wait_time_ms = 5;
          xe::threading::Wait(write_ptr_index_event_.get(), true,
                              std::chrono::milliseconds(wait_time_ms));
        }

        xe::threading::MaybeYield();
        loop_count++;
        write_ptr_index = write_ptr_index_.load();
      } while (worker_running_ && pending_fns_.empty() &&
               (write_ptr_index == 0xBAADF00D ||
                read_ptr_index_ == write_ptr_index));
      ReturnFromWait();
      if (!worker_running_ || !pending_fns_.empty()) {
        continue;
      }
    }
    assert_true(read_ptr_index_ != write_ptr_index);

    // Execute. Note that we handle wraparound transparently.
    read_ptr_index_ = ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);

    // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
    // that many indices.
    if (read_ptr_writeback_ptr_) {
      xe::store_and_swap<uint32_t>(
          memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_);
    }

    // FIXME: We're supposed to process the WAIT_UNTIL register at this point,
    // but no games seem to actually use it.
  }

  ShutdownContext();
}

void CommandProcessor::Pause() {
  if (paused_) {
    return;
  }
  paused_ = true;

  threading::Fence fence;
  CallInThread([&fence]() {
    fence.Signal();
    threading::Thread::GetCurrentThread()->Suspend();
  });

  fence.Wait();
}

void CommandProcessor::Resume() {
  if (!paused_) {
    return;
  }
  paused_ = false;

  worker_thread_->thread()->Resume();
}

bool CommandProcessor::Save(ByteStream* stream) {
  assert_true(paused_);

  stream->Write<uint32_t>(primary_buffer_ptr_);
  stream->Write<uint32_t>(primary_buffer_size_);
  stream->Write<uint32_t>(read_ptr_index_);
  stream->Write<uint32_t>(read_ptr_update_freq_);
  stream->Write<uint32_t>(read_ptr_writeback_ptr_);
  stream->Write<uint32_t>(write_ptr_index_.load());

  return true;
}

bool CommandProcessor::Restore(ByteStream* stream) {
  assert_true(paused_);

  primary_buffer_ptr_ = stream->Read<uint32_t>();
  primary_buffer_size_ = stream->Read<uint32_t>();
  read_ptr_index_ = stream->Read<uint32_t>();
  read_ptr_update_freq_ = stream->Read<uint32_t>();
  read_ptr_writeback_ptr_ = stream->Read<uint32_t>();
  write_ptr_index_.store(stream->Read<uint32_t>());

  return true;
}

bool CommandProcessor::SetupContext() { return true; }

void CommandProcessor::ShutdownContext() {}

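// size_log2 comes from the guest ring buffer setup (presumably CP_RB_CNTL's
// RB_BUFSZ field, which is expressed as log2 of the size in quadwords), so the
// byte size is 1 << (size_log2 + 3).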
void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {
  read_ptr_index_ = 0;
  primary_buffer_ptr_ = ptr;
  primary_buffer_size_ = uint32_t(1) << (size_log2 + 3);
}

void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size_log2) {
  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
  read_ptr_writeback_ptr_ = ptr;
  // CP_RB_CNTL Ring Buffer Control 0x704
  // block_size = RB_BLKSZ, log2 of number of quadwords read between updates of
  // the read pointer.
  read_ptr_update_freq_ = uint32_t(1) << block_size_log2 >> 2;
}

void CommandProcessor::UpdateWritePointer(uint32_t value) {
  write_ptr_index_ = value;
  write_ptr_index_event_->Set();
}

void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
  RegisterFile* regs = register_file_;
  if (index >= RegisterFile::kRegisterCount) {
    XELOGW("CommandProcessor::WriteRegister index out of bounds: {}", index);
    return;
  }

  regs->values[index].u32 = value;
  if (!regs->GetRegisterInfo(index)) {
    XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
  }

  // If this is a COHER register, set the dirty flag.
  // This will block the command processor the next time it WAIT_MEM_REGs and
  // allow us to synchronize the memory.
  if (index == XE_GPU_REG_COHER_STATUS_HOST) {
    regs->values[index].u32 |= 0x80000000ul;
  }

  // Scratch register writeback.
  if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
    uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
    if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
      // Enabled - write to address.
      uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
      uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
      xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
    }
  }
}

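// Called for writes to the DC_LUT data registers: DC_LUT_RW_INDEX selects the
// entry, DC_LUT_RW_MODE selects between the 256-entry table and the 128-entry
// PWL ramp, and DC_LUT_WRITE_EN_MASK gates which components are written (only
// all-or-nothing component updates are handled here).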
void CommandProcessor::UpdateGammaRampValue(GammaRampType type,
                                            uint32_t value) {
  RegisterFile* regs = register_file_;

  auto index = regs->values[XE_GPU_REG_DC_LUT_RW_INDEX].u32;

  auto mask = regs->values[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32;
  auto mask_lo = (mask >> 0) & 0x7;
  auto mask_hi = (mask >> 3) & 0x7;

  // If games update individual components we're going to have a problem.
  assert_true(mask_lo == 0 || mask_lo == 7);
  assert_true(mask_hi == 0);

  if (mask_lo) {
    switch (type) {
      case GammaRampType::kTable:
        assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 0);
        gamma_ramp_.table[index].value = value;
        dirty_gamma_ramp_table_ = true;
        break;
      case GammaRampType::kPWL:
        assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 1);
        // The lower 6 bits are hardwired to 0.
        // https://developer.amd.com/wordpress/media/2012/10/RRG-216M56-03oOEM.pdf
        gamma_ramp_.pwl[index].values[gamma_ramp_rw_subindex_].value =
            value & ~(uint32_t(63) | (uint32_t(63) << 16));
        gamma_ramp_rw_subindex_ = (gamma_ramp_rw_subindex_ + 1) % 3;
        dirty_gamma_ramp_pwl_ = true;
        break;
      default:
        assert_unhandled_case(type);
    }
  }
}

void CommandProcessor::MakeCoherent() {
  SCOPE_profile_cpu_f("gpu");

  // Status host often has 0x01000000 or 0x03000000.
  // This is likely toggling VC (vertex cache) or TC (texture cache).
  // Or, it also has a direction in here maybe - there is probably
  // some way to check for dest coherency (what all the COHER_DEST_BASE_*
  // registers are for).
  // Best docs I've found on this are here:
  // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
  // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454

  RegisterFile* regs = register_file_;
  auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
  auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
  auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;

  if (!status_host.status) {
    return;
  }

  const char* action = "N/A";
  if (status_host.vc_action_ena && status_host.tc_action_ena) {
    action = "VC | TC";
  } else if (status_host.tc_action_ena) {
    action = "TC";
  } else if (status_host.vc_action_ena) {
    action = "VC";
  }

  // TODO(benvanik): notify resource cache of base->size and type.
  XELOGD("Make {:08X} -> {:08X} ({}b) coherent, action = {}", base_host,
         base_host + size_host, size_host, action);

  // Mark coherent.
  status_host.status = 0;
}

void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }

void CommandProcessor::ReturnFromWait() {}

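// read_index/write_index are in 32-bit words relative to primary_buffer_ptr_;
// the RingBuffer wrapper handles wraparound within primary_buffer_size_, and
// the returned value becomes the new read_ptr_index_.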
uint32_t CommandProcessor::ExecutePrimaryBuffer(uint32_t read_index,
                                                uint32_t write_index) {
  SCOPE_profile_cpu_f("gpu");

  // If we have a pending trace stream open it now. That way we ensure we get
  // all commands.
  if (!trace_writer_.is_open() && trace_state_ == TraceState::kStreaming) {
    uint32_t title_id = kernel_state_->GetExecutableModule()
                            ? kernel_state_->GetExecutableModule()->title_id()
                            : 0;
    auto file_name = fmt::format("{:08X}_stream.xtr", title_id);
    auto path = trace_stream_path_ / file_name;
    trace_writer_.Open(path, title_id);
    InitializeTrace();
  }

  // Adjust pointer base.
  uint32_t start_ptr = primary_buffer_ptr_ + read_index * sizeof(uint32_t);
  start_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (start_ptr & 0x1FFFFFFF);
  uint32_t end_ptr = primary_buffer_ptr_ + write_index * sizeof(uint32_t);
  end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);

  trace_writer_.WritePrimaryBufferStart(start_ptr, write_index - read_index);

  // Execute commands!
  RingBuffer reader(memory_->TranslatePhysical(primary_buffer_ptr_),
                    primary_buffer_size_);
  reader.set_read_offset(read_index * sizeof(uint32_t));
  reader.set_write_offset(write_index * sizeof(uint32_t));
  do {
    if (!ExecutePacket(&reader)) {
      // This probably should be fatal - but we're going to continue anyways.
      XELOGE("**** PRIMARY RINGBUFFER: Failed to execute packet.");
      assert_always();
      break;
    }
  } while (reader.read_count());

  OnPrimaryBufferEnd();

  trace_writer_.WritePrimaryBufferEnd();

  return write_index;
}

void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) {
  SCOPE_profile_cpu_f("gpu");

  trace_writer_.WriteIndirectBufferStart(ptr, count * sizeof(uint32_t));

  // Execute commands!
  RingBuffer reader(memory_->TranslatePhysical(ptr), count * sizeof(uint32_t));
  reader.set_write_offset(count * sizeof(uint32_t));
  do {
    if (!ExecutePacket(&reader)) {
      // Return up a level if we encounter a bad packet.
      XELOGE("**** INDIRECT RINGBUFFER: Failed to execute packet.");
      assert_always();
      break;
    }
  } while (reader.read_count());

  trace_writer_.WriteIndirectBufferEnd();
}

void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) {
  // Execute commands!
  RingBuffer reader(memory_->TranslatePhysical(ptr), count * sizeof(uint32_t));
  reader.set_write_offset(count * sizeof(uint32_t));
  do {
    if (!ExecutePacket(&reader)) {
      XELOGE("**** ExecutePacket: Failed to execute packet.");
      assert_always();
      break;
    }
  } while (reader.read_count());
}

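// PM4 packet headers put the packet type in bits 31:30. Type-0 packets carry a
// register base index and count for sequential register writes, type-1 packets
// write two registers, type-2 packets are no-ops, and type-3 packets carry an
// opcode (bits 14:8), a payload dword count minus one (bits 29:16), and a
// predicate flag in bit 0.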
bool CommandProcessor::ExecutePacket(RingBuffer* reader) {
  const uint32_t packet = reader->ReadAndSwap<uint32_t>();
  const uint32_t packet_type = packet >> 30;
  if (packet == 0) {
    trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 1);
    trace_writer_.WritePacketEnd();
    return true;
  }

  if (packet == 0xCDCDCDCD) {
    XELOGW("GPU packet is CDCDCDCD - probably read uninitialized memory!");
  }

  switch (packet_type) {
    case 0x00:
      return ExecutePacketType0(reader, packet);
    case 0x01:
      return ExecutePacketType1(reader, packet);
    case 0x02:
      return ExecutePacketType2(reader, packet);
    case 0x03:
      return ExecutePacketType3(reader, packet);
    default:
      assert_unhandled_case(packet_type);
      return false;
  }
}

bool CommandProcessor::ExecutePacketType0(RingBuffer* reader, uint32_t packet) {
  // Type-0 packet.
  // Write count registers in sequence to the registers starting at
  // (base_index << 2).

  uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
  if (reader->read_count() < count * sizeof(uint32_t)) {
    XELOGE(
        "ExecutePacketType0 overflow (read count {:08X}, packet count {:08X})",
        reader->read_count(), count * sizeof(uint32_t));
    return false;
  }

  trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 1 + count);

  uint32_t base_index = (packet & 0x7FFF);
  uint32_t write_one_reg = (packet >> 15) & 0x1;
  for (uint32_t m = 0; m < count; m++) {
    uint32_t reg_data = reader->ReadAndSwap<uint32_t>();
    uint32_t target_index = write_one_reg ? base_index : base_index + m;
    WriteRegister(target_index, reg_data);
  }

  trace_writer_.WritePacketEnd();
  return true;
}

bool CommandProcessor::ExecutePacketType1(RingBuffer* reader, uint32_t packet) {
  // Type-1 packet.
  // Contains two registers of data. Type-0 should be more common.
  trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 3);
  uint32_t reg_index_1 = packet & 0x7FF;
  uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
  uint32_t reg_data_1 = reader->ReadAndSwap<uint32_t>();
  uint32_t reg_data_2 = reader->ReadAndSwap<uint32_t>();
  WriteRegister(reg_index_1, reg_data_1);
  WriteRegister(reg_index_2, reg_data_2);
  trace_writer_.WritePacketEnd();
  return true;
}

bool CommandProcessor::ExecutePacketType2(RingBuffer* reader, uint32_t packet) {
  // Type-2 packet.
  // No-op. Do nothing.
  trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 1);
  trace_writer_.WritePacketEnd();
  return true;
}

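// Type-3 packets may be predicated on the current bin select/mask state
// (PM4_SET_BIN_SELECT* / PM4_SET_BIN_MASK* below); when the predicate bit is
// set and no bin matches, the payload is skipped entirely.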
bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
  // Type-3 packet.
  uint32_t opcode = (packet >> 8) & 0x7F;
  uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
  auto data_start_offset = reader->read_offset();

  if (reader->read_count() < count * sizeof(uint32_t)) {
    XELOGE(
        "ExecutePacketType3 overflow (read count {:08X}, packet count {:08X})",
        reader->read_count(), count * sizeof(uint32_t));
    return false;
  }

  // To handle nesting behavior when tracing we special case indirect buffers.
  if (opcode == PM4_INDIRECT_BUFFER) {
    trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 2);
  } else {
    trace_writer_.WritePacketStart(uint32_t(reader->read_ptr() - 4), 1 + count);
  }

  // & 1 == predicate - when set, we do bin check to see if we should execute
  // the packet. Only type 3 packets are affected.
  // We also skip predicated swaps, as they are never valid (probably?).
  if (packet & 1) {
    bool any_pass = (bin_select_ & bin_mask_) != 0;
    if (!any_pass || opcode == PM4_XE_SWAP) {
      reader->AdvanceRead(count * sizeof(uint32_t));
      trace_writer_.WritePacketEnd();
      return true;
    }
  }

  bool result = false;
  switch (opcode) {
    case PM4_ME_INIT:
      result = ExecutePacketType3_ME_INIT(reader, packet, count);
      break;
    case PM4_NOP:
      result = ExecutePacketType3_NOP(reader, packet, count);
      break;
    case PM4_INTERRUPT:
      result = ExecutePacketType3_INTERRUPT(reader, packet, count);
      break;
    case PM4_XE_SWAP:
      result = ExecutePacketType3_XE_SWAP(reader, packet, count);
      break;
    case PM4_INDIRECT_BUFFER:
    case PM4_INDIRECT_BUFFER_PFD:
      result = ExecutePacketType3_INDIRECT_BUFFER(reader, packet, count);
      break;
    case PM4_WAIT_REG_MEM:
      result = ExecutePacketType3_WAIT_REG_MEM(reader, packet, count);
      break;
    case PM4_REG_RMW:
      result = ExecutePacketType3_REG_RMW(reader, packet, count);
      break;
    case PM4_REG_TO_MEM:
      result = ExecutePacketType3_REG_TO_MEM(reader, packet, count);
      break;
    case PM4_MEM_WRITE:
      result = ExecutePacketType3_MEM_WRITE(reader, packet, count);
      break;
    case PM4_COND_WRITE:
      result = ExecutePacketType3_COND_WRITE(reader, packet, count);
      break;
    case PM4_EVENT_WRITE:
      result = ExecutePacketType3_EVENT_WRITE(reader, packet, count);
      break;
    case PM4_EVENT_WRITE_SHD:
      result = ExecutePacketType3_EVENT_WRITE_SHD(reader, packet, count);
      break;
    case PM4_EVENT_WRITE_EXT:
      result = ExecutePacketType3_EVENT_WRITE_EXT(reader, packet, count);
      break;
    case PM4_EVENT_WRITE_ZPD:
      result = ExecutePacketType3_EVENT_WRITE_ZPD(reader, packet, count);
      break;
    case PM4_DRAW_INDX:
      result = ExecutePacketType3_DRAW_INDX(reader, packet, count);
      break;
    case PM4_DRAW_INDX_2:
      result = ExecutePacketType3_DRAW_INDX_2(reader, packet, count);
      break;
    case PM4_SET_CONSTANT:
      result = ExecutePacketType3_SET_CONSTANT(reader, packet, count);
      break;
    case PM4_SET_CONSTANT2:
      result = ExecutePacketType3_SET_CONSTANT2(reader, packet, count);
      break;
    case PM4_LOAD_ALU_CONSTANT:
      result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet, count);
      break;
    case PM4_SET_SHADER_CONSTANTS:
      result = ExecutePacketType3_SET_SHADER_CONSTANTS(reader, packet, count);
      break;
    case PM4_IM_LOAD:
      result = ExecutePacketType3_IM_LOAD(reader, packet, count);
      break;
    case PM4_IM_LOAD_IMMEDIATE:
      result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet, count);
      break;
    case PM4_INVALIDATE_STATE:
      result = ExecutePacketType3_INVALIDATE_STATE(reader, packet, count);
      break;
    case PM4_VIZ_QUERY:
      result = ExecutePacketType3_VIZ_QUERY(reader, packet, count);
      break;

    case PM4_SET_BIN_MASK_LO: {
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value;
      result = true;
    } break;
    case PM4_SET_BIN_MASK_HI: {
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      bin_mask_ =
          (bin_mask_ & 0xFFFFFFFFull) | (static_cast<uint64_t>(value) << 32);
      result = true;
    } break;
    case PM4_SET_BIN_SELECT_LO: {
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value;
      result = true;
    } break;
    case PM4_SET_BIN_SELECT_HI: {
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      bin_select_ =
          (bin_select_ & 0xFFFFFFFFull) | (static_cast<uint64_t>(value) << 32);
      result = true;
    } break;
    case PM4_SET_BIN_MASK: {
      assert_true(count == 2);
      uint64_t val_hi = reader->ReadAndSwap<uint32_t>();
      uint64_t val_lo = reader->ReadAndSwap<uint32_t>();
      bin_mask_ = (val_hi << 32) | val_lo;
      result = true;
    } break;
    case PM4_SET_BIN_SELECT: {
      assert_true(count == 2);
      uint64_t val_hi = reader->ReadAndSwap<uint32_t>();
      uint64_t val_lo = reader->ReadAndSwap<uint32_t>();
      bin_select_ = (val_hi << 32) | val_lo;
      result = true;
    } break;
    case PM4_CONTEXT_UPDATE: {
      assert_true(count == 1);
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      XELOGGPU("GPU context update = {:08X}", value);
      assert_true(value == 0);
      result = true;
      break;
    }
    case PM4_WAIT_FOR_IDLE: {
      // This opcode is used by 5454084E while going / being ingame.
      assert_true(count == 1);
      uint32_t value = reader->ReadAndSwap<uint32_t>();
      XELOGGPU("GPU wait for idle = {:08X}", value);
      result = true;
      break;
    }

    default:
      XELOGGPU("Unimplemented GPU OPCODE: 0x{:02X}\t\tCOUNT: {}\n", opcode,
               count);
      assert_always();
      reader->AdvanceRead(count * sizeof(uint32_t));
      break;
  }

  trace_writer_.WritePacketEnd();
  if (opcode == PM4_XE_SWAP) {
    // End the trace writer frame.
    if (trace_writer_.is_open()) {
      trace_writer_.WriteEvent(EventCommand::Type::kSwap);
      trace_writer_.Flush();
      if (trace_state_ == TraceState::kSingleFrame) {
        trace_state_ = TraceState::kDisabled;
        trace_writer_.Close();
      }
    } else if (trace_state_ == TraceState::kSingleFrame) {
      // New trace request - we only start tracing at the beginning of a frame.
      uint32_t title_id = kernel_state_->GetExecutableModule()->title_id();
      auto file_name = fmt::format("{:08X}_{}.xtr", title_id, counter_ - 1);
      auto path = trace_frame_path_ / file_name;
      trace_writer_.Open(path, title_id);
      InitializeTrace();
    }
  }

  assert_true(reader->read_offset() ==
              (data_start_offset + (count * sizeof(uint32_t))) %
                  reader->capacity());
  return result;
}

bool CommandProcessor::ExecutePacketType3_ME_INIT(RingBuffer* reader,
                                                   uint32_t packet,
                                                   uint32_t count) {
  // initialize CP's micro-engine
  me_bin_.clear();
  for (uint32_t i = 0; i < count; i++) {
    me_bin_.push_back(reader->ReadAndSwap<uint32_t>());
  }

  return true;
}

bool CommandProcessor::ExecutePacketType3_NOP(RingBuffer* reader,
                                              uint32_t packet, uint32_t count) {
  // skip N 32-bit words to get to the next packet
  // No-op, ignore some data.
  reader->AdvanceRead(count * sizeof(uint32_t));
  return true;
}

bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingBuffer* reader,
                                                     uint32_t packet,
                                                     uint32_t count) {
  SCOPE_profile_cpu_f("gpu");

  // generate interrupt from the command stream
  uint32_t cpu_mask = reader->ReadAndSwap<uint32_t>();
  for (int n = 0; n < 6; n++) {
    if (cpu_mask & (1 << n)) {
      graphics_system_->DispatchInterruptCallback(1, n);
    }
  }
  return true;
}

bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingBuffer* reader,
                                                   uint32_t packet,
                                                   uint32_t count) {
  SCOPE_profile_cpu_f("gpu");

  XELOGI("XE_SWAP");

  Profiler::Flip();

  // Xenia-specific VdSwap hook.
  // VdSwap will post this to tell us we need to swap the screen/fire an
  // interrupt.
  // 63 words here, but only the first has any data.
  uint32_t magic = reader->ReadAndSwap<fourcc_t>();
  assert_true(magic == kSwapSignature);

  // TODO(benvanik): only swap frontbuffer ptr.
  uint32_t frontbuffer_ptr = reader->ReadAndSwap<uint32_t>();
  uint32_t frontbuffer_width = reader->ReadAndSwap<uint32_t>();
  uint32_t frontbuffer_height = reader->ReadAndSwap<uint32_t>();
  reader->AdvanceRead((count - 4) * sizeof(uint32_t));

  if (!ignore_swap_) {
    IssueSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height);
  }

  ++counter_;
  return true;
}

bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER(RingBuffer* reader,
                                                           uint32_t packet,
                                                           uint32_t count) {
  // indirect buffer dispatch
  uint32_t list_ptr = CpuToGpu(reader->ReadAndSwap<uint32_t>());
  uint32_t list_length = reader->ReadAndSwap<uint32_t>();
  assert_zero(list_length & ~0xFFFFF);
  list_length &= 0xFFFFF;
  ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
  return true;
}

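// WAIT_REG_MEM polls either a register or a memory location (selected by bit 4
// of wait_info) until (value & mask) compares against ref using the function
// encoded in the low 3 bits of wait_info; the wait field scales the sleep
// interval used between polls.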
bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
|
2015-11-08 20:54:36 +01:00
|
|
|
uint32_t packet,
|
|
|
|
|
uint32_t count) {
|
|
|
|
|
SCOPE_profile_cpu_f("gpu");
|
|
|
|
|
|
|
|
|
|
// wait until a register or memory location is a specific value
|
2018-02-18 19:18:20 +01:00
|
|
|
uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
|
|
|
|
|
uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
|
|
|
|
|
uint32_t ref = reader->ReadAndSwap<uint32_t>();
|
|
|
|
|
uint32_t mask = reader->ReadAndSwap<uint32_t>();
|
|
|
|
|
uint32_t wait = reader->ReadAndSwap<uint32_t>();
|
2015-11-08 20:54:36 +01:00
|
|
|
bool matched = false;
|
|
|
|
|
do {
|
|
|
|
|
uint32_t value;
|
|
|
|
|
if (wait_info & 0x10) {
|
|
|
|
|
// Memory.
|
2020-07-11 14:54:22 +02:00
|
|
|
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3);
|
2015-11-08 20:54:36 +01:00
|
|
|
poll_reg_addr &= ~0x3;
|
|
|
|
|
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr));
|
|
|
|
|
value = GpuSwap(value, endianness);
|
|
|
|
|
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
|
|
|
|
|
} else {
|
|
|
|
|
// Register.
|
|
|
|
|
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
|
|
|
|
|
value = register_file_->values[poll_reg_addr].u32;
|
|
|
|
|
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
|
|
|
|
|
MakeCoherent();
|
|
|
|
|
value = register_file_->values[poll_reg_addr].u32;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
switch (wait_info & 0x7) {
|
|
|
|
|
case 0x0: // Never.
|
|
|
|
|
matched = false;
|
|
|
|
|
break;
|
|
|
|
|
case 0x1: // Less than reference.
|
|
|
|
|
matched = (value & mask) < ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x2: // Less than or equal to reference.
|
|
|
|
|
matched = (value & mask) <= ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x3: // Equal to reference.
|
|
|
|
|
matched = (value & mask) == ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x4: // Not equal to reference.
|
|
|
|
|
matched = (value & mask) != ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x5: // Greater than or equal to reference.
|
|
|
|
|
matched = (value & mask) >= ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x6: // Greater than reference.
|
|
|
|
|
matched = (value & mask) > ref;
|
|
|
|
|
break;
|
|
|
|
|
case 0x7: // Always
|
|
|
|
|
matched = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (!matched) {
|
|
|
|
|
// Wait.
|
|
|
|
|
if (wait >= 0x100) {
|
|
|
|
|
PrepareForWait();
|
2019-04-17 21:49:29 +02:00
|
|
|
if (!cvars::vsync) {
|
2015-11-08 20:54:36 +01:00
|
|
|
// User wants it fast and dangerous.
|
|
|
|
|
xe::threading::MaybeYield();
|
|
|
|
|
} else {
|
|
|
|
|
xe::threading::Sleep(std::chrono::milliseconds(wait / 0x100));
|
|
|
|
|
}
|
|
|
|
|
xe::threading::SyncMemory();
|
|
|
|
|
ReturnFromWait();
|
2017-12-19 00:23:14 +01:00
|
|
|
|
|
|
|
|
if (!worker_running_) {
|
|
|
|
|
// Short-circuited exit.
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2015-11-08 20:54:36 +01:00
|
|
|
} else {
|
|
|
|
|
xe::threading::MaybeYield();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} while (!matched);
|
2017-12-19 00:23:14 +01:00
|
|
|
|
2015-11-08 20:54:36 +01:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
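// PM4_REG_RMW: computes new = (old & and_mask) | or_mask for the register
// selected by the low 13 bits of rmw_info. Bit 31 of rmw_info makes and_mask
// a register index instead of an immediate, and bit 30 does the same for
// or_mask, matching the two branches below.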
bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
                                                  uint32_t packet,
                                                  uint32_t count) {
  // register read/modify/write
  // ? (used during shader upload and edram setup)
  uint32_t rmw_info = reader->ReadAndSwap<uint32_t>();
  uint32_t and_mask = reader->ReadAndSwap<uint32_t>();
  uint32_t or_mask = reader->ReadAndSwap<uint32_t>();
  uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32;
  if ((rmw_info >> 31) & 0x1) {
    // & reg
    value &= register_file_->values[and_mask & 0x1FFF].u32;
  } else {
    // & imm
    value &= and_mask;
  }
  if ((rmw_info >> 30) & 0x1) {
    // | reg
    value |= register_file_->values[or_mask & 0x1FFF].u32;
  } else {
    // | imm
    value |= or_mask;
  }
  WriteRegister(rmw_info & 0x1FFF, value);
  return true;
}

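// PM4_REG_TO_MEM: snapshots a single register value into guest physical
// memory, with the endian mode encoded in the low 2 bits of the destination
// address.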
bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
                                                     uint32_t packet,
                                                     uint32_t count) {
  // Copy Register to Memory (?)
  // Count is 2, assuming a Register Addr and a Memory Addr.

  uint32_t reg_addr = reader->ReadAndSwap<uint32_t>();
  uint32_t mem_addr = reader->ReadAndSwap<uint32_t>();

  uint32_t reg_val;

  assert_true(reg_addr < RegisterFile::kRegisterCount);
  reg_val = register_file_->values[reg_addr].u32;

  auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
  mem_addr &= ~0x3;
  reg_val = GpuSwap(reg_val, endianness);
  xe::store(memory_->TranslatePhysical(mem_addr), reg_val);
  trace_writer_.WriteMemoryWrite(CpuToGpu(mem_addr), 4);

  return true;
}

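// PM4_MEM_WRITE: streams the remaining count - 1 payload dwords into guest
// physical memory starting at write_addr, swapping each according to the
// endian mode in the low 2 bits of the address and stepping 4 bytes per dword.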
bool CommandProcessor::ExecutePacketType3_MEM_WRITE(RingBuffer* reader,
                                                    uint32_t packet,
                                                    uint32_t count) {
  uint32_t write_addr = reader->ReadAndSwap<uint32_t>();
  for (uint32_t i = 0; i < count - 1; i++) {
    uint32_t write_data = reader->ReadAndSwap<uint32_t>();
    auto endianness = static_cast<xenos::Endian>(write_addr & 0x3);
    auto addr = write_addr & ~0x3;
    write_data = GpuSwap(write_data, endianness);
    xe::store(memory_->TranslatePhysical(addr), write_data);
    trace_writer_.WriteMemoryWrite(CpuToGpu(addr), 4);
    write_addr += 4;
  }

  return true;
}

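// PM4_COND_WRITE: polls a register or memory location once, using the same
// wait_info encoding as PM4_WAIT_REG_MEM, and performs a single write if the
// masked value passes the compare. Bit 8 (0x100) of wait_info selects a
// memory destination; otherwise write_reg_addr is treated as a register index.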
bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
                                                     uint32_t packet,
                                                     uint32_t count) {
  // conditional write to memory or register
  uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
  uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
  uint32_t ref = reader->ReadAndSwap<uint32_t>();
  uint32_t mask = reader->ReadAndSwap<uint32_t>();
  uint32_t write_reg_addr = reader->ReadAndSwap<uint32_t>();
  uint32_t write_data = reader->ReadAndSwap<uint32_t>();
  uint32_t value;
  if (wait_info & 0x10) {
    // Memory.
    auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3);
    poll_reg_addr &= ~0x3;
    trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
    value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr));
    value = GpuSwap(value, endianness);
  } else {
    // Register.
    assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
    value = register_file_->values[poll_reg_addr].u32;
  }
  bool matched = false;
  switch (wait_info & 0x7) {
    case 0x0:  // Never.
      matched = false;
      break;
    case 0x1:  // Less than reference.
      matched = (value & mask) < ref;
      break;
    case 0x2:  // Less than or equal to reference.
      matched = (value & mask) <= ref;
      break;
    case 0x3:  // Equal to reference.
      matched = (value & mask) == ref;
      break;
    case 0x4:  // Not equal to reference.
      matched = (value & mask) != ref;
      break;
    case 0x5:  // Greater than or equal to reference.
      matched = (value & mask) >= ref;
      break;
    case 0x6:  // Greater than reference.
      matched = (value & mask) > ref;
      break;
    case 0x7:  // Always.
      matched = true;
      break;
  }
  if (matched) {
    // Write.
    if (wait_info & 0x100) {
      // Memory.
      auto endianness = static_cast<xenos::Endian>(write_reg_addr & 0x3);
      write_reg_addr &= ~0x3;
      write_data = GpuSwap(write_data, endianness);
      xe::store(memory_->TranslatePhysical(write_reg_addr), write_data);
      trace_writer_.WriteMemoryWrite(CpuToGpu(write_reg_addr), 4);
    } else {
      // Register.
      WriteRegister(write_reg_addr, write_data);
    }
  }
  return true;
}

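// PM4_EVENT_WRITE: latches the low 6 bits of the initiator dword into
// VGT_EVENT_INITIATOR. A longer payload (count > 1) would carry a writeback
// address, which is not handled yet, so it is asserted on and skipped.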
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingBuffer* reader,
                                                      uint32_t packet,
                                                      uint32_t count) {
  // generate an event that creates a write to memory when completed
  uint32_t initiator = reader->ReadAndSwap<uint32_t>();
  // Writeback initiator.
  WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);
  if (count == 1) {
    // Just an event flag? Where does this write?
  } else {
    // Write to an address.
    assert_always();
    reader->AdvanceRead((count - 1) * sizeof(uint32_t));
  }
  return true;
}

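// PM4_EVENT_WRITE_SHD: memory writeback for a VS/PS done event. Bit 31 of the
// initiator selects writing the internal swap counter (counter_, presumably
// standing in for a GPU vblank counter) instead of the supplied value; here
// the write happens immediately rather than when the event retires.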
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD(RingBuffer* reader,
                                                          uint32_t packet,
                                                          uint32_t count) {
  // generate a VS|PS_done event
  uint32_t initiator = reader->ReadAndSwap<uint32_t>();
  uint32_t address = reader->ReadAndSwap<uint32_t>();
  uint32_t value = reader->ReadAndSwap<uint32_t>();
  // Writeback initiator.
  WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);
  uint32_t data_value;
  if ((initiator >> 31) & 0x1) {
    // Write counter (GPU vblank counter?).
    data_value = counter_;
  } else {
    // Write value.
    data_value = value;
  }
  auto endianness = static_cast<xenos::Endian>(address & 0x3);
  address &= ~0x3;
  data_value = GpuSwap(data_value, endianness);
  xe::store(memory_->TranslatePhysical(address), data_value);
  trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4);
  return true;
}

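// PM4_EVENT_WRITE_EXT: reports screen extents back to the guest driver. The
// handler fakes a full-surface result: six uint16 values (min/max x and y
// pre-shifted right by 3, plus min/max z) are byte-swapped into guest memory.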
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader,
                                                          uint32_t packet,
                                                          uint32_t count) {
  // generate a screen extent event
  uint32_t initiator = reader->ReadAndSwap<uint32_t>();
  uint32_t address = reader->ReadAndSwap<uint32_t>();
  // Writeback initiator.
  WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);
  auto endianness = static_cast<xenos::Endian>(address & 0x3);
  address &= ~0x3;

  // Let us hope we can fake this.
  // This callback tells the driver the xy coordinates affected by a previous
  // drawcall.
  // https://www.google.com/patents/US20060055701
  uint16_t extents[] = {
      0 >> 3,                                    // min x
      xenos::kTexture2DCubeMaxWidthHeight >> 3,  // max x
      0 >> 3,                                    // min y
      xenos::kTexture2DCubeMaxWidthHeight >> 3,  // max y
      0,                                         // min z
      1,                                         // max z
  };
  assert_true(endianness == xenos::Endian::k8in16);
  xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), extents,
                                 xe::countof(extents));
  trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents));
  return true;
}

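// PM4_EVENT_WRITE_ZPD: backs Direct3D 9 occlusion queries. When the
// query_occlusion_fake_sample_count cvar is non-negative, the sample-count
// block at RB_SAMPLE_COUNT_ADDR is cleared, and if the guest had marked the
// query as finished (0xFFFFFEED markers), a fixed fake ZPass/Total count is
// reported instead of real rasterization results.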
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
                                                          uint32_t packet,
                                                          uint32_t count) {
  // Set by D3D as BE but struct ABI is LE
  const uint32_t kQueryFinished = xe::byte_swap(0xFFFFFEED);
  assert_true(count == 1);
  uint32_t initiator = reader->ReadAndSwap<uint32_t>();
  // Writeback initiator.
  WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);

  // Occlusion queries:
  // This command is sent on query begin and end.
  // As a workaround, report some fixed amount of passed samples.
  auto fake_sample_count = cvars::query_occlusion_fake_sample_count;
  if (fake_sample_count >= 0) {
    auto* pSampleCounts =
        memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
            register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32);
    // 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END
    // and used to detect a finished query.
    bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
                             pSampleCounts->ZPass_B == kQueryFinished;
    // Older versions of D3D also check for ZFail (4D5307D5).
    bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished &&
                             pSampleCounts->ZFail_B == kQueryFinished;
    std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts));
    if (is_end_via_z_pass || is_end_via_z_fail) {
      pSampleCounts->ZPass_A = fake_sample_count;
      pSampleCounts->Total_A = fake_sample_count;
    }
  }

  return true;
}

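// Shared implementation of PM4_DRAW_INDX and PM4_DRAW_INDX_2: decodes
// VGT_DRAW_INITIATOR, reads VGT_DMA_BASE/VGT_DMA_SIZE for indexed (kDMA)
// draws, then calls IssueDraw() unless a viz query with kill_pix_post_hi_z is
// active. Any unread payload dwords (such as unsupported immediate indices)
// are skipped so the ring buffer stays in sync.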
bool CommandProcessor::ExecutePacketType3Draw(RingBuffer* reader,
                                              uint32_t packet,
                                              const char* opcode_name,
                                              uint32_t viz_query_condition,
                                              uint32_t count_remaining) {
  // if viz_query_condition != 0, this is a conditional draw based on viz query.
  // This ID matches the one issued in PM4_VIZ_QUERY
  // uint32_t viz_id = viz_query_condition & 0x3F;
  // when true, render conditionally based on query result
  // uint32_t viz_use = viz_query_condition & 0x100;

  assert_not_zero(count_remaining);
  if (!count_remaining) {
    XELOGE("{}: Packet too small, can't read VGT_DRAW_INITIATOR", opcode_name);
    return false;
  }
  reg::VGT_DRAW_INITIATOR vgt_draw_initiator;
  vgt_draw_initiator.value = reader->ReadAndSwap<uint32_t>();
  --count_remaining;
  WriteRegister(XE_GPU_REG_VGT_DRAW_INITIATOR, vgt_draw_initiator.value);

  bool success = true;
  // TODO(Triang3l): Remove IndexBufferInfo and replace handling of all this
  // with PrimitiveProcessor when the old Vulkan renderer is removed.
  bool is_indexed = false;
  IndexBufferInfo index_buffer_info;
  switch (vgt_draw_initiator.source_select) {
    case xenos::SourceSelect::kDMA: {
      // Indexed draw.
      is_indexed = true;

      // Two separate bounds checks so if there's only one missing register
      // value out of two, one uint32_t will be skipped in the command buffer,
      // not two.
      assert_not_zero(count_remaining);
      if (!count_remaining) {
        XELOGE("{}: Packet too small, can't read VGT_DMA_BASE", opcode_name);
        return false;
      }
      uint32_t vgt_dma_base = reader->ReadAndSwap<uint32_t>();
      --count_remaining;
      WriteRegister(XE_GPU_REG_VGT_DMA_BASE, vgt_dma_base);
      reg::VGT_DMA_SIZE vgt_dma_size;
      assert_not_zero(count_remaining);
      if (!count_remaining) {
        XELOGE("{}: Packet too small, can't read VGT_DMA_SIZE", opcode_name);
        return false;
      }
      vgt_dma_size.value = reader->ReadAndSwap<uint32_t>();
      --count_remaining;
      WriteRegister(XE_GPU_REG_VGT_DMA_SIZE, vgt_dma_size.value);

      uint32_t index_size_bytes =
          vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16
              ? sizeof(uint16_t)
              : sizeof(uint32_t);
      // The base address must already be word-aligned according to the R6xx
      // documentation, but for safety.
      index_buffer_info.guest_base = vgt_dma_base & ~(index_size_bytes - 1);
      index_buffer_info.endianness = vgt_dma_size.swap_mode;
      index_buffer_info.format = vgt_draw_initiator.index_size;
      index_buffer_info.length = vgt_dma_size.num_words * index_size_bytes;
      index_buffer_info.count = vgt_draw_initiator.num_indices;
    } break;
    case xenos::SourceSelect::kImmediate: {
      // TODO(Triang3l): VGT_IMMED_DATA.
      XELOGE(
          "{}: Using immediate vertex indices, which are not supported yet. "
          "Report the game to Xenia developers!",
          opcode_name, uint32_t(vgt_draw_initiator.source_select));
      success = false;
      assert_always();
    } break;
    case xenos::SourceSelect::kAutoIndex: {
      // Auto draw.
      index_buffer_info.guest_base = 0;
      index_buffer_info.length = 0;
    } break;
    default: {
      // Invalid source selection.
      success = false;
      assert_unhandled_case(vgt_draw_initiator.source_select);
    } break;
  }

  // Skip to the next command, for example, if there are immediate indexes that
  // we don't support yet.
  reader->AdvanceRead(count_remaining * sizeof(uint32_t));

  if (success) {
    auto viz_query = register_file_->Get<reg::PA_SC_VIZ_QUERY>();
    if (!(viz_query.viz_query_ena && viz_query.kill_pix_post_hi_z)) {
      // TODO(Triang3l): Don't drop the draw call completely if the vertex
      // shader has memexport.
      // TODO(Triang3l || JoelLinn): Handle this properly in the render
      // backends.
      success = IssueDraw(
          vgt_draw_initiator.prim_type, vgt_draw_initiator.num_indices,
          is_indexed ? &index_buffer_info : nullptr,
          xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
                                     vgt_draw_initiator.prim_type));
      if (!success) {
        XELOGE("{}({}, {}, {}): Failed in backend", opcode_name,
               vgt_draw_initiator.num_indices,
               uint32_t(vgt_draw_initiator.prim_type),
               uint32_t(vgt_draw_initiator.source_select));
      }
    }
  }

  return success;
}

bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingBuffer* reader,
                                                    uint32_t packet,
                                                    uint32_t count) {
  // "initiate fetch of index buffer and draw"
  // Generally used by Xbox 360 Direct3D 9 for kDMA and kAutoIndex sources.
  // With a viz query token as the first one.
  uint32_t count_remaining = count;
  assert_not_zero(count_remaining);
  if (!count_remaining) {
    XELOGE("PM4_DRAW_INDX: Packet too small, can't read the viz query token");
    return false;
  }
  uint32_t viz_query_condition = reader->ReadAndSwap<uint32_t>();
  --count_remaining;
  return ExecutePacketType3Draw(reader, packet, "PM4_DRAW_INDX",
                                viz_query_condition, count_remaining);
}

bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingBuffer* reader,
                                                      uint32_t packet,
                                                      uint32_t count) {
  // "draw using supplied indices in packet"
  // Generally used by Xbox 360 Direct3D 9 for kAutoIndex source.
  // No viz query token.
  return ExecutePacketType3Draw(reader, packet, "PM4_DRAW_INDX_2", 0, count);
}

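// PM4_SET_CONSTANT: writes a run of consecutive values from the packet into
// the register file. The packet addresses constants relative to a block; the
// switch below rebases the index into the flat register space (ALU -> 0x4000,
// FETCH -> 0x4800, BOOL -> 0x4900, LOOP -> 0x4908, REGISTERS -> 0x2000).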
bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingBuffer* reader,
                                                       uint32_t packet,
                                                       uint32_t count) {
  // load constant into chip and to memory
  // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
  // reg - 0x2000
  uint32_t offset_type = reader->ReadAndSwap<uint32_t>();
  uint32_t index = offset_type & 0x7FF;
  uint32_t type = (offset_type >> 16) & 0xFF;
  switch (type) {
    case 0:  // ALU
      index += 0x4000;
      break;
    case 1:  // FETCH
      index += 0x4800;
      break;
    case 2:  // BOOL
      index += 0x4900;
      break;
    case 3:  // LOOP
      index += 0x4908;
      break;
    case 4:  // REGISTERS
      index += 0x2000;
      break;
    default:
      assert_always();
      reader->AdvanceRead((count - 1) * sizeof(uint32_t));
      return true;
  }
  for (uint32_t n = 0; n < count - 1; n++, index++) {
    uint32_t data = reader->ReadAndSwap<uint32_t>();
    WriteRegister(index, data);
  }
  return true;
}

bool CommandProcessor::ExecutePacketType3_SET_CONSTANT2(RingBuffer* reader,
                                                        uint32_t packet,
                                                        uint32_t count) {
  uint32_t offset_type = reader->ReadAndSwap<uint32_t>();
  uint32_t index = offset_type & 0xFFFF;
  for (uint32_t n = 0; n < count - 1; n++, index++) {
    uint32_t data = reader->ReadAndSwap<uint32_t>();
    WriteRegister(index, data);
  }
  return true;
}

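// PM4_LOAD_ALU_CONSTANT: same block-relative rebasing as PM4_SET_CONSTANT,
// but the values are fetched from guest physical memory (address, index,
// dword count) instead of being embedded in the packet.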
bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(RingBuffer* reader,
                                                            uint32_t packet,
                                                            uint32_t count) {
  // load constants from memory
  uint32_t address = reader->ReadAndSwap<uint32_t>();
  address &= 0x3FFFFFFF;
  uint32_t offset_type = reader->ReadAndSwap<uint32_t>();
  uint32_t index = offset_type & 0x7FF;
  uint32_t size_dwords = reader->ReadAndSwap<uint32_t>();
  size_dwords &= 0xFFF;
  uint32_t type = (offset_type >> 16) & 0xFF;
  switch (type) {
    case 0:  // ALU
      index += 0x4000;
      break;
    case 1:  // FETCH
      index += 0x4800;
      break;
    case 2:  // BOOL
      index += 0x4900;
      break;
    case 3:  // LOOP
      index += 0x4908;
      break;
    case 4:  // REGISTERS
      index += 0x2000;
      break;
    default:
      assert_always();
      return true;
  }
  trace_writer_.WriteMemoryRead(CpuToGpu(address), size_dwords * 4);
  for (uint32_t n = 0; n < size_dwords; n++, index++) {
    uint32_t data = xe::load_and_swap<uint32_t>(
        memory_->TranslatePhysical(address + n * 4));
    WriteRegister(index, data);
  }
  return true;
}

bool CommandProcessor::ExecutePacketType3_SET_SHADER_CONSTANTS(
    RingBuffer* reader, uint32_t packet, uint32_t count) {
  uint32_t offset_type = reader->ReadAndSwap<uint32_t>();
  uint32_t index = offset_type & 0xFFFF;
  for (uint32_t n = 0; n < count - 1; n++, index++) {
    uint32_t data = reader->ReadAndSwap<uint32_t>();
    WriteRegister(index, data);
  }
  return true;
}

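// PM4_IM_LOAD: loads shader microcode from guest memory. The low 2 bits of
// the first dword select the shader type (vertex or pixel) and the rest is
// the physical address; the second dword packs the start offset (high 16
// bits, expected to be 0) and the size in dwords (low 16 bits).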
bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingBuffer* reader,
                                                  uint32_t packet,
                                                  uint32_t count) {
  SCOPE_profile_cpu_f("gpu");

  // load sequencer instruction memory (pointer-based)
  uint32_t addr_type = reader->ReadAndSwap<uint32_t>();
  auto shader_type = static_cast<xenos::ShaderType>(addr_type & 0x3);
  uint32_t addr = addr_type & ~0x3;
  uint32_t start_size = reader->ReadAndSwap<uint32_t>();
  uint32_t start = start_size >> 16;
  uint32_t size_dwords = start_size & 0xFFFF;  // dwords
  assert_true(start == 0);
  trace_writer_.WriteMemoryRead(CpuToGpu(addr), size_dwords * 4);
  auto shader =
      LoadShader(shader_type, addr, memory_->TranslatePhysical<uint32_t*>(addr),
                 size_dwords);
  switch (shader_type) {
    case xenos::ShaderType::kVertex:
      active_vertex_shader_ = shader;
      break;
    case xenos::ShaderType::kPixel:
      active_pixel_shader_ = shader;
      break;
    default:
      assert_unhandled_case(shader_type);
      return false;
  }
  return true;
}

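// PM4_IM_LOAD_IMMEDIATE: same as PM4_IM_LOAD, but the microcode dwords are
// embedded in the packet and read directly from the ring buffer's read
// pointer; the read cursor is advanced past them once the shader is loaded.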
bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(RingBuffer* reader,
                                                            uint32_t packet,
                                                            uint32_t count) {
  SCOPE_profile_cpu_f("gpu");

  // load sequencer instruction memory (code embedded in packet)
  uint32_t dword0 = reader->ReadAndSwap<uint32_t>();
  uint32_t dword1 = reader->ReadAndSwap<uint32_t>();
  auto shader_type = static_cast<xenos::ShaderType>(dword0);
  uint32_t start_size = dword1;
  uint32_t start = start_size >> 16;
  uint32_t size_dwords = start_size & 0xFFFF;  // dwords
  assert_true(start == 0);
  assert_true(reader->read_count() >= size_dwords * 4);
  assert_true(count - 2 >= size_dwords);
  auto shader =
      LoadShader(shader_type, uint32_t(reader->read_ptr()),
                 reinterpret_cast<uint32_t*>(reader->read_ptr()), size_dwords);
  switch (shader_type) {
    case xenos::ShaderType::kVertex:
      active_vertex_shader_ = shader;
      break;
    case xenos::ShaderType::kPixel:
      active_pixel_shader_ = shader;
      break;
    default:
      assert_unhandled_case(shader_type);
      return false;
  }
  reader->AdvanceRead(size_dwords * sizeof(uint32_t));
  return true;
}

bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE(RingBuffer* reader,
                                                           uint32_t packet,
                                                           uint32_t count) {
  // selective invalidation of state pointers
  /*uint32_t mask =*/reader->ReadAndSwap<uint32_t>();
  // driver_->InvalidateState(mask);
  return true;
}

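// PM4_VIZ_QUERY: begins or ends a visibility (occlusion) query slot. Bit 8 of
// the payload distinguishes end from begin and the low 6 bits give the query
// id; on end, the matching bit of PA_SC_VIZ_QUERY_STATUS_0/1 is set so any
// guest readback sees the geometry as visible.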
bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
                                                    uint32_t packet,
                                                    uint32_t count) {
  // begin/end initiator for viz query extent processing
  // https://www.google.com/patents/US20050195186
  assert_true(count == 1);

  uint32_t dword0 = reader->ReadAndSwap<uint32_t>();

  uint32_t id = dword0 & 0x3F;
  uint32_t end = dword0 & 0x100;
  if (!end) {
    // begin a new viz query @ id
    // On hardware this clears the internal state of the scan converter (which
    // is different to the register)
    WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, VIZQUERY_START);
    XELOGGPU("Begin viz query ID {:02X}", id);
  } else {
    // end the viz query
    WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, VIZQUERY_END);
    XELOGGPU("End viz query ID {:02X}", id);
    // The scan converter writes the internal result back to the register here.
    // We just fake it and say it was visible in case it is read back.
    if (id < 32) {
      register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |=
          uint32_t(1) << id;
    } else {
      register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |=
          uint32_t(1) << (id - 32);
    }
  }

  return true;
}

}  // namespace gpu
}  // namespace xe