diff --git a/.gdbinit b/.gdbinit new file mode 100644 index 000000000..09b4af30f --- /dev/null +++ b/.gdbinit @@ -0,0 +1,10 @@ +# Ignore HighResolutionTimer custom event +handle SIG34 nostop noprint +# Ignore PosixTimer custom event +handle SIG35 nostop noprint +# Ignore PosixThread exit event +handle SIG32 nostop noprint +# Ignore PosixThread suspend event +handle SIG36 nostop noprint +# Ignore PosixThread user callback event +handle SIG37 nostop noprint diff --git a/README.md b/README.md index 68c34c916..656c716e0 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ that there are some major work areas still untouched: * Help work through [missing functionality/bugs in games](https://github.com/xenia-project/xenia/labels/compat) * Add input drivers for [DualShock4 (PS4) controllers](https://github.com/xenia-project/xenia/issues/60) (or anything else) -* Skilled with Linux? A strong contributor is needed to [help with porting](https://github.com/xenia-project/xenia/labels/cross%20platform) +* Skilled with Linux? A strong contributor is needed to [help with porting](https://github.com/xenia-project/xenia/labels/platform-linux) See more projects [good for contributors](https://github.com/xenia-project/xenia/labels/good%20first%20issue). It's a good idea to ask on Discord and check the issues page before beginning work on something. diff --git a/src/xenia/app/discord/discord_presence.cc b/src/xenia/app/discord/discord_presence.cc index c51e27851..bee8c78bf 100644 --- a/src/xenia/app/discord/discord_presence.cc +++ b/src/xenia/app/discord/discord_presence.cc @@ -40,9 +40,10 @@ void DiscordPresence::NotPlaying() { } void DiscordPresence::PlayingTitle(const std::string_view game_title) { + auto details = std::string(game_title); DiscordRichPresence discordPresence = {}; discordPresence.state = "In Game"; - discordPresence.details = std::string(game_title).c_str(); + discordPresence.details = details.c_str(); // TODO(gibbed): we don't have state icons yet. 
// discordPresence.smallImageKey = "app"; // discordPresence.largeImageKey = "state_ingame"; diff --git a/src/xenia/app/emulator_window.cc b/src/xenia/app/emulator_window.cc index 742b6473a..fdc0751bc 100644 --- a/src/xenia/app/emulator_window.cc +++ b/src/xenia/app/emulator_window.cc @@ -65,8 +65,8 @@ std::unique_ptr EmulatorWindow::Create(Emulator* emulator) { std::unique_ptr emulator_window(new EmulatorWindow(emulator)); emulator_window->loop()->PostSynchronous([&emulator_window]() { - xe::threading::set_name("Win32 Loop"); - xe::Profiler::ThreadEnter("Win32 Loop"); + xe::threading::set_name("Windowing Loop"); + xe::Profiler::ThreadEnter("Windowing Loop"); if (!emulator_window->Initialize()) { xe::FatalError("Failed to initialize main window"); diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index 16d6e66a8..e5cdb2561 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -302,6 +302,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) { // No available data. 
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { + data->output_buffer_valid = 0; return; } diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index dd7d30817..ee1c9aa45 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -144,7 +144,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { WorkerThreadMain(); return 0; })); - worker_thread_->set_name("XMA Decoder Worker"); + worker_thread_->set_name("XMA Decoder"); worker_thread_->set_can_debugger_suspend(true); worker_thread_->Create(); diff --git a/src/xenia/base/logging.cc b/src/xenia/base/logging.cc index aa688c87e..0b195e85a 100644 --- a/src/xenia/base/logging.cc +++ b/src/xenia/base/logging.cc @@ -36,10 +36,8 @@ #include "third_party/fmt/include/fmt/format.h" -DEFINE_path( - log_file, "", - "Logs are written to the given file (specify stdout for command line)", - "Logging"); +DEFINE_path(log_file, "", "Logs are written to the given file", "Logging"); +DEFINE_bool(log_to_stdout, true, "Write log output to stdout", "Logging"); DEFINE_bool(log_to_debugprint, false, "Dump the log to DebugPrint.", "Logging"); DEFINE_bool(flush_log, true, "Flush log file after each log line batch.", "Logging"); @@ -66,41 +64,39 @@ struct LogLine { thread_local char thread_log_buffer_[64 * 1024]; +void FileLogSink::Write(const char* buf, size_t size) { + if (file_) { + fwrite(buf, 1, size, file_); + } +} + +void FileLogSink::Flush() { + if (file_) { + fflush(file_); + } +} + class Logger { public: explicit Logger(const std::string_view app_name) - : file_(nullptr), - running_(true), - wait_strategy_(), + : wait_strategy_(), claim_strategy_(kBlockCount, wait_strategy_), - consumed_(wait_strategy_) { + consumed_(wait_strategy_), + running_(true) { claim_strategy_.add_claim_barrier(consumed_); - if (cvars::log_file.empty()) { - // Default to app name. 
- auto file_name = fmt::format("{}.log", app_name); - auto file_path = std::filesystem::path(file_name); - xe::filesystem::CreateParentFolder(file_path); - file_ = xe::filesystem::OpenFile(file_path, "wt"); - } else { - if (cvars::log_file == "stdout") { - file_ = stdout; - } else { - xe::filesystem::CreateParentFolder(cvars::log_file); - file_ = xe::filesystem::OpenFile(cvars::log_file, "wt"); - } - } - write_thread_ = xe::threading::Thread::Create({}, [this]() { WriteThread(); }); - write_thread_->set_name("xe::FileLogSink Writer"); + write_thread_->set_name("Logging Writer"); } ~Logger() { running_ = false; xe::threading::Wait(write_thread_.get(), true); - fflush(file_); - fclose(file_); + } + + void AddLogSink(std::unique_ptr&& sink) { + sinks_.push_back(std::move(sink)); } private: @@ -126,14 +122,14 @@ class Logger { dp::multi_threaded_claim_strategy claim_strategy_; dp::sequence_barrier consumed_; - FILE* file_; + std::vector> sinks_; std::atomic running_; std::unique_ptr write_thread_; void Write(const char* buf, size_t size) { - if (file_) { - fwrite(buf, 1, size, file_); + for (const auto& sink : sinks_) { + sink->Write(buf, size); } if (cvars::log_to_debugprint) { debugging::DebugPrint("{}", std::string_view(buf, size)); @@ -246,7 +242,9 @@ class Logger { desired_count = 1; if (cvars::flush_log) { - fflush(file_); + for (const auto& sink : sinks_) { + sink->Flush(); + } } idle_loops = 0; @@ -291,6 +289,27 @@ class Logger { void InitializeLogging(const std::string_view app_name) { auto mem = memory::AlignedAlloc(0x10); logger_ = new (mem) Logger(app_name); + + FILE* log_file = nullptr; + + if (cvars::log_file.empty()) { + // Default to app name. 
+ auto file_name = fmt::format("{}.log", app_name); + auto file_path = std::filesystem::path(file_name); + xe::filesystem::CreateParentFolder(file_path); + + log_file = xe::filesystem::OpenFile(file_path, "wt"); + } else { + xe::filesystem::CreateParentFolder(cvars::log_file); + log_file = xe::filesystem::OpenFile(cvars::log_file, "wt"); + } + auto sink = std::make_unique(log_file); + logger_->AddLogSink(std::move(sink)); + + if (cvars::log_to_stdout) { + auto stdout_sink = std::make_unique(stdout); + logger_->AddLogSink(std::move(stdout_sink)); + } } void ShutdownLogging() { diff --git a/src/xenia/base/logging.h b/src/xenia/base/logging.h index 864d5d620..d2df15cce 100644 --- a/src/xenia/base/logging.h +++ b/src/xenia/base/logging.h @@ -34,6 +34,31 @@ enum class LogLevel { Trace, }; +class LogSink { + public: + virtual ~LogSink() = default; + + virtual void Write(const char* buf, size_t size) = 0; + virtual void Flush() = 0; +}; + +class FileLogSink final : public LogSink { + public: + explicit FileLogSink(FILE* file) : file_(file) {} + virtual ~FileLogSink() { + if (file_) { + fflush(file_); + fclose(file_); + } + } + + void Write(const char* buf, size_t size) override; + void Flush() override; + + private: + FILE* file_; +}; + // Initializes the logging system and any outputs requested. // Must be called on startup. 
void InitializeLogging(const std::string_view app_name); diff --git a/src/xenia/base/main_win.cc b/src/xenia/base/main_win.cc index 927b3ae5d..d61fe607b 100644 --- a/src/xenia/base/main_win.cc +++ b/src/xenia/base/main_win.cc @@ -29,6 +29,8 @@ DEFINE_bool(win32_high_freq, true, "Requests high performance from the NT kernel", "Kernel"); +DEFINE_bool(enable_console, false, "Open a console window with the main window", + "General"); namespace xe { @@ -37,27 +39,23 @@ bool has_console_attached_ = true; bool has_console_attached() { return has_console_attached_; } void AttachConsole() { - bool has_console = ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE; - if (!has_console) { - // We weren't launched from a console, so just return. - // We could alloc our own console, but meh: - // has_console = AllocConsole() == TRUE; - has_console_attached_ = false; + if (!cvars::enable_console) { return; } + + AllocConsole(); + has_console_attached_ = true; auto std_handle = (intptr_t)GetStdHandle(STD_OUTPUT_HANDLE); auto con_handle = _open_osfhandle(std_handle, _O_TEXT); auto fp = _fdopen(con_handle, "w"); - *stdout = *fp; - setvbuf(stdout, nullptr, _IONBF, 0); + freopen_s(&fp, "CONOUT$", "w", stdout); std_handle = (intptr_t)GetStdHandle(STD_ERROR_HANDLE); con_handle = _open_osfhandle(std_handle, _O_TEXT); fp = _fdopen(con_handle, "w"); - *stderr = *fp; - setvbuf(stderr, nullptr, _IONBF, 0); + freopen_s(&fp, "CONOUT$", "w", stderr); } static void RequestHighPerformance() { @@ -125,6 +123,10 @@ int Main() { return 1; } + // Open a console for stdout/stderr when the enable_console cvar is set; + // xe::AttachConsole() returns immediately otherwise. + xe::AttachConsole(); + // Setup COM on the main thread. // NOTE: this may fail if COM has already been initialized - that's OK. #pragma warning(suppress : 6031) @@ -163,10 +165,6 @@ int main(int argc_ignored, char** argv_ignored) { return xe::Main(); } // Used in windowed apps; automatically picked based on subsystem. 
int WINAPI wWinMain(HINSTANCE, HINSTANCE, LPWSTR command_line, int) { - // Attach a console so we can write output to stdout. If the user hasn't - // redirected output themselves it'll pop up a window. - xe::AttachConsole(); - // Run normal entry point. return xe::Main(); } diff --git a/src/xenia/base/string_util.h b/src/xenia/base/string_util.h index f1499bb5f..adb2012af 100644 --- a/src/xenia/base/string_util.h +++ b/src/xenia/base/string_util.h @@ -10,11 +10,15 @@ #ifndef XENIA_BASE_STRING_UTIL_H_ #define XENIA_BASE_STRING_UTIL_H_ +#include #include +#include +#include #include #include "third_party/fmt/include/fmt/format.h" #include "xenia/base/assert.h" +#include "xenia/base/memory.h" #include "xenia/base/platform.h" #include "xenia/base/string.h" #include "xenia/base/vec128.h" @@ -30,6 +34,40 @@ namespace xe { namespace string_util { +inline size_t copy_truncating(char* dest, const std::string_view source, + size_t dest_buffer_count) { + if (!dest_buffer_count) { + return 0; + } + size_t chars_copied = std::min(source.size(), dest_buffer_count - size_t(1)); + std::memcpy(dest, source.data(), chars_copied); + dest[chars_copied] = '\0'; + return chars_copied; +} + +inline size_t copy_truncating(char16_t* dest, const std::u16string_view source, + size_t dest_buffer_count) { + if (!dest_buffer_count) { + return 0; + } + size_t chars_copied = std::min(source.size(), dest_buffer_count - size_t(1)); + std::memcpy(dest, source.data(), chars_copied * sizeof(char16_t)); + dest[chars_copied] = u'\0'; + return chars_copied; +} + +inline size_t copy_and_swap_truncating(char16_t* dest, + const std::u16string_view source, + size_t dest_buffer_count) { + if (!dest_buffer_count) { + return 0; + } + size_t chars_copied = std::min(source.size(), dest_buffer_count - size_t(1)); + xe::copy_and_swap(dest, source.data(), chars_copied); + dest[chars_copied] = u'\0'; + return chars_copied; +} + inline std::string to_hex_string(uint32_t value) { return fmt::format("{:08X}", value); } 
diff --git a/src/xenia/base/system_win.cc b/src/xenia/base/system_win.cc index abb699844..0b6198445 100644 --- a/src/xenia/base/system_win.cc +++ b/src/xenia/base/system_win.cc @@ -15,7 +15,7 @@ namespace xe { void LaunchWebBrowser(const std::string& url) { auto temp = xe::to_utf16(url); - ShellExecuteW(nullptr, L"open", reinterpret_cast(url.c_str()), + ShellExecuteW(nullptr, L"open", reinterpret_cast(temp.c_str()), nullptr, nullptr, SW_SHOWNORMAL); } diff --git a/src/xenia/base/testing/threading_test.cc b/src/xenia/base/testing/threading_test.cc new file mode 100644 index 000000000..f8fae6339 --- /dev/null +++ b/src/xenia/base/testing/threading_test.cc @@ -0,0 +1,967 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * +****************************************************************************** +* Copyright 2018 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. 
* +****************************************************************************** +*/ + +#include + +#include "xenia/base/threading.h" + +#include "third_party/catch/include/catch.hpp" + +namespace xe { +namespace base { +namespace test { +using namespace threading; +using namespace std::chrono_literals; + +TEST_CASE("Fence") { + std::unique_ptr pFence; + std::unique_ptr pTimer; + + // Signal without wait + pFence = std::make_unique(); + pFence->Signal(); + + // Signal once and wait + pFence = std::make_unique(); + pFence->Signal(); + pFence->Wait(); + + // Signal twice and wait + pFence = std::make_unique(); + pFence->Signal(); + pFence->Signal(); + pFence->Wait(); + + // Signal and wait two times + pFence = std::make_unique(); + pFence->Signal(); + pFence->Wait(); + pFence->Signal(); + pFence->Wait(); + + // Test to synchronize multiple threads + std::atomic started(0); + std::atomic finished(0); + pFence = std::make_unique(); + auto func = [&pFence, &started, &finished] { + started.fetch_add(1); + pFence->Wait(); + finished.fetch_add(1); + }; + + auto threads = std::array({ + std::thread(func), + std::thread(func), + std::thread(func), + std::thread(func), + std::thread(func), + }); + + Sleep(100ms); + REQUIRE(started.load() == threads.size()); + REQUIRE(finished.load() == 0); + + pFence->Signal(); + + for (auto& t : threads) t.join(); + REQUIRE(finished.load() == threads.size()); +} + +TEST_CASE("Get number of logical processors") { + auto count = std::thread::hardware_concurrency(); + REQUIRE(logical_processor_count() == count); + REQUIRE(logical_processor_count() == count); + REQUIRE(logical_processor_count() == count); +} + +TEST_CASE("Enable process to set thread affinity") { + EnableAffinityConfiguration(); +} + +TEST_CASE("Yield Current Thread", "MaybeYield") { + // Run to see if there are any errors + MaybeYield(); +} + +TEST_CASE("Sync with Memory Barrier", "SyncMemory") { + // Run to see if there are any errors + SyncMemory(); +} + 
+TEST_CASE("Sleep Current Thread", "Sleep") { + auto wait_time = 50ms; + auto start = std::chrono::steady_clock::now(); + Sleep(wait_time); + auto duration = std::chrono::steady_clock::now() - start; + REQUIRE(duration >= wait_time); +} + +TEST_CASE("Sleep Current Thread in Alertable State", "Sleep") { + auto wait_time = 50ms; + auto start = std::chrono::steady_clock::now(); + auto result = threading::AlertableSleep(wait_time); + auto duration = std::chrono::steady_clock::now() - start; + REQUIRE(duration >= wait_time); + REQUIRE(result == threading::SleepResult::kSuccess); + + // TODO(bwrsandman): Test a Thread to return kAlerted. + // Need callback to call extended I/O function (ReadFileEx or WriteFileEx) +} + +TEST_CASE("TlsHandle") { + // Test Allocate + auto handle = threading::AllocateTlsHandle(); + + // Test Free + REQUIRE(threading::FreeTlsHandle(handle)); + REQUIRE(!threading::FreeTlsHandle(handle)); + REQUIRE(!threading::FreeTlsHandle(threading::kInvalidTlsHandle)); + + // Test setting values + handle = threading::AllocateTlsHandle(); + REQUIRE(threading::GetTlsValue(handle) == 0); + uint32_t value = 0xDEADBEEF; + threading::SetTlsValue(handle, reinterpret_cast(&value)); + auto p_received_value = threading::GetTlsValue(handle); + REQUIRE(threading::GetTlsValue(handle) != 0); + auto received_value = *reinterpret_cast(p_received_value); + REQUIRE(received_value == value); + + uintptr_t non_thread_local_value = 0; + auto thread = Thread::Create({}, [&non_thread_local_value, &handle] { + non_thread_local_value = threading::GetTlsValue(handle); + }); + + auto result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(non_thread_local_value == 0); + + // Cleanup + REQUIRE(threading::FreeTlsHandle(handle)); +} + +TEST_CASE("HighResolutionTimer") { + // The wait time is 500ms with an interval of 50ms + // Smaller values are not as precise and fail the test + const auto wait_time = 500ms; + + // Time the actual sleep duration + 
{ + const auto interval = 50ms; + std::atomic counter(0); // Start from a known zero. + auto start = std::chrono::steady_clock::now(); + auto cb = [&counter] { ++counter; }; + auto pTimer = HighResolutionTimer::CreateRepeating(interval, cb); + Sleep(wait_time); + pTimer.reset(); + auto duration = std::chrono::steady_clock::now() - start; + + // Should have run as many times as wait_time / timer_interval plus or + // minus 1 due to imprecision of Sleep + REQUIRE(duration.count() >= wait_time.count()); + auto ratio = static_cast(duration / interval); + REQUIRE(counter >= ratio - 1); + REQUIRE(counter <= ratio + 1); + } + + // Test concurrent timers + { + const auto interval1 = 100ms; + const auto interval2 = 200ms; + std::atomic counter1(0); + std::atomic counter2(0); + auto start = std::chrono::steady_clock::now(); + auto cb1 = [&counter1] { ++counter1; }; + auto cb2 = [&counter2] { ++counter2; }; + auto pTimer1 = HighResolutionTimer::CreateRepeating(interval1, cb1); + auto pTimer2 = HighResolutionTimer::CreateRepeating(interval2, cb2); + Sleep(wait_time); + pTimer1.reset(); + pTimer2.reset(); + auto duration = std::chrono::steady_clock::now() - start; + + // Should have run as many times as wait_time / timer_interval plus or + // minus 1 due to imprecision of Sleep + REQUIRE(duration.count() >= wait_time.count()); + auto ratio1 = static_cast(duration / interval1); + auto ratio2 = static_cast(duration / interval2); + REQUIRE(counter1 >= ratio1 - 1); + REQUIRE(counter1 <= ratio1 + 1); + REQUIRE(counter2 >= ratio2 - 1); + REQUIRE(counter2 <= ratio2 + 1); + } + + // TODO(bwrsandman): Check on which thread callbacks are executed when + // spawned from differing threads +} + +TEST_CASE("Wait on Multiple Handles", "Wait") { + auto mutant = Mutant::Create(true); + auto semaphore = Semaphore::Create(10, 10); + auto event_ = Event::CreateManualResetEvent(false); + auto thread = Thread::Create({}, [&mutant, &semaphore, &event_] { + event_->Set(); + Wait(mutant.get(), false, 25ms); + semaphore->Release(1, 
nullptr); + Wait(mutant.get(), false, 25ms); + mutant->Release(); + }); + + std::vector handles = { + mutant.get(), + semaphore.get(), + event_.get(), + thread.get(), + }; + + auto any_result = WaitAny(handles, false, 100ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 0); + + auto all_result = WaitAll(handles, false, 100ms); + REQUIRE(all_result == WaitResult::kSuccess); +} + +TEST_CASE("Signal and Wait") { + WaitResult result; + auto mutant = Mutant::Create(true); + auto event_ = Event::CreateAutoResetEvent(false); + auto thread = Thread::Create({}, [&mutant, &event_] { + Wait(mutant.get(), false); + event_->Set(); + }); + result = Wait(event_.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + result = SignalAndWait(mutant.get(), event_.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); +} + +TEST_CASE("Wait on Event", "Event") { + auto evt = Event::CreateAutoResetEvent(false); + WaitResult result; + + // Call wait on unset Event + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + + // Call wait on set Event + evt->Set(); + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // Call wait on now consumed Event + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); +} + +TEST_CASE("Reset Event", "Event") { + auto evt = Event::CreateAutoResetEvent(false); + WaitResult result; + + // Call wait on reset Event + evt->Set(); + evt->Reset(); + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + + // Test resetting the unset event + evt->Reset(); + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + + // Test setting the reset event + evt->Set(); + result = Wait(evt.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); +} + +TEST_CASE("Wait on Multiple 
Events", "Event") { + auto events = std::array, 4>{ + Event::CreateAutoResetEvent(false), + Event::CreateAutoResetEvent(false), + Event::CreateAutoResetEvent(false), + Event::CreateManualResetEvent(false), + }; + + std::array order = {0}; + std::atomic_uint index(0); + auto sign_in = [&order, &index](uint32_t id) { + auto i = index.fetch_add(1, std::memory_order::memory_order_relaxed); + order[i] = static_cast('0' + id); + }; + + auto threads = std::array{ + std::thread([&events, &sign_in] { + auto res = WaitAll({events[1].get(), events[3].get()}, false, 100ms); + if (res == WaitResult::kSuccess) { + sign_in(1); + } + }), + std::thread([&events, &sign_in] { + auto res = WaitAny({events[0].get(), events[2].get()}, false, 100ms); + if (res.first == WaitResult::kSuccess) { + sign_in(2); + } + }), + std::thread([&events, &sign_in] { + auto res = WaitAll({events[0].get(), events[2].get(), events[3].get()}, + false, 100ms); + if (res == WaitResult::kSuccess) { + sign_in(3); + } + }), + std::thread([&events, &sign_in] { + auto res = WaitAny({events[1].get(), events[3].get()}, false, 100ms); + if (res.first == WaitResult::kSuccess) { + sign_in(4); + } + }), + }; + + Sleep(10ms); + events[3]->Set(); // Signals thread id=4 and stays on for 1 and 3 + Sleep(10ms); + events[1]->Set(); // Signals thread id=1 + Sleep(10ms); + events[0]->Set(); // Signals thread id=2 + Sleep(10ms); + events[2]->Set(); // Partial signals thread id=3 + events[0]->Set(); // Signals thread id=3 + + for (auto& t : threads) { + t.join(); + } + + INFO(order.data()); + REQUIRE(order[0] == '4'); + // TODO(bwrsandman): Order is not always maintained on linux + // REQUIRE(order[1] == '1'); + // REQUIRE(order[2] == '2'); + // REQUIRE(order[3] == '3'); +} + +TEST_CASE("Wait on Semaphore", "Semaphore") { + WaitResult result; + std::unique_ptr sem; + int previous_count = 0; + + // Wait on semaphore with no room + sem = Semaphore::Create(0, 5); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == 
WaitResult::kTimeout); + + // Add room in semaphore + REQUIRE(sem->Release(2, &previous_count)); + REQUIRE(previous_count == 0); + REQUIRE(sem->Release(1, &previous_count)); + REQUIRE(previous_count == 2); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(sem->Release(1, &previous_count)); + REQUIRE(previous_count == 2); + + // Set semaphore over maximum_count + sem = Semaphore::Create(5, 5); + previous_count = -1; + REQUIRE_FALSE(sem->Release(1, &previous_count)); + REQUIRE(previous_count == -1); + REQUIRE_FALSE(sem->Release(10, &previous_count)); + REQUIRE(previous_count == -1); + sem = Semaphore::Create(0, 5); + REQUIRE_FALSE(sem->Release(10, &previous_count)); + REQUIRE(previous_count == -1); + REQUIRE_FALSE(sem->Release(10, &previous_count)); + REQUIRE(previous_count == -1); + + // Test invalid Release parameters + REQUIRE_FALSE(sem->Release(0, &previous_count)); + REQUIRE(previous_count == -1); + REQUIRE_FALSE(sem->Release(-1, &previous_count)); + REQUIRE(previous_count == -1); + + // Wait on fully available semaphore + sem = Semaphore::Create(5, 5); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kTimeout); + + // Semaphore between threads + sem = Semaphore::Create(5, 5); + Sleep(10ms); + // Occupy the semaphore with 5 threads + auto func = [&sem] { + auto res = Wait(sem.get(), false, 100ms); + Sleep(500ms); + if (res == WaitResult::kSuccess) { + sem->Release(1, nullptr); + } + }; + auto threads = std::array{ + std::thread(func), std::thread(func), std::thread(func), + 
std::thread(func), std::thread(func), + }; + // Give threads time to acquire semaphore + Sleep(10ms); + // Attempt to acquire full semaphore with current (6th) thread + result = Wait(sem.get(), false, 20ms); + REQUIRE(result == WaitResult::kTimeout); + // Give threads time to release semaphore + for (auto& t : threads) { + t.join(); + } + result = Wait(sem.get(), false, 10ms); + REQUIRE(result == WaitResult::kSuccess); + sem->Release(1, &previous_count); + REQUIRE(previous_count == 4); + + // Test invalid construction parameters + // These are invalid according to documentation + // TODO(bwrsandman): Many of these invalid invocations succeed + sem = Semaphore::Create(-1, 5); + // REQUIRE(sem.get() == nullptr); + sem = Semaphore::Create(10, 5); + // REQUIRE(sem.get() == nullptr); + sem = Semaphore::Create(0, 0); + // REQUIRE(sem.get() == nullptr); + sem = Semaphore::Create(0, -1); + // REQUIRE(sem.get() == nullptr); +} + +TEST_CASE("Wait on Multiple Semaphores", "Semaphore") { + WaitResult all_result; + std::pair any_result; + int previous_count; + std::unique_ptr sem0, sem1; + + // Test Wait all which should fail + sem0 = Semaphore::Create(0, 5); + sem1 = Semaphore::Create(5, 5); + all_result = WaitAll({sem0.get(), sem1.get()}, false, 10ms); + REQUIRE(all_result == WaitResult::kTimeout); + previous_count = -1; + REQUIRE(sem0->Release(1, &previous_count)); + REQUIRE(previous_count == 0); + previous_count = -1; + REQUIRE_FALSE(sem1->Release(1, &previous_count)); + REQUIRE(previous_count == -1); + + // Test Wait all again which should succeed + sem0 = Semaphore::Create(1, 5); + sem1 = Semaphore::Create(5, 5); + all_result = WaitAll({sem0.get(), sem1.get()}, false, 10ms); + REQUIRE(all_result == WaitResult::kSuccess); + previous_count = -1; + REQUIRE(sem0->Release(1, &previous_count)); + REQUIRE(previous_count == 0); + previous_count = -1; + REQUIRE(sem1->Release(1, &previous_count)); + REQUIRE(previous_count == 4); + + // Test Wait Any which should fail + sem0 = 
Semaphore::Create(0, 5); + sem1 = Semaphore::Create(0, 5); + any_result = WaitAny({sem0.get(), sem1.get()}, false, 10ms); + REQUIRE(any_result.first == WaitResult::kTimeout); + REQUIRE(any_result.second == 0); + previous_count = -1; + REQUIRE(sem0->Release(1, &previous_count)); + REQUIRE(previous_count == 0); + previous_count = -1; + REQUIRE(sem1->Release(1, &previous_count)); + REQUIRE(previous_count == 0); + + // Test Wait Any which should succeed + sem0 = Semaphore::Create(0, 5); + sem1 = Semaphore::Create(5, 5); + any_result = WaitAny({sem0.get(), sem1.get()}, false, 10ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 1); + previous_count = -1; + REQUIRE(sem0->Release(1, &previous_count)); + REQUIRE(previous_count == 0); + previous_count = -1; + REQUIRE(sem1->Release(1, &previous_count)); + REQUIRE(previous_count == 4); +} + +TEST_CASE("Wait on Mutant", "Mutant") { + WaitResult result; + std::unique_ptr mut; + + // Release on initially owned mutant + mut = Mutant::Create(true); + REQUIRE(mut->Release()); + REQUIRE_FALSE(mut->Release()); + + // Release on initially not-owned mutant + mut = Mutant::Create(false); + REQUIRE_FALSE(mut->Release()); + + // Wait on initially owned mutant + mut = Mutant::Create(true); + result = Wait(mut.get(), false, 1ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(mut->Release()); + REQUIRE(mut->Release()); + REQUIRE_FALSE(mut->Release()); + + // Wait on initially not owned mutant + mut = Mutant::Create(false); + result = Wait(mut.get(), false, 1ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(mut->Release()); + REQUIRE_FALSE(mut->Release()); + + // Multiple waits (or locks) + mut = Mutant::Create(false); + for (int i = 0; i < 10; ++i) { + result = Wait(mut.get(), false, 1ms); + REQUIRE(result == WaitResult::kSuccess); + } + for (int i = 0; i < 10; ++i) { + REQUIRE(mut->Release()); + } + REQUIRE_FALSE(mut->Release()); + + // Test mutants on other threads + auto thread1 = 
std::thread([&mut] { + Sleep(5ms); + mut = Mutant::Create(true); + Sleep(100ms); + mut->Release(); + }); + Sleep(10ms); + REQUIRE_FALSE(mut->Release()); + Sleep(10ms); + result = Wait(mut.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + thread1.join(); + result = Wait(mut.get(), false, 1ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(mut->Release()); +} + +TEST_CASE("Wait on Multiple Mutants", "Mutant") { + WaitResult all_result; + std::pair any_result; + std::unique_ptr mut0, mut1; + + // Test which should fail for WaitAll and WaitAny + auto thread0 = std::thread([&mut0, &mut1] { + mut0 = Mutant::Create(true); + mut1 = Mutant::Create(true); + Sleep(50ms); + mut0->Release(); + mut1->Release(); + }); + Sleep(10ms); + all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(all_result == WaitResult::kTimeout); + REQUIRE_FALSE(mut0->Release()); + REQUIRE_FALSE(mut1->Release()); + any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(any_result.first == WaitResult::kTimeout); + REQUIRE(any_result.second == 0); + REQUIRE_FALSE(mut0->Release()); + REQUIRE_FALSE(mut1->Release()); + thread0.join(); + + // Test which should fail for WaitAll but not WaitAny + auto thread1 = std::thread([&mut0, &mut1] { + mut0 = Mutant::Create(true); + mut1 = Mutant::Create(false); + Sleep(50ms); + mut0->Release(); + }); + Sleep(10ms); + all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(all_result == WaitResult::kTimeout); + REQUIRE_FALSE(mut0->Release()); + REQUIRE_FALSE(mut1->Release()); + any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 1); + REQUIRE_FALSE(mut0->Release()); + REQUIRE(mut1->Release()); + thread1.join(); + + // Test which should pass for WaitAll and WaitAny + auto thread2 = std::thread([&mut0, &mut1] { + mut0 = Mutant::Create(false); + mut1 = Mutant::Create(false); + Sleep(50ms); + }); + Sleep(10ms); + 
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(all_result == WaitResult::kSuccess); + REQUIRE(mut0->Release()); + REQUIRE(mut1->Release()); + any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 0); + REQUIRE(mut0->Release()); + REQUIRE_FALSE(mut1->Release()); + thread2.join(); +} + +TEST_CASE("Wait on Timer", "Timer") { + WaitResult result; + std::unique_ptr timer; + + // Test Manual Reset + timer = Timer::CreateManualResetTimer(); + result = Wait(timer.get(), false, 1ms); + REQUIRE(result == WaitResult::kTimeout); + REQUIRE(timer->SetOnce(1ms)); // Signals it + result = Wait(timer.get(), false, 2ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(timer.get(), false, 1ms); + REQUIRE(result == WaitResult::kSuccess); // Did not reset + + // Test Synchronization + timer = Timer::CreateSynchronizationTimer(); + result = Wait(timer.get(), false, 1ms); + REQUIRE(result == WaitResult::kTimeout); + REQUIRE(timer->SetOnce(1ms)); // Signals it + result = Wait(timer.get(), false, 2ms); + REQUIRE(result == WaitResult::kSuccess); + result = Wait(timer.get(), false, 1ms); + REQUIRE(result == WaitResult::kTimeout); // Did reset + + // TODO(bwrsandman): This test unexpectedly fails under windows + // Test long due time + // timer = Timer::CreateSynchronizationTimer(); + // REQUIRE(timer->SetOnce(10s)); + // result = Wait(timer.get(), false, 10ms); // Still signals under windows + // REQUIRE(result == WaitResult::kTimeout); + + // Test Repeating + REQUIRE(timer->SetRepeating(1ms, 10ms)); + for (int i = 0; i < 10; ++i) { + result = Wait(timer.get(), false, 20ms); + INFO(i); + REQUIRE(result == WaitResult::kSuccess); + } + MaybeYield(); + Sleep(10ms); // Skip a few events + for (int i = 0; i < 10; ++i) { + result = Wait(timer.get(), false, 20ms); + REQUIRE(result == WaitResult::kSuccess); + } + // Cancel it + timer->Cancel(); + result = Wait(timer.get(), 
false, 20ms); + REQUIRE(result == WaitResult::kTimeout); + MaybeYield(); + Sleep(10ms); // Skip a few events + result = Wait(timer.get(), false, 20ms); + REQUIRE(result == WaitResult::kTimeout); + // Cancel with SetOnce + REQUIRE(timer->SetRepeating(1ms, 10ms)); + for (int i = 0; i < 10; ++i) { + result = Wait(timer.get(), false, 20ms); + REQUIRE(result == WaitResult::kSuccess); + } + REQUIRE(timer->SetOnce(1ms)); + result = Wait(timer.get(), false, 20ms); + REQUIRE(result == WaitResult::kSuccess); // Signal from Set Once + result = Wait(timer.get(), false, 20ms); + REQUIRE(result == WaitResult::kTimeout); // No more signals from repeating +} + +TEST_CASE("Wait on Multiple Timers", "Timer") { + WaitResult all_result; + std::pair any_result; + + auto timer0 = Timer::CreateSynchronizationTimer(); + auto timer1 = Timer::CreateManualResetTimer(); + + // None signaled + all_result = WaitAll({timer0.get(), timer1.get()}, false, 1ms); + REQUIRE(all_result == WaitResult::kTimeout); + any_result = WaitAny({timer0.get(), timer1.get()}, false, 1ms); + REQUIRE(any_result.first == WaitResult::kTimeout); + REQUIRE(any_result.second == 0); + + // Some signaled + REQUIRE(timer1->SetOnce(1ms)); + all_result = WaitAll({timer0.get(), timer1.get()}, false, 100ms); + REQUIRE(all_result == WaitResult::kTimeout); + any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 1); + + // All signaled + REQUIRE(timer0->SetOnce(1ms)); + all_result = WaitAll({timer0.get(), timer1.get()}, false, 100ms); + REQUIRE(all_result == WaitResult::kSuccess); + REQUIRE(timer0->SetOnce(1ms)); + Sleep(1ms); + any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + REQUIRE(any_result.second == 0); + + // Check that timer0 reset + any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms); + REQUIRE(any_result.first == WaitResult::kSuccess); + 
REQUIRE(any_result.second == 1); +} + +TEST_CASE("Create and Trigger Timer Callbacks", "Timer") { + // TODO(bwrsandman): Check which thread performs callback and timing of + // callback + REQUIRE(true); +} + +TEST_CASE("Set and Test Current Thread ID", "Thread") { + // System ID + auto system_id = current_thread_system_id(); + REQUIRE(system_id > 0); + + // Thread ID + auto thread_id = current_thread_id(); + REQUIRE(thread_id == system_id); + + // Set a new thread id + const uint32_t new_thread_id = 0xDEADBEEF; + set_current_thread_id(new_thread_id); + REQUIRE(current_thread_id() == new_thread_id); + + // Set back original thread id of system + set_current_thread_id(std::numeric_limits::max()); + REQUIRE(current_thread_id() == system_id); + + // TODO(bwrsandman): Test on Thread object +} + +TEST_CASE("Set and Test Current Thread Name", "Thread") { + auto current_thread = Thread::GetCurrentThread(); + REQUIRE(current_thread); + auto old_thread_name = current_thread->name(); + + std::string new_thread_name = "Threading Test"; + REQUIRE_NOTHROW(set_name(new_thread_name)); + + // Restore the old catch.hpp thread name + REQUIRE_NOTHROW(set_name(old_thread_name)); +} + +TEST_CASE("Create and Run Thread", "Thread") { + std::unique_ptr thread; + WaitResult result; + Thread::CreationParameters params = {}; + auto func = [] { Sleep(20ms); }; + + // Create most basic case of thread + thread = Thread::Create(params, func); + REQUIRE(thread->native_handle() != nullptr); + REQUIRE_NOTHROW(thread->affinity_mask()); + REQUIRE(thread->name().empty()); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // Add thread name + std::string new_name = "Test thread name"; + thread = Thread::Create(params, func); + auto name = thread->name(); + INFO(name.c_str()); + REQUIRE(name.empty()); + thread->set_name(new_name); + REQUIRE(thread->name() == new_name); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // 
Use Terminate to end an infinitely looping thread + thread = Thread::Create(params, [] { + while (true) { + Sleep(1ms); + } + }); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + thread->Terminate(-1); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // Call Exit from inside an infinitely looping thread + thread = Thread::Create(params, [] { + while (true) { + Thread::Exit(-1); + } + }); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // Call timeout wait on self + result = Wait(Thread::GetCurrentThread(), false, 50ms); + REQUIRE(result == WaitResult::kTimeout); + + params.stack_size = 16 * 1024; + thread = Thread::Create(params, [] { + while (true) { + Thread::Exit(-1); + } + }); + REQUIRE(thread != nullptr); + result = Wait(thread.get(), false, 50ms); + REQUIRE(result == WaitResult::kSuccess); + + // TODO(bwrsandman): Test with different priorities + // TODO(bwrsandman): Test setting and getting thread affinity +} + +TEST_CASE("Test Suspending Thread", "Thread") { + std::unique_ptr thread; + WaitResult result; + Thread::CreationParameters params = {}; + auto func = [] { Sleep(20ms); }; + + // Create initially suspended + params.create_suspended = true; + thread = threading::Thread::Create(params, func); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kTimeout); + thread->Resume(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kSuccess); + params.create_suspended = false; + + // Create and then suspend + thread = threading::Thread::Create(params, func); + thread->Suspend(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kTimeout); + thread->Resume(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kSuccess); + + // Test recursive suspend + 
thread = threading::Thread::Create(params, func); + thread->Suspend(); + thread->Suspend(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kTimeout); + thread->Resume(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kTimeout); + thread->Resume(); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kSuccess); + + // Test suspend count + uint32_t suspend_count = 0; + thread = threading::Thread::Create(params, func); + thread->Suspend(&suspend_count); + REQUIRE(suspend_count == 0); + thread->Suspend(&suspend_count); + REQUIRE(suspend_count == 1); + thread->Suspend(&suspend_count); + REQUIRE(suspend_count == 2); + thread->Resume(&suspend_count); + REQUIRE(suspend_count == 3); + thread->Resume(&suspend_count); + REQUIRE(suspend_count == 2); + thread->Resume(&suspend_count); + REQUIRE(suspend_count == 1); + thread->Suspend(&suspend_count); + REQUIRE(suspend_count == 0); + thread->Resume(&suspend_count); + REQUIRE(suspend_count == 1); + result = threading::Wait(thread.get(), false, 50ms); + REQUIRE(result == threading::WaitResult::kSuccess); +} + +TEST_CASE("Test Thread QueueUserCallback", "Thread") { + std::unique_ptr thread; + WaitResult result; + Thread::CreationParameters params = {}; + std::atomic_int order; + int is_modified; + int has_finished; + auto callback = [&is_modified, &order] { + is_modified = std::atomic_fetch_add_explicit( + &order, 1, std::memory_order::memory_order_relaxed); + }; + + // Without alertable + order = 0; + is_modified = -1; + has_finished = -1; + thread = Thread::Create(params, [&has_finished, &order] { + // Not using Alertable so callback is not registered + Sleep(90ms); + has_finished = std::atomic_fetch_add_explicit( + &order, 1, std::memory_order::memory_order_relaxed); + }); + result = Wait(thread.get(), true, 50ms); + REQUIRE(result == WaitResult::kTimeout); + REQUIRE(is_modified == -1); 
+ thread->QueueUserCallback(callback); + result = Wait(thread.get(), true, 100ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(is_modified == -1); + REQUIRE(has_finished == 0); + + // With alertable + order = 0; + is_modified = -1; + has_finished = -1; + thread = Thread::Create(params, [&has_finished, &order] { + // Using Alertable so callback is registered + AlertableSleep(90ms); + has_finished = std::atomic_fetch_add_explicit( + &order, 1, std::memory_order::memory_order_relaxed); + }); + result = Wait(thread.get(), true, 50ms); + REQUIRE(result == WaitResult::kTimeout); + REQUIRE(is_modified == -1); + thread->QueueUserCallback(callback); + result = Wait(thread.get(), true, 100ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(is_modified == 0); + REQUIRE(has_finished == 1); + + // Test Exit command with QueueUserCallback + order = 0; + is_modified = -1; + has_finished = -1; + thread = Thread::Create(params, [&is_modified, &has_finished, &order] { + is_modified = std::atomic_fetch_add_explicit( + &order, 1, std::memory_order::memory_order_relaxed); + // Using Alertable so callback is registered + AlertableSleep(200ms); + has_finished = std::atomic_fetch_add_explicit( + &order, 1, std::memory_order::memory_order_relaxed); + }); + result = Wait(thread.get(), true, 100ms); + REQUIRE(result == WaitResult::kTimeout); + thread->QueueUserCallback([] { Thread::Exit(0); }); + result = Wait(thread.get(), true, 500ms); + REQUIRE(result == WaitResult::kSuccess); + REQUIRE(is_modified == 0); + REQUIRE(has_finished == -1); + + // TODO(bwrsandman): Test alertable wait returning kUserCallback by using IO + // callbacks. 
+} + +} // namespace test +} // namespace base +} // namespace xe diff --git a/src/xenia/base/threading.h b/src/xenia/base/threading.h index fef37dd06..776a158e0 100644 --- a/src/xenia/base/threading.h +++ b/src/xenia/base/threading.h @@ -24,29 +24,56 @@ #include #include +#include "xenia/base/assert.h" + namespace xe { namespace threading { +// This is more like an Event with self-reset when returning from Wait() class Fence { public: - Fence() : signaled_(false) {} + Fence() : signal_state_(0) {} + void Signal() { std::unique_lock lock(mutex_); - signaled_.store(true); + signal_state_ |= SIGMASK_; cond_.notify_all(); } + + // Wait for the Fence to be signaled. Clears the signal on return. void Wait() { std::unique_lock lock(mutex_); - while (!signaled_.load()) { + assert_true((signal_state_ & ~SIGMASK_) < (SIGMASK_ - 1) && + "Too many threads?"); + + // keep local copy to minimize loads + auto signal_state = ++signal_state_; + for (; !(signal_state & SIGMASK_); signal_state = signal_state_) { cond_.wait(lock); } - signaled_.store(false); + + // We can't just clear the signal as other threads may not have read it yet + assert_true((signal_state & ~SIGMASK_) > 0); // wait_count > 0 + if (signal_state == (1 | SIGMASK_)) { // wait_count == 1 + // Last one out turn off the lights + signal_state_ = 0; + } else { + // Oops, another thread is still waiting, set the new count and keep the + // signal. + signal_state_ = --signal_state; + } } private: + using state_t_ = uint_fast32_t; + static constexpr state_t_ SIGMASK_ = state_t_(1) + << (sizeof(state_t_) * 8 - 1); + std::mutex mutex_; std::condition_variable cond_; - std::atomic signaled_; + // Use the highest bit (sign bit) as the signal flag and the rest to count + // waiting threads. + volatile state_t_ signal_state_; }; // Returns the total number of logical processors in the host system. 
@@ -308,12 +335,12 @@ class Timer : public WaitHandle { std::chrono::milliseconds period, std::function opt_callback = nullptr) = 0; template - void SetRepeating(std::chrono::nanoseconds due_time, + bool SetRepeating(std::chrono::nanoseconds due_time, std::chrono::duration period, std::function opt_callback = nullptr) { - SetRepeating(due_time, - std::chrono::duration_cast(period), - std::move(opt_callback)); + return SetRepeating( + due_time, std::chrono::duration_cast(period), + std::move(opt_callback)); } // Stops the timer before it can be set to the signaled state and cancels @@ -391,7 +418,7 @@ class Thread : public WaitHandle { // Decrements a thread's suspend count. When the suspend count is decremented // to zero, the execution of the thread is resumed. - virtual bool Resume(uint32_t* out_new_suspend_count = nullptr) = 0; + virtual bool Resume(uint32_t* out_previous_suspend_count = nullptr) = 0; // Suspends the specified thread. virtual bool Suspend(uint32_t* out_previous_suspend_count = nullptr) = 0; diff --git a/src/xenia/base/threading_posix.cc b/src/xenia/base/threading_posix.cc index 28597e608..9e39b17a5 100644 --- a/src/xenia/base/threading_posix.cc +++ b/src/xenia/base/threading_posix.cc @@ -13,16 +13,64 @@ #include "xenia/base/logging.h" #include +#include #include #include #include #include -#include #include +#include +#include namespace xe { namespace threading { +template +inline timespec DurationToTimeSpec( + std::chrono::duration<_Rep, _Period> duration) { + auto nanoseconds = + std::chrono::duration_cast(duration); + auto div = ldiv(nanoseconds.count(), 1000000000L); + return timespec{div.quot, div.rem}; +} + +// Thread interruption is done using user-defined signals +// This implementation uses the SIGRTMAX - SIGRTMIN to signal to a thread +// gdb tip, for SIG = SIGRTMIN + SignalType : handle SIG nostop +// lldb tip, for SIG = SIGRTMIN + SignalType : process handle SIG -s false +enum class SignalType { + kHighResolutionTimer, + kTimer, + 
kThreadSuspend, + kThreadUserCallback, + k_Count +}; + +int GetSystemSignal(SignalType num) { + auto result = SIGRTMIN + static_cast(num); + assert_true(result < SIGRTMAX); + return result; +} + +SignalType GetSystemSignalType(int num) { + return static_cast(num - SIGRTMIN); +} + +thread_local std::array(SignalType::k_Count)> + signal_handler_installed = {}; + +static void signal_handler(int signal, siginfo_t* info, void* context); + +void install_signal_handler(SignalType type) { + if (signal_handler_installed[static_cast(type)]) return; + struct sigaction action {}; + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = signal_handler; + sigemptyset(&action.sa_mask); + if (sigaction(GetSystemSignal(type), &action, nullptr) == 0) // 0 == success + signal_handler_installed[static_cast(type)] = true; +} + // TODO(dougvj) void EnableAffinityConfiguration() {} @@ -47,55 +95,81 @@ void MaybeYield() { void SyncMemory() { __sync_synchronize(); } void Sleep(std::chrono::microseconds duration) { - timespec rqtp = {time_t(duration.count() / 1000000), - time_t(duration.count() % 1000)}; - nanosleep(&rqtp, nullptr); - // TODO(benvanik): spin while rmtp >0? + timespec rqtp = DurationToTimeSpec(duration); + timespec rmtp = {}; + auto p_rqtp = &rqtp; + auto p_rmtp = &rmtp; + int ret = 0; + do { + ret = nanosleep(p_rqtp, p_rmtp); + // Swap requested for remaining in case of signal interruption + // in which case, we start sleeping again for the remainder + std::swap(p_rqtp, p_rmtp); + } while (ret == -1 && errno == EINTR); } -// TODO(dougvj) Not sure how to implement the equivalent of this on POSIX. 
+// TODO(bwrsandman) Implement by allowing alert interrupts from IO operations +thread_local bool alertable_state_ = false; SleepResult AlertableSleep(std::chrono::microseconds duration) { - sleep(duration.count() / 1000); + alertable_state_ = true; + Sleep(duration); + alertable_state_ = false; return SleepResult::kSuccess; } -// TODO(dougvj) We can probably wrap this with pthread_key_t but the type of -// TlsHandle probably needs to be refactored TlsHandle AllocateTlsHandle() { - assert_always(); - return 0; + auto key = static_cast(-1); + auto res = pthread_key_create(&key, nullptr); + assert_zero(res); + assert_true(key != static_cast(-1)); + return static_cast(key); } -bool FreeTlsHandle(TlsHandle handle) { return true; } +bool FreeTlsHandle(TlsHandle handle) { + return pthread_key_delete(static_cast(handle)) == 0; +} uintptr_t GetTlsValue(TlsHandle handle) { - assert_always(); - return 0; + return reinterpret_cast( + pthread_getspecific(static_cast(handle))); } bool SetTlsValue(TlsHandle handle, uintptr_t value) { - assert_always(); - return false; + return pthread_setspecific(static_cast(handle), + reinterpret_cast(value)) == 0; } -// TODO(dougvj) class PosixHighResolutionTimer : public HighResolutionTimer { public: - PosixHighResolutionTimer(std::function callback) - : callback_(callback) {} - ~PosixHighResolutionTimer() override {} + explicit PosixHighResolutionTimer(std::function callback) + : callback_(std::move(callback)), timer_(nullptr) {} + ~PosixHighResolutionTimer() override { + if (timer_) timer_delete(timer_); + } bool Initialize(std::chrono::milliseconds period) { - assert_always(); - return false; + // Create timer + sigevent sev{}; + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = GetSystemSignal(SignalType::kHighResolutionTimer); + sev.sigev_value.sival_ptr = (void*)&callback_; + if (timer_create(CLOCK_REALTIME, &sev, &timer_) == -1) return false; + + // Start timer + itimerspec its{}; + its.it_value = DurationToTimeSpec(period); + 
its.it_interval = its.it_value; + return timer_settime(timer_, 0, &its, nullptr) != -1; } private: std::function callback_; + timer_t timer_; }; std::unique_ptr HighResolutionTimer::CreateRepeating( std::chrono::milliseconds period, std::function callback) { + install_signal_handler(SignalType::kHighResolutionTimer); auto timer = std::make_unique(std::move(callback)); if (!timer->Initialize(period)) { return nullptr; @@ -103,209 +177,669 @@ std::unique_ptr HighResolutionTimer::CreateRepeating( return std::unique_ptr(timer.release()); } -// TODO(dougvj) There really is no native POSIX handle for a single wait/signal -// construct pthreads is at a lower level with more handles for such a mechanism -// This simple wrapper class could function as our handle, but probably needs -// some more functionality -class PosixCondition { +class PosixConditionBase { public: - PosixCondition() : signal_(false) { - pthread_mutex_init(&mutex_, NULL); - pthread_cond_init(&cond_, NULL); + virtual bool Signal() = 0; + + WaitResult Wait(std::chrono::milliseconds timeout) { + bool executed; + auto predicate = [this] { return this->signaled(); }; + auto lock = std::unique_lock(mutex_); + if (predicate()) { + executed = true; + } else { + if (timeout == std::chrono::milliseconds::max()) { + cond_.wait(lock, predicate); + executed = true; // Did not time out; + } else { + executed = cond_.wait_for(lock, timeout, predicate); + } + } + if (executed) { + post_execution(); + return WaitResult::kSuccess; + } else { + return WaitResult::kTimeout; + } } - ~PosixCondition() { - pthread_mutex_destroy(&mutex_); - pthread_cond_destroy(&cond_); + static std::pair WaitMultiple( + std::vector&& handles, bool wait_all, + std::chrono::milliseconds timeout) { + using iter_t = std::vector::const_iterator; + bool executed; + auto predicate = [](auto h) { return h->signaled(); }; + + // Construct a condition for all or any depending on wait_all + auto operation = wait_all ? 
std::all_of + : std::any_of; + auto aggregate = [&handles, operation, predicate] { + return operation(handles.cbegin(), handles.cend(), predicate); + }; + + // TODO(bwrsandman, Triang3l) This is controversial, see issue #1677 + // This will probably cause a deadlock on the next thread doing any waiting + // if the thread is suspended between locking and waiting + std::unique_lock lock(PosixConditionBase::mutex_); + + // Check if the aggregate lambda (all or any) is already satisfied + if (aggregate()) { + executed = true; + } else { + // If the aggregate is not yet satisfied and the timeout is infinite, + // wait without timeout. + if (timeout == std::chrono::milliseconds::max()) { + PosixConditionBase::cond_.wait(lock, aggregate); + executed = true; + } else { + // Wait with timeout. + executed = PosixConditionBase::cond_.wait_for(lock, timeout, aggregate); + } + } + if (executed) { + auto first_signaled = std::numeric_limits::max(); + for (auto i = 0u; i < handles.size(); ++i) { + if (handles[i]->signaled()) { + if (first_signaled > i) { + first_signaled = i; + } + handles[i]->post_execution(); + if (!wait_all) break; + } + } + return std::make_pair(WaitResult::kSuccess, first_signaled); + } else { + return std::make_pair(WaitResult::kTimeout, 0); + } } - void Signal() { - pthread_mutex_lock(&mutex_); + virtual void* native_handle() const { return cond_.native_handle(); } + + protected: + inline virtual bool signaled() const = 0; + inline virtual void post_execution() = 0; + static std::condition_variable cond_; + static std::mutex mutex_; +}; + +std::condition_variable PosixConditionBase::cond_; +std::mutex PosixConditionBase::mutex_; + +// There really is no native POSIX handle for a single wait/signal construct +// pthreads is at a lower level with more handles for such a mechanism. +// This simple wrapper class functions as our handle and uses conditional +// variables for waits and signals. 
+template +class PosixCondition {}; + +template <> +class PosixCondition : public PosixConditionBase { + public: + PosixCondition(bool manual_reset, bool initial_state) + : signal_(initial_state), manual_reset_(manual_reset) {} + virtual ~PosixCondition() = default; + + bool Signal() override { + auto lock = std::unique_lock(mutex_); signal_ = true; - pthread_cond_broadcast(&cond_); - pthread_mutex_unlock(&mutex_); + if (manual_reset_) { + cond_.notify_all(); + } else { + // FIXME(bwrsandman): Potential cause for deadlock + // See issue #1678 for possible fix and discussion + cond_.notify_one(); + } + return true; } void Reset() { - pthread_mutex_lock(&mutex_); + auto lock = std::unique_lock(mutex_); signal_ = false; - pthread_mutex_unlock(&mutex_); - } - - bool Wait(unsigned int timeout_ms) { - // Assume 0 means no timeout, not instant timeout - if (timeout_ms == 0) { - Wait(); - } - struct timespec time_to_wait; - struct timeval now; - gettimeofday(&now, NULL); - - // Add the number of seconds we want to wait to the current time - time_to_wait.tv_sec = now.tv_sec + (timeout_ms / 1000); - // Add the number of nanoseconds we want to wait to the current nanosecond - // stride - long nsec = (now.tv_usec + (timeout_ms % 1000)) * 1000; - // If we overflowed the nanosecond count then we add a second - time_to_wait.tv_sec += nsec / 1000000000UL; - // We only add nanoseconds within the 1 second stride - time_to_wait.tv_nsec = nsec % 1000000000UL; - pthread_mutex_lock(&mutex_); - while (!signal_) { - int status = pthread_cond_timedwait(&cond_, &mutex_, &time_to_wait); - if (status == ETIMEDOUT) return false; // We timed out - } - pthread_mutex_unlock(&mutex_); - return true; // We didn't time out - } - - bool Wait() { - pthread_mutex_lock(&mutex_); - while (!signal_) { - pthread_cond_wait(&cond_, &mutex_); - } - pthread_mutex_unlock(&mutex_); - return true; // Did not time out; } private: + inline bool signaled() const override { return signal_; } + inline void 
post_execution() override { + if (!manual_reset_) { + signal_ = false; + } + } bool signal_; - pthread_cond_t cond_; - pthread_mutex_t mutex_; + const bool manual_reset_; }; -// Native posix thread handle -template -class PosixThreadHandle : public T { +template <> +class PosixCondition : public PosixConditionBase { public: - explicit PosixThreadHandle(pthread_t handle) : handle_(handle) {} - ~PosixThreadHandle() override {} + PosixCondition(uint32_t initial_count, uint32_t maximum_count) + : count_(initial_count), maximum_count_(maximum_count) {} - protected: - void* native_handle() const override { - return reinterpret_cast(handle_); + bool Signal() override { return Release(1, nullptr); } + + bool Release(uint32_t release_count, int* out_previous_count) { + auto lock = std::unique_lock(mutex_); // lock before reading count_ + if (maximum_count_ - count_ >= release_count) { + if (out_previous_count) *out_previous_count = count_; + count_ += release_count; + cond_.notify_all(); + return true; + } + return false; } - pthread_t handle_; + private: + inline bool signaled() const override { return count_ > 0; } + inline void post_execution() override { + count_--; + cond_.notify_all(); + } + uint32_t count_; + const uint32_t maximum_count_; }; -// This is wraps a condition object as our handle because posix has no single -// native handle for higher level concurrency constructs such as semaphores -template -class PosixConditionHandle : public T { +template <> +class PosixCondition : public PosixConditionBase { public: - ~PosixConditionHandle() override {} - - protected: - void* native_handle() const override { - return reinterpret_cast(const_cast(&handle_)); + explicit PosixCondition(bool initial_owner) : count_(0) { + if (initial_owner) { + count_ = 1; + owner_ = std::this_thread::get_id(); + } } - PosixCondition handle_; + bool Signal() override { return Release(); } + + bool Release() { + auto lock = std::unique_lock(mutex_); // lock before reading owner_/count_ + if (owner_ == std::this_thread::get_id() && count_ > 0) { + 
--count_; + // Free to be acquired by another thread + if (count_ == 0) { + cond_.notify_one(); + } + return true; + } + return false; + } + + void* native_handle() const override { return mutex_.native_handle(); } + + private: + inline bool signaled() const override { + return count_ == 0 || owner_ == std::this_thread::get_id(); + } + inline void post_execution() override { + count_++; + owner_ = std::this_thread::get_id(); + } + uint32_t count_; + std::thread::id owner_; }; -template -class PosixFdHandle : public T { +template <> +class PosixCondition : public PosixConditionBase { public: - explicit PosixFdHandle(intptr_t handle) : handle_(handle) {} - ~PosixFdHandle() override { - close(handle_); - handle_ = 0; + explicit PosixCondition(bool manual_reset) + : callback_(), + timer_(nullptr), + signal_(false), + manual_reset_(manual_reset) {} + + virtual ~PosixCondition() { Cancel(); } + + bool Signal() override { + CompletionRoutine(); + return true; } - protected: - void* native_handle() const override { - return reinterpret_cast(handle_); - } + // TODO(bwrsandman): due_times of under 1ms deadlock under travis + bool Set(std::chrono::nanoseconds due_time, std::chrono::milliseconds period, + std::function opt_callback = nullptr) { + std::lock_guard lock(mutex_); - intptr_t handle_; -}; + callback_ = std::move(opt_callback); + signal_ = false; -// TODO(dougvj) -WaitResult Wait(WaitHandle* wait_handle, bool is_alertable, - std::chrono::milliseconds timeout) { - intptr_t handle = reinterpret_cast(wait_handle->native_handle()); - - fd_set set; - struct timeval time_val; - int ret; - - FD_ZERO(&set); - FD_SET(handle, &set); - - time_val.tv_sec = timeout.count() / 1000; - time_val.tv_usec = timeout.count() * 1000; - ret = select(handle + 1, &set, NULL, NULL, &time_val); - if (ret == -1) { - return WaitResult::kFailed; - } else if (ret == 0) { - return WaitResult::kTimeout; - } else { - uint64_t buf = 0; - ret = read(handle, &buf, sizeof(buf)); - if (ret < 8) { - return 
WaitResult::kTimeout; + // Create timer + if (timer_ == nullptr) { + sigevent sev{}; + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = GetSystemSignal(SignalType::kTimer); + sev.sigev_value.sival_ptr = this; + if (timer_create(CLOCK_REALTIME, &sev, &timer_) == -1) return false; } - return WaitResult::kSuccess; + // Start timer + itimerspec its{}; + its.it_value = DurationToTimeSpec(due_time); + its.it_interval = DurationToTimeSpec(period); + return timer_settime(timer_, 0, &its, nullptr) == 0; } + + void CompletionRoutine() { + // As the callback may reset the timer, store local. + std::function callback; + { + std::lock_guard lock(mutex_); + // Store callback + if (callback_) callback = callback_; + signal_ = true; + if (manual_reset_) { + cond_.notify_all(); + } else { + cond_.notify_one(); + } + } + // Call callback + if (callback) callback(); + } + + bool Cancel() { + std::lock_guard lock(mutex_); + bool result = true; + if (timer_) { + result = timer_delete(timer_) == 0; + timer_ = nullptr; + } + return result; + } + + void* native_handle() const override { + return reinterpret_cast(timer_); + } + + private: + inline bool signaled() const override { return signal_; } + inline void post_execution() override { + if (!manual_reset_) { + signal_ = false; + } + } + std::function callback_; + timer_t timer_; + volatile bool signal_; + const bool manual_reset_; +}; + +struct ThreadStartData { + std::function start_routine; + bool create_suspended; + Thread* thread_obj; +}; + +template <> +class PosixCondition : public PosixConditionBase { + enum class State { + kUninitialized, + kRunning, + kSuspended, + kFinished, + }; + + public: + PosixCondition() + : thread_(0), + signaled_(false), + exit_code_(0), + state_(State::kUninitialized), + suspend_count_(0) {} + bool Initialize(Thread::CreationParameters params, + ThreadStartData* start_data) { + start_data->create_suspended = params.create_suspended; + pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0) 
return false; + if (pthread_attr_setstacksize(&attr, params.stack_size) != 0) { + pthread_attr_destroy(&attr); + return false; + } + if (params.initial_priority != 0) { + sched_param sched{}; + sched.sched_priority = params.initial_priority + 1; + if (pthread_attr_setschedpolicy(&attr, SCHED_FIFO) != 0) { + pthread_attr_destroy(&attr); + return false; + } + if (pthread_attr_setschedparam(&attr, &sched) != 0) { + pthread_attr_destroy(&attr); + return false; + } + } + // Destroy attr on both paths so a failed pthread_create does not leak it. + const bool created = + pthread_create(&thread_, &attr, ThreadStartRoutine, start_data) == 0; + pthread_attr_destroy(&attr); + return created; + } + + /// Constructor for existing thread. This should only happen once called by + /// Thread::GetCurrentThread() on the main thread + explicit PosixCondition(pthread_t thread) + : thread_(thread), + signaled_(false), + exit_code_(0), + state_(State::kRunning), suspend_count_(0) {} + + virtual ~PosixCondition() { + if (thread_ && !signaled_) { + if (pthread_cancel(thread_) != 0) { + assert_always(); + } + if (pthread_join(thread_, nullptr) != 0) { + assert_always(); + } + } + } + + bool Signal() override { return true; } + + std::string name() const { + WaitStarted(); + auto result = std::array{'\0'}; + std::unique_lock lock(state_mutex_); + if (state_ != State::kUninitialized && state_ != State::kFinished) { + if (pthread_getname_np(thread_, result.data(), result.size() - 1) != 0) + assert_always(); + } + return std::string(result.data()); + } + + void set_name(const std::string& name) { + WaitStarted(); + std::unique_lock lock(state_mutex_); + if (state_ != State::kUninitialized && state_ != State::kFinished) { + threading::set_name(static_cast(thread_), + name); + } + } + + uint32_t system_id() const { return static_cast(thread_); } + + uint64_t affinity_mask() { + WaitStarted(); + cpu_set_t cpu_set; + if (pthread_getaffinity_np(thread_, sizeof(cpu_set_t), &cpu_set) != 0) + assert_always(); + uint64_t result = 0; + auto cpu_count = std::min(CPU_SETSIZE, 64); + for (auto i = 
0u; i < cpu_count; i++) { + uint64_t set = CPU_ISSET(i, &cpu_set) ? 1 : 0; // widen before shifting + result |= set << i; + } + return result; + } + + void set_affinity_mask(uint64_t mask) { + WaitStarted(); + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + for (auto i = 0u; i < 64; i++) { + if (mask & (uint64_t(1) << i)) { + CPU_SET(i, &cpu_set); + } + } + if (pthread_setaffinity_np(thread_, sizeof(cpu_set_t), &cpu_set) != 0) { + assert_always(); + } + } + + int priority() { + WaitStarted(); + int policy; + sched_param param{}; + int ret = pthread_getschedparam(thread_, &policy, &param); + if (ret != 0) { + return -1; + } + + return param.sched_priority; + } + + void set_priority(int new_priority) { + WaitStarted(); + sched_param param{}; + param.sched_priority = new_priority; + if (pthread_setschedparam(thread_, SCHED_FIFO, &param) != 0) + assert_always(); + } + + void QueueUserCallback(std::function callback) { + WaitStarted(); + std::unique_lock lock(callback_mutex_); + user_callback_ = std::move(callback); + sigval value{}; + value.sival_ptr = this; + pthread_sigqueue(thread_, GetSystemSignal(SignalType::kThreadUserCallback), + value); + } + + void CallUserCallback() { + std::unique_lock lock(callback_mutex_); + user_callback_(); + } + + bool Resume(uint32_t* out_previous_suspend_count = nullptr) { + if (out_previous_suspend_count) { + *out_previous_suspend_count = 0; + } + WaitStarted(); + std::unique_lock lock(state_mutex_); + if (state_ != State::kSuspended) return false; + if (out_previous_suspend_count) { + *out_previous_suspend_count = suspend_count_; + } + --suspend_count_; + state_signal_.notify_all(); + return true; + } + + bool Suspend(uint32_t* out_previous_suspend_count = nullptr) { + if (out_previous_suspend_count) { + *out_previous_suspend_count = 0; + } + WaitStarted(); + { + if (out_previous_suspend_count) { + *out_previous_suspend_count = suspend_count_; + } + state_ = State::kSuspended; + ++suspend_count_; + } + int result = + pthread_kill(thread_, GetSystemSignal(SignalType::kThreadSuspend)); + 
return result == 0; + } + + void Terminate(int exit_code) { + { + std::unique_lock lock(state_mutex_); + state_ = State::kFinished; + } + + std::lock_guard lock(mutex_); + + // Sometimes the thread can call terminate twice before stopping + if (thread_ == 0) return; + auto thread = thread_; + + exit_code_ = exit_code; + signaled_ = true; + cond_.notify_all(); + + if (pthread_cancel(thread) != 0) assert_always(); + } + + void WaitStarted() const { + std::unique_lock lock(state_mutex_); + state_signal_.wait(lock, + [this] { return state_ != State::kUninitialized; }); + } + + /// Set state to suspended and wait until it reset by another thread + void WaitSuspended() { + std::unique_lock lock(state_mutex_); + state_signal_.wait(lock, [this] { return suspend_count_ == 0; }); + state_ = State::kRunning; + } + + void* native_handle() const override { + return reinterpret_cast(thread_); + } + + private: + static void* ThreadStartRoutine(void* parameter); + inline bool signaled() const override { return signaled_; } + inline void post_execution() override { + if (thread_) { + pthread_join(thread_, nullptr); + thread_ = 0; + } + } + pthread_t thread_; + bool signaled_; + int exit_code_; + volatile State state_; + volatile uint32_t suspend_count_; + mutable std::mutex state_mutex_; + mutable std::mutex callback_mutex_; + mutable std::condition_variable state_signal_; + std::function user_callback_; +}; + +class PosixWaitHandle { + public: + virtual PosixConditionBase& condition() = 0; +}; + +// This wraps a condition object as our handle because posix has no single +// native handle for higher level concurrency constructs such as semaphores +template +class PosixConditionHandle : public T, public PosixWaitHandle { + public: + PosixConditionHandle() = default; + explicit PosixConditionHandle(bool); + explicit PosixConditionHandle(pthread_t thread); + PosixConditionHandle(bool manual_reset, bool initial_state); + PosixConditionHandle(uint32_t initial_count, uint32_t 
maximum_count); + ~PosixConditionHandle() override = default; + + PosixConditionBase& condition() override { return handle_; } + void* native_handle() const override { return handle_.native_handle(); } + + protected: + PosixCondition handle_; + friend PosixCondition; +}; + +template <> +PosixConditionHandle::PosixConditionHandle(uint32_t initial_count, + uint32_t maximum_count) + : handle_(initial_count, maximum_count) {} + +template <> +PosixConditionHandle::PosixConditionHandle(bool initial_owner) + : handle_(initial_owner) {} + +template <> +PosixConditionHandle::PosixConditionHandle(bool manual_reset) + : handle_(manual_reset) {} + +template <> +PosixConditionHandle::PosixConditionHandle(bool manual_reset, + bool initial_state) + : handle_(manual_reset, initial_state) {} + +template <> +PosixConditionHandle::PosixConditionHandle(pthread_t thread) + : handle_(thread) {} + +WaitResult Wait(WaitHandle* wait_handle, bool is_alertable, + std::chrono::milliseconds timeout) { + auto posix_wait_handle = dynamic_cast(wait_handle); + if (posix_wait_handle == nullptr) { + return WaitResult::kFailed; + } + if (is_alertable) alertable_state_ = true; + auto result = posix_wait_handle->condition().Wait(timeout); + if (is_alertable) alertable_state_ = false; + return result; } -// TODO(dougvj) WaitResult SignalAndWait(WaitHandle* wait_handle_to_signal, WaitHandle* wait_handle_to_wait_on, bool is_alertable, std::chrono::milliseconds timeout) { - assert_always(); - return WaitResult::kFailed; + auto result = WaitResult::kFailed; + auto posix_wait_handle_to_signal = + dynamic_cast(wait_handle_to_signal); + auto posix_wait_handle_to_wait_on = + dynamic_cast(wait_handle_to_wait_on); + if (posix_wait_handle_to_signal == nullptr || + posix_wait_handle_to_wait_on == nullptr) { + return WaitResult::kFailed; + } + if (is_alertable) alertable_state_ = true; + if (posix_wait_handle_to_signal->condition().Signal()) { + result = posix_wait_handle_to_wait_on->condition().Wait(timeout); + } + 
if (is_alertable) alertable_state_ = false; + return result; } -// TODO(dougvj) std::pair WaitMultiple(WaitHandle* wait_handles[], size_t wait_handle_count, bool wait_all, bool is_alertable, std::chrono::milliseconds timeout) { - assert_always(); - return std::pair(WaitResult::kFailed, 0); + std::vector conditions; + conditions.reserve(wait_handle_count); + for (size_t i = 0u; i < wait_handle_count; ++i) { + auto handle = dynamic_cast(wait_handles[i]); + if (handle == nullptr) { + return std::make_pair(WaitResult::kFailed, 0); + } + conditions.push_back(&handle->condition()); + } + if (is_alertable) alertable_state_ = true; + auto result = PosixConditionBase::WaitMultiple(std::move(conditions), + wait_all, timeout); + if (is_alertable) alertable_state_ = false; + return result; } -// TODO(dougvj) -class PosixEvent : public PosixFdHandle { +class PosixEvent : public PosixConditionHandle { public: - PosixEvent(intptr_t fd) : PosixFdHandle(fd) {} + PosixEvent(bool manual_reset, bool initial_state) + : PosixConditionHandle(manual_reset, initial_state) {} ~PosixEvent() override = default; - void Set() override { - uint64_t buf = 1; - write(handle_, &buf, sizeof(buf)); + void Set() override { handle_.Signal(); } + void Reset() override { handle_.Reset(); } + void Pulse() override { + using namespace std::chrono_literals; + handle_.Signal(); + MaybeYield(); + Sleep(10us); + handle_.Reset(); } - void Reset() override { assert_always(); } - void Pulse() override { assert_always(); } - - private: - PosixCondition condition_; }; std::unique_ptr Event::CreateManualResetEvent(bool initial_state) { - // Linux's eventfd doesn't appear to support manual reset natively. - return nullptr; + return std::make_unique(true, initial_state); } std::unique_ptr Event::CreateAutoResetEvent(bool initial_state) { - int fd = eventfd(initial_state ? 
1 : 0, EFD_CLOEXEC); - if (fd == -1) { - return nullptr; - } - - return std::make_unique(PosixEvent(fd)); + return std::make_unique(false, initial_state); } -// TODO(dougvj) class PosixSemaphore : public PosixConditionHandle { public: - PosixSemaphore(int initial_count, int maximum_count) { assert_always(); } + PosixSemaphore(int initial_count, int maximum_count) + : PosixConditionHandle(static_cast(initial_count), + static_cast(maximum_count)) {} ~PosixSemaphore() override = default; bool Release(int release_count, int* out_previous_count) override { - assert_always(); - return false; + if (release_count < 1) { + return false; + } + return handle_.Release(static_cast(release_count), + out_previous_count); } }; @@ -314,149 +848,210 @@ std::unique_ptr Semaphore::Create(int initial_count, return std::make_unique(initial_count, maximum_count); } -// TODO(dougvj) class PosixMutant : public PosixConditionHandle { public: - PosixMutant(bool initial_owner) { assert_always(); } - ~PosixMutant() = default; - bool Release() override { - assert_always(); - return false; - } + explicit PosixMutant(bool initial_owner) + : PosixConditionHandle(initial_owner) {} + ~PosixMutant() override = default; + bool Release() override { return handle_.Release(); } }; std::unique_ptr Mutant::Create(bool initial_owner) { return std::make_unique(initial_owner); } -// TODO(dougvj) class PosixTimer : public PosixConditionHandle { public: - PosixTimer(bool manual_reset) { assert_always(); } - ~PosixTimer() = default; + explicit PosixTimer(bool manual_reset) : PosixConditionHandle(manual_reset) {} + ~PosixTimer() override = default; bool SetOnce(std::chrono::nanoseconds due_time, std::function opt_callback) override { - assert_always(); - return false; + return handle_.Set(due_time, std::chrono::milliseconds::zero(), + std::move(opt_callback)); } bool SetRepeating(std::chrono::nanoseconds due_time, std::chrono::milliseconds period, std::function opt_callback) override { - assert_always(); - return 
false; - } - bool Cancel() override { - assert_always(); - return false; + return handle_.Set(due_time, period, std::move(opt_callback)); } + bool Cancel() override { return handle_.Cancel(); } }; std::unique_ptr Timer::CreateManualResetTimer() { + install_signal_handler(SignalType::kTimer); return std::make_unique(true); } std::unique_ptr Timer::CreateSynchronizationTimer() { + install_signal_handler(SignalType::kTimer); return std::make_unique(false); } -class PosixThread : public PosixThreadHandle { +class PosixThread : public PosixConditionHandle { public: - explicit PosixThread(pthread_t handle) : PosixThreadHandle(handle) {} - ~PosixThread() = default; + PosixThread() = default; + explicit PosixThread(pthread_t thread) : PosixConditionHandle(thread) {} + ~PosixThread() override = default; + + bool Initialize(CreationParameters params, + std::function start_routine) { + auto start_data = + new ThreadStartData({std::move(start_routine), false, this}); + return handle_.Initialize(params, start_data); + } void set_name(std::string name) override { - pthread_setname_np(handle_, name.c_str()); - } - - uint32_t system_id() const override { return 0; } - - // TODO(DrChat) - uint64_t affinity_mask() override { return 0; } - void set_affinity_mask(uint64_t mask) override { assert_always(); } - - int priority() override { - int policy; - struct sched_param param; - int ret = pthread_getschedparam(handle_, &policy, ¶m); - if (ret != 0) { - return -1; + handle_.WaitStarted(); + Thread::set_name(name); + if (name.length() > 15) { + name = name.substr(0, 15); } - - return param.sched_priority; + handle_.set_name(name); } + uint32_t system_id() const override { return handle_.system_id(); } + + uint64_t affinity_mask() override { return handle_.affinity_mask(); } + void set_affinity_mask(uint64_t mask) override { + handle_.set_affinity_mask(mask); + } + + int priority() override { return handle_.priority(); } void set_priority(int new_priority) override { - struct 
sched_param param; - param.sched_priority = new_priority; - int ret = pthread_setschedparam(handle_, SCHED_FIFO, ¶m); + handle_.set_priority(new_priority); } - // TODO(DrChat) void QueueUserCallback(std::function callback) override { - assert_always(); + handle_.QueueUserCallback(std::move(callback)); } - bool Resume(uint32_t* out_new_suspend_count = nullptr) override { - assert_always(); - return false; + bool Resume(uint32_t* out_previous_suspend_count) override { + return handle_.Resume(out_previous_suspend_count); } - bool Suspend(uint32_t* out_previous_suspend_count = nullptr) override { - assert_always(); - return false; + bool Suspend(uint32_t* out_previous_suspend_count) override { + return handle_.Suspend(out_previous_suspend_count); } - void Terminate(int exit_code) override {} + void Terminate(int exit_code) override { handle_.Terminate(exit_code); } + + void WaitSuspended() { handle_.WaitSuspended(); } }; -thread_local std::unique_ptr current_thread_ = nullptr; +thread_local PosixThread* current_thread_ = nullptr; -struct ThreadStartData { - std::function start_routine; -}; -void* ThreadStartRoutine(void* parameter) { - current_thread_ = - std::unique_ptr(new PosixThread(::pthread_self())); +void* PosixCondition::ThreadStartRoutine(void* parameter) { + if (pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr) != 0) { + assert_always(); + } + threading::set_name(""); - auto start_data = reinterpret_cast(parameter); - start_data->start_routine(); + auto start_data = static_cast(parameter); + assert_not_null(start_data); + assert_not_null(start_data->thread_obj); + + auto thread = dynamic_cast(start_data->thread_obj); + auto start_routine = std::move(start_data->start_routine); + auto create_suspended = start_data->create_suspended; delete start_data; - return 0; + + current_thread_ = thread; + { + std::unique_lock lock(thread->handle_.state_mutex_); + thread->handle_.state_ = + create_suspended ? 
State::kSuspended : State::kRunning; + thread->handle_.state_signal_.notify_all(); + } + + if (create_suspended) { + std::unique_lock lock(thread->handle_.state_mutex_); + thread->handle_.suspend_count_ = 1; + thread->handle_.state_signal_.wait( + lock, [thread] { return thread->handle_.suspend_count_ == 0; }); + } + + start_routine(); + + { + std::unique_lock lock(thread->handle_.state_mutex_); + thread->handle_.state_ = State::kFinished; + } + + std::unique_lock lock(mutex_); + thread->handle_.exit_code_ = 0; + thread->handle_.signaled_ = true; + cond_.notify_all(); + + current_thread_ = nullptr; + return nullptr; } std::unique_ptr Thread::Create(CreationParameters params, std::function start_routine) { - auto start_data = new ThreadStartData({std::move(start_routine)}); - - assert_false(params.create_suspended); - pthread_t handle; - pthread_attr_t attr; - pthread_attr_init(&attr); - int ret = pthread_create(&handle, &attr, ThreadStartRoutine, start_data); - if (ret != 0) { - // TODO(benvanik): pass back? - auto last_error = errno; - XELOGE("Unable to pthread_create: {}", last_error); - delete start_data; - return nullptr; - } - - return std::unique_ptr(new PosixThread(handle)); + install_signal_handler(SignalType::kThreadSuspend); + install_signal_handler(SignalType::kThreadUserCallback); + auto thread = std::make_unique(); + if (!thread->Initialize(params, std::move(start_routine))) return nullptr; + assert_not_null(thread); + return thread; } Thread* Thread::GetCurrentThread() { if (current_thread_) { - return current_thread_.get(); + return current_thread_; } + // Should take this route only for threads not created by Thread::Create. + // The only thread not created by Thread::Create should be the main thread. 
pthread_t handle = pthread_self(); - current_thread_ = std::make_unique(handle); - return current_thread_.get(); + current_thread_ = new PosixThread(handle); + atexit([] { delete current_thread_; }); + + return current_thread_; } void Thread::Exit(int exit_code) { - pthread_exit(reinterpret_cast(exit_code)); + if (current_thread_) { + current_thread_->Terminate(exit_code); + // Sometimes the current thread keeps running after being cancelled. + // Prevent other calls from this thread from using current_thread_. + current_thread_ = nullptr; + } else { + // Should only happen with the main thread + pthread_exit(reinterpret_cast(exit_code)); + } +} + +static void signal_handler(int signal, siginfo_t* info, void* /*context*/) { + switch (GetSystemSignalType(signal)) { + case SignalType::kHighResolutionTimer: { + assert_not_null(info->si_value.sival_ptr); + auto callback = + *static_cast*>(info->si_value.sival_ptr); + callback(); + } break; + case SignalType::kTimer: { + assert_not_null(info->si_value.sival_ptr); + auto pTimer = + static_cast*>(info->si_value.sival_ptr); + pTimer->CompletionRoutine(); + } break; + case SignalType::kThreadSuspend: { + assert_not_null(current_thread_); + current_thread_->WaitSuspended(); + } break; + case SignalType::kThreadUserCallback: { + assert_not_null(info->si_value.sival_ptr); + auto p_thread = + static_cast*>(info->si_value.sival_ptr); + if (alertable_state_) { + p_thread->CallUserCallback(); + } + } break; + default: + assert_always(); + } } } // namespace threading diff --git a/src/xenia/base/threading_win.cc b/src/xenia/base/threading_win.cc index 605c2ccbf..6b4e31a99 100644 --- a/src/xenia/base/threading_win.cc +++ b/src/xenia/base/threading_win.cc @@ -388,16 +388,16 @@ class Win32Thread : public Win32Handle { QueueUserAPC(DispatchApc, handle_, reinterpret_cast(apc_data)); } - bool Resume(uint32_t* out_new_suspend_count = nullptr) override { - if (out_new_suspend_count) { - *out_new_suspend_count = 0; + bool Resume(uint32_t* 
out_previous_suspend_count = nullptr) override { + if (out_previous_suspend_count) { + *out_previous_suspend_count = 0; } DWORD result = ResumeThread(handle_); if (result == UINT_MAX) { return false; } - if (out_new_suspend_count) { - *out_new_suspend_count = result; + if (out_previous_suspend_count) { + *out_previous_suspend_count = result; } return true; } diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 9854f5030..4d9354946 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -73,7 +73,7 @@ bool CommandProcessor::Initialize( WorkerThreadMain(); return 0; })); - worker_thread_->set_name("GraphicsSystem Command Processor"); + worker_thread_->set_name("GPU Commands"); worker_thread_->Create(); return true; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 8db6f1626..ff9041fbd 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1996,15 +1996,44 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, current_external_pipeline_ = nullptr; } + // Get dynamic rasterizer state. + // Supersampling replacing multisampling due to difficulties of emulating + // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also + // resolution scale. + uint32_t pixel_size_x, pixel_size_y; + if (edram_rov_used_) { + pixel_size_x = 1; + pixel_size_y = 1; + } else { + xenos::MsaaSamples msaa_samples = + regs.Get().msaa_samples; + pixel_size_x = msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1; + pixel_size_y = msaa_samples >= xenos::MsaaSamples::k2X ? 
2 : 1; + } + if (texture_cache_->IsResolutionScale2X()) { + pixel_size_x *= 2; + pixel_size_y *= 2; + } + draw_util::ViewportInfo viewport_info; + draw_util::GetHostViewportInfo(regs, float(pixel_size_x), float(pixel_size_y), + true, float(D3D12_VIEWPORT_BOUNDS_MAX), false, + viewport_info); + draw_util::Scissor scissor; + draw_util::GetScissor(regs, scissor); + scissor.left *= pixel_size_x; + scissor.top *= pixel_size_y; + scissor.width *= pixel_size_x; + scissor.height *= pixel_size_y; + // Update viewport, scissor, blend factor and stencil reference. - UpdateFixedFunctionState(primitive_two_faced); + UpdateFixedFunctionState(viewport_info, scissor, primitive_two_faced); // Update system constants before uploading them. UpdateSystemConstantValues( memexport_used, primitive_two_faced, line_loop_closing_index, indexed ? index_buffer_info->endianness : xenos::Endian::kNone, - used_texture_mask, early_z, GetCurrentColorMask(pixel_shader), - pipeline_render_targets); + viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, early_z, + GetCurrentColorMask(pixel_shader), pipeline_render_targets); // Update constant buffers, descriptors and root parameters. if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { @@ -2753,87 +2782,21 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { command_allocator_writable_last_ = nullptr; } -void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { +void D3D12CommandProcessor::UpdateFixedFunctionState( + const draw_util::ViewportInfo& viewport_info, + const draw_util::Scissor& scissor, bool primitive_two_faced) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES - const RegisterFile& regs = *register_file_; - - // Window parameters. 
- // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h - // See r200UpdateWindow: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - auto pa_sc_window_offset = regs.Get(); - - // Supersampling replacing multisampling due to difficulties of emulating - // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also - // resolution scale. - uint32_t pixel_size_x, pixel_size_y; - if (edram_rov_used_) { - pixel_size_x = 1; - pixel_size_y = 1; - } else { - xenos::MsaaSamples msaa_samples = - regs.Get().msaa_samples; - pixel_size_x = msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1; - pixel_size_y = msaa_samples >= xenos::MsaaSamples::k2X ? 2 : 1; - } - if (texture_cache_->IsResolutionScale2X()) { - pixel_size_x *= 2; - pixel_size_y *= 2; - } - // Viewport. - // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // In games, either all are enabled (for regular drawing) or none are (for - // rectangle lists usually). - // - // If scale/offset is enabled, the Xenos shader is writing (neglecting W - // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) - // box. If it's not, the position is in screen space. Since we can only use - // the NDC in PC APIs, we use a viewport of the largest possible size, and - // divide the position by it in translated shaders. - auto pa_cl_vte_cntl = regs.Get(); - float viewport_scale_x = - pa_cl_vte_cntl.vport_x_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) - : 4096.0f; - float viewport_scale_y = - pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) - : 4096.0f; - float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 - : 1.0f; - float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena - ? 
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 - : std::abs(viewport_scale_x); - float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : std::abs(viewport_scale_y); - float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 - : 0.0f; - if (regs.Get().vtx_window_offset_enable) { - viewport_offset_x += float(pa_sc_window_offset.window_x_offset); - viewport_offset_y += float(pa_sc_window_offset.window_y_offset); - } D3D12_VIEWPORT viewport; - viewport.TopLeftX = - (viewport_offset_x - viewport_scale_x) * float(pixel_size_x); - viewport.TopLeftY = - (viewport_offset_y - viewport_scale_y) * float(pixel_size_y); - viewport.Width = viewport_scale_x * 2.0f * float(pixel_size_x); - viewport.Height = viewport_scale_y * 2.0f * float(pixel_size_y); - viewport.MinDepth = viewport_offset_z; - viewport.MaxDepth = viewport_offset_z + viewport_scale_z; - if (viewport_scale_z < 0.0f) { - // MinDepth > MaxDepth doesn't work on Nvidia, emulating it in vertex - // shaders and when applying polygon offset. - std::swap(viewport.MinDepth, viewport.MaxDepth); - } + viewport.TopLeftX = viewport_info.left; + viewport.TopLeftY = viewport_info.top; + viewport.Width = viewport_info.width; + viewport.Height = viewport_info.height; + viewport.MinDepth = viewport_info.z_min; + viewport.MaxDepth = viewport_info.z_max; ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX; ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY; ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width; @@ -2847,13 +2810,11 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } // Scissor. 
- draw_util::Scissor scissor; - draw_util::GetScissor(regs, scissor); D3D12_RECT scissor_rect; - scissor_rect.left = LONG(scissor.left * pixel_size_x); - scissor_rect.top = LONG(scissor.top * pixel_size_y); - scissor_rect.right = LONG((scissor.left + scissor.width) * pixel_size_x); - scissor_rect.bottom = LONG((scissor.top + scissor.height) * pixel_size_y); + scissor_rect.left = LONG(scissor.left); + scissor_rect.top = LONG(scissor.top); + scissor_rect.right = LONG(scissor.left + scissor.width); + scissor_rect.bottom = LONG(scissor.top + scissor.height); ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left; ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top; ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right; @@ -2865,6 +2826,8 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } if (!edram_rov_used_) { + const RegisterFile& regs = *register_file_; + // Blend factor. ff_blend_factor_update_needed_ |= ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; @@ -2908,7 +2871,9 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { void D3D12CommandProcessor::UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, xenos::Endian index_endian, - uint32_t used_texture_mask, bool early_z, uint32_t color_mask, + const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, + uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, + uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); @@ -2920,7 +2885,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( auto pa_su_point_minmax = regs.Get(); auto pa_su_point_size = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - auto pa_su_vtx_cntl = regs.Get(); float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_colorcontrol = 
regs.Get(); auto rb_depth_info = regs.Get(); @@ -2986,11 +2950,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } } - // Get viewport Z scale - needed for flags and ROV output. - float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 - : 1.0f; - bool dirty = false; // Flags. @@ -3023,10 +2982,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= (pa_cl_clip_cntl.value & 0b111111) << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; } - // Reversed depth. - if (viewport_scale_z < 0.0f) { - flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; - } // Whether SV_IsFrontFace matters. if (primitive_two_faced) { flags |= DxbcShaderTranslator::kSysFlag_PrimitiveTwoFaced; @@ -3122,81 +3077,24 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } // Conversion to Direct3D 12 normalized device coordinates. - // See viewport configuration in UpdateFixedFunctionState for explanations. - // X and Y scale/offset is to convert unnormalized coordinates generated by - // shaders (for rectangle list drawing, for instance) to the viewport of the - // largest possible render target size that is used to emulate unnormalized - // coordinates. Z scale/offset is to convert from OpenGL NDC to Direct3D NDC - // if needed. Also apply half-pixel offset to reproduce Direct3D 9 - // rasterization rules - must be done before clipping, not through the - // viewport, for SSAA and resolution scale to work correctly. - float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; - float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // Kill all primitives if multipass or both faces are culled, but still need // to do memexport. 
if (sq_program_cntl.vs_export_mode == xenos::VertexShaderExportMode::kMultipass || (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back)) { - dirty |= !std::isnan(system_constants_.ndc_scale[0]); - dirty |= !std::isnan(system_constants_.ndc_scale[1]); - dirty |= !std::isnan(system_constants_.ndc_scale[2]); - dirty |= !std::isnan(system_constants_.ndc_offset[0]); - dirty |= !std::isnan(system_constants_.ndc_offset[1]); - dirty |= !std::isnan(system_constants_.ndc_offset[2]); float nan_value = std::nanf(""); - system_constants_.ndc_scale[0] = nan_value; - system_constants_.ndc_scale[1] = nan_value; - system_constants_.ndc_scale[2] = nan_value; - system_constants_.ndc_offset[0] = nan_value; - system_constants_.ndc_offset[1] = nan_value; - system_constants_.ndc_offset[2] = nan_value; - } else { - // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to - // be written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF - // isn't set in this case in draws in games. - bool gl_clip_space_def = - !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; - float ndc_scale_x = pa_cl_vte_cntl.vport_x_scale_ena - ? (viewport_scale_x >= 0.0f ? 1.0f : -1.0f) - : (1.0f / 4096.0f); - float ndc_scale_y = pa_cl_vte_cntl.vport_y_scale_ena - ? (viewport_scale_y >= 0.0f ? -1.0f : 1.0f) - : (-1.0f / 4096.0f); - float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; - float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; - float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; - float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; - if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) { - // Signs are hopefully correct here, tested in GTA IV on both clearing - // (without a viewport) and drawing things near the edges of the screen. 
- if (pa_cl_vte_cntl.vport_x_scale_ena) { - if (viewport_scale_x != 0.0f) { - ndc_offset_x += 0.5f / viewport_scale_x; - } - } else { - ndc_offset_x += 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - if (viewport_scale_y != 0.0f) { - ndc_offset_y += 0.5f / viewport_scale_y; - } - } else { - ndc_offset_y -= 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } + for (uint32_t i = 0; i < 3; ++i) { + dirty |= !std::isnan(system_constants_.ndc_scale[i]); + system_constants_.ndc_scale[i] = nan_value; + } + } else { + for (uint32_t i = 0; i < 3; ++i) { + dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i]; + dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i]; + system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i]; + system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i]; } - dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; - dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; - dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; - dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; - dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; - dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; - system_constants_.ndc_scale[0] = ndc_scale_x; - system_constants_.ndc_scale[1] = ndc_scale_y; - system_constants_.ndc_scale[2] = ndc_scale_z; - system_constants_.ndc_offset[0] = ndc_offset_x; - system_constants_.ndc_offset[1] = ndc_offset_y; - system_constants_.ndc_offset[2] = ndc_offset_z; } // Point size. @@ -3212,19 +3110,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.point_size[1] = point_size_y; system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[1] = point_size_max; - float point_screen_to_ndc_x, point_screen_to_ndc_y; - if (pa_cl_vte_cntl.vport_x_scale_ena) { - point_screen_to_ndc_x = - (viewport_scale_x != 0.0f) ? 
(0.5f / viewport_scale_x) : 0.0f; - } else { - point_screen_to_ndc_x = 1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - point_screen_to_ndc_y = - (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; - } else { - point_screen_to_ndc_y = -1.0f / xenos::kTexture2DCubeMaxWidthHeight; - } + float point_screen_to_ndc_x = + (0.5f * 2.0f * pixel_size_x) / viewport_info.width; + float point_screen_to_ndc_y = + (0.5f * 2.0f * pixel_size_y) / viewport_info.height; dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x; dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y; system_constants_.point_screen_to_ndc[0] = point_screen_to_ndc_x; @@ -3374,20 +3263,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; - // The Z range is reversed in the vertex shader if it's reverse - use the - // absolute value of the scale. - float depth_range_scale = std::abs(viewport_scale_z); + float depth_range_scale = viewport_info.z_max - viewport_info.z_min; dirty |= system_constants_.edram_depth_range_scale != depth_range_scale; system_constants_.edram_depth_range_scale = depth_range_scale; - float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 - : 0.0f; - if (viewport_scale_z < 0.0f) { - // Similar to MinDepth in fixed-function viewport calculation. 
- depth_range_offset += viewport_scale_z; - } - dirty |= system_constants_.edram_depth_range_offset != depth_range_offset; - system_constants_.edram_depth_range_offset = depth_range_offset; + dirty |= system_constants_.edram_depth_range_offset != viewport_info.z_min; + system_constants_.edram_depth_range_offset = viewport_info.z_min; // For non-polygons, front polygon offset is used, and it's enabled if // POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index ceffe5fd0..982f9eac5 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -26,6 +26,7 @@ #include "xenia/gpu/d3d12/primitive_converter.h" #include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/texture_cache.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" @@ -345,11 +346,15 @@ class D3D12CommandProcessor : public CommandProcessor { D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); - void UpdateFixedFunctionState(bool primitive_two_faced); + void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info, + const draw_util::Scissor& scissor, + bool primitive_two_faced); void UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, xenos::Endian index_endian, - uint32_t used_texture_mask, bool early_z, uint32_t color_mask, + const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, + uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, + uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, diff --git a/src/xenia/gpu/draw_util.cc 
b/src/xenia/gpu/draw_util.cc index 202d34965..c78d5122a 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -111,6 +111,181 @@ int32_t FloatToD3D11Fixed16p8(float f32) { return result.s; } +void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, + float pixel_size_y, bool origin_bottom_left, + float xy_max, bool allow_reverse_z, + ViewportInfo& viewport_info_out) { + assert_true(pixel_size_x >= 1.0f); + assert_true(pixel_size_y >= 1.0f); + assert_true(xy_max >= 1.0f); + + // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // In games, either all are enabled (for regular drawing) or none are (for + // rectangle lists usually). + // + // If scale/offset is enabled, the Xenos shader is writing (neglecting W + // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) + // box. If it's not, the position is in screen space. Since we can only use + // the NDC in PC APIs, we use a viewport of the largest possible size, and + // divide the position by it in translated shaders. + + auto pa_cl_clip_cntl = regs.Get(); + auto pa_cl_vte_cntl = regs.Get(); + auto pa_su_sc_mode_cntl = regs.Get(); + auto pa_su_vtx_cntl = regs.Get(); + + float viewport_left, viewport_top; + float viewport_width, viewport_height; + float ndc_scale_x, ndc_scale_y; + float ndc_offset_x, ndc_offset_y; + // To avoid zero size viewports, which would harm division and aren't allowed + // on Vulkan. Nothing will ever be covered by a viewport of this size - this + // is 2 orders of magnitude smaller than a .8 subpixel, and thus shouldn't + // have any effect on rounding, n and n + 1 / 1024 would be rounded to the + // same .8 fixed-point value, thus in fixed-point, the viewport would have + // zero size. + const float size_min = 1.0f / 1024.0f; + + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena + ? 
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + : 0.0f; + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + : 0.0f; + if (pa_su_sc_mode_cntl.vtx_window_offset_enable) { + auto pa_sc_window_offset = regs.Get(); + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); + } + + if (pa_cl_vte_cntl.vport_x_scale_ena) { + float pa_cl_vport_xscale = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; + float viewport_scale_x_abs = std::abs(pa_cl_vport_xscale) * pixel_size_x; + viewport_left = viewport_offset_x * pixel_size_x - viewport_scale_x_abs; + float viewport_right = viewport_left + viewport_scale_x_abs * 2.0f; + // Keep the viewport in the positive quarter-plane for simplicity of + // clamping to the maximum supported bounds. + float cutoff_left = std::fmax(-viewport_left, 0.0f); + float cutoff_right = std::fmax(viewport_right - xy_max, 0.0f); + viewport_left = std::fmax(viewport_left, 0.0f); + viewport_right = std::fmin(viewport_right, xy_max); + viewport_width = viewport_right - viewport_left; + if (viewport_width > size_min) { + ndc_scale_x = + (viewport_width + cutoff_left + cutoff_right) / viewport_width; + if (pa_cl_vport_xscale < 0.0f) { + ndc_scale_x = -ndc_scale_x; + } + ndc_offset_x = + ((cutoff_right - cutoff_left) * (0.5f * 2.0f)) / viewport_width; + } else { + // Empty viewport, but don't pass 0 because that's against the Vulkan + // specification. + viewport_left = 0.0f; + viewport_width = size_min; + ndc_scale_x = 0.0f; + ndc_offset_x = 0.0f; + } + } else { + // Drawing without a viewport and without clipping to one - use a viewport + // covering the entire potential guest render target or the positive part of + // the host viewport area, whichever is smaller, and apply the offset, if + // enabled, via the shader. 
+ viewport_left = 0.0f; + viewport_width = std::min( + float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_x, xy_max); + ndc_scale_x = (2.0f * pixel_size_x) / viewport_width; + ndc_offset_x = viewport_offset_x * ndc_scale_x - 1.0f; + } + + if (pa_cl_vte_cntl.vport_y_scale_ena) { + float pa_cl_vport_yscale = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; + float viewport_scale_y_abs = std::abs(pa_cl_vport_yscale) * pixel_size_y; + viewport_top = viewport_offset_y * pixel_size_y - viewport_scale_y_abs; + float viewport_bottom = viewport_top + viewport_scale_y_abs * 2.0f; + float cutoff_top = std::fmax(-viewport_top, 0.0f); + float cutoff_bottom = std::fmax(viewport_bottom - xy_max, 0.0f); + viewport_top = std::fmax(viewport_top, 0.0f); + viewport_bottom = std::fmin(viewport_bottom, xy_max); + viewport_height = viewport_bottom - viewport_top; + if (viewport_height > size_min) { + ndc_scale_y = + (viewport_height + cutoff_top + cutoff_bottom) / viewport_height; + if (pa_cl_vport_yscale < 0.0f) { + ndc_scale_y = -ndc_scale_y; + } + ndc_offset_y = + ((cutoff_bottom - cutoff_top) * (0.5f * 2.0f)) / viewport_height; + } else { + // Empty viewport, but don't pass 0 because that's against the Vulkan + // specification. + viewport_top = 0.0f; + viewport_height = size_min; + ndc_scale_y = 0.0f; + ndc_offset_y = 0.0f; + } + } else { + // As for X, the host viewport starts at the origin, and the offset, if + // enabled, is applied via the shader. + viewport_top = 0.0f; + viewport_height = std::min( + float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_y, xy_max); + ndc_scale_y = (2.0f * pixel_size_y) / viewport_height; + ndc_offset_y = viewport_offset_y * ndc_scale_y - 1.0f; + } + + // Apply the vertex half-pixel offset via the shader (it must not affect + // clipping, otherwise with SSAA or resolution scale, samples in the left/top + // half will never be covered). 
+ if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) { + ndc_offset_x += (0.5f * 2.0f * pixel_size_x) / viewport_width; + ndc_offset_y += (0.5f * 2.0f * pixel_size_y) / viewport_height; + } + + if (origin_bottom_left) { + ndc_scale_y = -ndc_scale_y; + ndc_offset_y = -ndc_offset_y; + } + + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + : 1.0f; + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + : 0.0f; + // Vulkan requires the depth bounds to be in the 0 to 1 range without + // VK_EXT_depth_range_unrestricted (which isn't used on the Xbox 360). + float viewport_z_min = std::min(std::fmax(viewport_offset_z, 0.0f), 1.0f); + float viewport_z_max = + std::min(std::fmax(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f); + // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be + // written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't + // set in this case in draws in games. + bool gl_clip_space_def = + !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; + float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; + float ndc_offset_z = gl_clip_space_def ? 
0.5f : 0.0f; + if (viewport_z_min > viewport_z_max && !allow_reverse_z) { + std::swap(viewport_z_min, viewport_z_max); + ndc_scale_z = -ndc_scale_z; + ndc_offset_z = 1.0f - ndc_offset_z; + } + + viewport_info_out.left = viewport_left; + viewport_info_out.top = viewport_top; + viewport_info_out.width = viewport_width; + viewport_info_out.height = viewport_height; + viewport_info_out.z_min = viewport_z_min; + viewport_info_out.z_max = viewport_z_max; + viewport_info_out.ndc_scale[0] = ndc_scale_x; + viewport_info_out.ndc_scale[1] = ndc_scale_y; + viewport_info_out.ndc_scale[2] = ndc_scale_z; + viewport_info_out.ndc_offset[0] = ndc_offset_x; + viewport_info_out.ndc_offset[1] = ndc_offset_y; + viewport_info_out.ndc_offset[2] = ndc_offset_z; +} + void GetScissor(const RegisterFile& regs, Scissor& scissor_out) { // FIXME(Triang3l): Screen scissor isn't applied here, but it seems to be // unused on Xbox 360 Direct3D 9. diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 7ef3186a0..2cee26de7 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -33,6 +33,28 @@ namespace draw_util { // for use with the top-left rasterization rule later. int32_t FloatToD3D11Fixed16p8(float f32); +struct ViewportInfo { + // The returned viewport will always be in the positive quarter-plane for + // simplicity of clamping to the maximum size supported by the host, negative + // offset will be applied via ndc_offset. + float left; + float top; + float width; + float height; + float z_min; + float z_max; + float ndc_scale[3]; + float ndc_offset[3]; +}; +// Converts the guest viewport (or fakes one if drawing without a viewport) to +// a viewport, plus values to multiply-add the returned position by, usable on +// host graphics APIs such as Direct3D 11+ and Vulkan, also forcing it to the +// Direct3D clip space with 0...W Z rather than -W...W. 
+void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, + float pixel_size_y, bool origin_bottom_left, + float xy_max, bool allow_reverse_z, + ViewportInfo& viewport_info_out); + struct Scissor { uint32_t left; uint32_t top; diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 3f9140158..56278157d 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1044,10 +1044,9 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { DxbcOpEndIf(); } - // Apply scale for drawing without a viewport, and also remap from OpenGL - // Z clip space to Direct3D if needed. Also, if the vertex shader is - // multipass, the NDC scale constant can be used to set position to NaN to - // kill all primitives. + // Apply scale for guest to host viewport and clip space conversion. Also, if + // the vertex shader is multipass, the NDC scale constant can be used to set + // position to NaN to kill all primitives. system_constants_used_ |= 1ull << kSysConst_NDCScale_Index; DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0111), DxbcSrc::R(system_temp_position_), @@ -1056,16 +1055,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { kSysConst_NDCScale_Vec, kSysConst_NDCScale_Comp * 0b010101 + 0b100100)); - // Reverse Z (Z = W - Z) if the viewport depth is inverted. - DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_ReverseZ)); - DxbcOpIf(true, temp_x_src); - DxbcOpAdd(DxbcDest::R(system_temp_position_, 0b0100), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW), - -DxbcSrc::R(system_temp_position_, DxbcSrc::kZZZZ)); - DxbcOpEndIf(); - - // Apply offset (multiplied by W) for drawing without a viewport and for half - // pixel offset. + // Apply offset (multiplied by W) used for the same purposes. 
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index; DxbcOpMAd(DxbcDest::R(system_temp_position_, 0b0111), DxbcSrc::CB(cbuffer_index_system_constants_, diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index c45cfc4d9..997be5fe7 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -123,7 +123,6 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane3_Shift, kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5_Shift, - kSysFlag_ReverseZ_Shift, kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_PrimitiveTwoFaced_Shift, kSysFlag_AlphaPassIfLess_Shift, @@ -165,7 +164,6 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane3 = 1u << kSysFlag_UserClipPlane3_Shift, kSysFlag_UserClipPlane4 = 1u << kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5 = 1u << kSysFlag_UserClipPlane5_Shift, - kSysFlag_ReverseZ = 1u << kSysFlag_ReverseZ_Shift, kSysFlag_KillIfAnyVertexKilled = 1u << kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_PrimitiveTwoFaced = 1u << kSysFlag_PrimitiveTwoFaced_Shift, kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, @@ -220,8 +218,7 @@ class DxbcShaderTranslator : public ShaderTranslator { float point_size[2]; float point_size_min_max[2]; - // Inverse scale of the host viewport (but not supersampled), with signs - // pre-applied. + // Screen point size * 2 (but not supersampled) -> size in NDC. float point_screen_to_ndc[2]; float user_clip_planes[6][4]; diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 4943faad9..c9b608e9e 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -136,7 +136,7 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, })); // As we run vblank interrupts the debugger must be able to suspend us. 
vsync_worker_thread_->set_can_debugger_suspend(true); - vsync_worker_thread_->set_name("GraphicsSystem Vsync"); + vsync_worker_thread_->set_name("GPU VSync"); vsync_worker_thread_->Create(); if (cvars::trace_gpu_stream) { diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso index b871af09c..13cca4e6c 100644 Binary files a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso and b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso differ diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h index 997fb892b..de5761c47 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h @@ -1,11 +1,11 @@ // generated from `xb buildhlsl` // source: primitive_point_list.gs.hlsl const uint8_t primitive_point_list_gs[] = { - 0x44, 0x58, 0x42, 0x43, 0x6F, 0x7A, 0xE0, 0xA0, 0x82, 0xF0, 0x8E, 0x77, - 0x2B, 0x62, 0x44, 0x00, 0xA3, 0x34, 0x47, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x0C, 0x1E, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x16, 0x84, 0x10, 0x1C, 0xE9, 0xAD, 0x76, 0xF9, + 0x92, 0xF2, 0xD5, 0x65, 0x7C, 0x8A, 0x5F, 0xC5, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x1E, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0xD0, 0x0A, 0x00, 0x00, 0x28, 0x0D, 0x00, 0x00, 0xAC, 0x0F, 0x00, 0x00, - 0x70, 0x1D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x94, 0x0A, 0x00, 0x00, + 0x84, 0x1D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x94, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x47, 0x00, 0x05, 0x00, 0x00, 0x6A, 0x0A, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, @@ -335,8 +335,8 @@ const uint8_t primitive_point_list_gs[] = { 0x54, 0x45, 
0x58, 0x43, 0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x53, 0x56, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, - 0x00, 0xAB, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0xBC, 0x0D, 0x00, 0x00, - 0x51, 0x00, 0x02, 0x00, 0x6F, 0x03, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, + 0x00, 0xAB, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0xD0, 0x0D, 0x00, 0x00, + 0x51, 0x00, 0x02, 0x00, 0x74, 0x03, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0xF2, 0x10, 0x20, 0x00, @@ -369,7 +369,7 @@ const uint8_t primitive_point_list_gs[] = { 0x13, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0x32, 0x10, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x04, 0x42, 0x10, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x5D, 0x08, 0x00, 0x01, + 0x68, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x5D, 0x08, 0x00, 0x01, 0x8F, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5C, 0x28, 0x00, 0x01, 0x65, 0x00, 0x00, 0x03, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xF2, 0x20, 0x10, 0x00, @@ -426,113 +426,13 @@ const uint8_t primitive_point_list_gs[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x18, 0x08, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF6, 0x1F, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x38, 0x00, 0x78, 0x0A, - 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x04, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0xBF, - 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0xBF, - 0x00, 0x00, 0x78, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 
0x00, 0x00, 0x00, - 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x14, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x46, 
0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, - 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x60, 0x08, 0xC2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x04, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 
0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x46, 
0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, - 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, - 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, - 0x12, 0x00, 0x00, 0x00, 0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x18, 0x09, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x38, 0x06, + 0x72, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x40, 0x05, + 0x82, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x08, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xC6, 0x09, 0x10, 0x00, 0x01, 
0x00, 0x00, 0x00, + 0x46, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -573,7 +473,7 @@ const uint8_t primitive_point_list_gs[] = { 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -613,6 +513,56 @@ const uint8_t primitive_point_list_gs[] = { 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x2A, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, + 0xE6, 0x0A, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xF2, 0x20, 0x10, 0x00, 
0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x08, + 0xA2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x04, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 
0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, @@ -620,26 +570,78 @@ const uint8_t primitive_point_list_gs[] = { 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, - 0xE6, 0x0A, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0xD6, 0x05, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x75, 
0x00, 0x00, 0x03, - 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, - 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x20, 0x05, + 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x08, 0x32, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 
0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x08, 0x32, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x00, 0x00, 0x80, 0x3F, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x42, 0x20, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, + 0x32, 0x20, 0x10, 0x00, 0x11, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, + 0x32, 0x20, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xC2, 
0x20, 0x10, 0x00, + 0x12, 0x00, 0x00, 0x00, 0xA6, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0xF2, 0x20, 0x10, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x46, 0x1E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x06, 0x32, 0x20, 0x10, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x46, 0x10, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x03, 0x00, 0x00, 0x11, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt index 7fdfc3a62..436c59923 100644 --- 
a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt @@ -130,7 +130,7 @@ dcl_input_siv v[1][18].xyzw, position dcl_input v[1][19].xyzw dcl_input v[1][20].xy dcl_input v[1][20].z -dcl_temps 2 +dcl_temps 3 dcl_inputprimitive point dcl_stream m0 dcl_outputtopology trianglestrip @@ -170,58 +170,9 @@ max [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].xxxx min [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].yyyy mul [precise(xy)] r0.xy, r0.xyxx, CB0[0][2].zwzz mul [precise(xy)] r0.xy, r0.xyxx, v[0][18].wwww -mul [precise] r1.xyzw, r0.xyxy, l(-1.000000, 1.000000, 1.000000, -1.000000) -add [precise] r1.xyzw, r1.xyzw, v[0][18].xyxy -mov o0.xyzw, v[0][0].xyzw -mov o1.xyzw, v[0][1].xyzw -mov o2.xyzw, v[0][2].xyzw -mov o3.xyzw, v[0][3].xyzw -mov o4.xyzw, v[0][4].xyzw -mov o5.xyzw, v[0][5].xyzw -mov o6.xyzw, v[0][6].xyzw -mov o7.xyzw, v[0][7].xyzw -mov o8.xyzw, v[0][8].xyzw -mov o9.xyzw, v[0][9].xyzw -mov o10.xyzw, v[0][10].xyzw -mov o11.xyzw, v[0][11].xyzw -mov o12.xyzw, v[0][12].xyzw -mov o13.xyzw, v[0][13].xyzw -mov o14.xyzw, v[0][14].xyzw -mov o15.xyzw, v[0][15].xyzw -mov o16.xy, l(0,1.000000,0,0) -mov o16.z, v[0][16].z -mov o17.xy, v[0][17].xyxx -mov o18.xy, r1.xyxx -mov o18.zw, v[0][18].zzzw -mov o19.xyzw, v[0][19].xyzw -mov o20.xy, v[0][20].xyxx -emit_stream m0 -add [precise(zw)] r0.zw, r0.xxxy, v[0][18].xxxy -mov o0.xyzw, v[0][0].xyzw -mov o1.xyzw, v[0][1].xyzw -mov o2.xyzw, v[0][2].xyzw -mov o3.xyzw, v[0][3].xyzw -mov o4.xyzw, v[0][4].xyzw -mov o5.xyzw, v[0][5].xyzw -mov o6.xyzw, v[0][6].xyzw -mov o7.xyzw, v[0][7].xyzw -mov o8.xyzw, v[0][8].xyzw -mov o9.xyzw, v[0][9].xyzw -mov o10.xyzw, v[0][10].xyzw -mov o11.xyzw, v[0][11].xyzw -mov o12.xyzw, v[0][12].xyzw -mov o13.xyzw, v[0][13].xyzw -mov o14.xyzw, v[0][14].xyzw -mov o15.xyzw, v[0][15].xyzw -mov o16.xy, l(1.000000,1.000000,0,0) -mov o16.z, v[0][16].z -mov o17.xy, v[0][17].xyxx -mov o18.xy, r0.zwzz -mov o18.zw, v[0][18].zzzw -mov o19.xyzw, 
v[0][19].xyzw -mov o20.xy, v[0][20].xyxx -emit_stream m0 -add [precise(xy)] r0.xy, -r0.xyxx, v[0][18].xyxx +mov [precise(xyz)] r1.xyz, -r0.xxyx +mov [precise(w)] r1.w, r0.y +add [precise] r2.xyzw, r1.xwyz, v[0][18].xyxy mov o0.xyzw, v[0][0].xyzw mov o1.xyzw, v[0][1].xyzw mov o2.xyzw, v[0][2].xyzw @@ -241,7 +192,7 @@ mov o15.xyzw, v[0][15].xyzw mov o16.xy, l(0,0,0,0) mov o16.z, v[0][16].z mov o17.xy, v[0][17].xyxx -mov o18.xy, r0.xyxx +mov o18.xy, r2.xyxx mov o18.zw, v[0][18].zzzw mov o19.xyzw, v[0][19].xyzw mov o20.xy, v[0][20].xyxx @@ -262,14 +213,65 @@ mov o12.xyzw, v[0][12].xyzw mov o13.xyzw, v[0][13].xyzw mov o14.xyzw, v[0][14].xyzw mov o15.xyzw, v[0][15].xyzw +mov o16.xy, l(0,1.000000,0,0) +mov o16.z, v[0][16].z +mov o17.xy, v[0][17].xyxx +mov o18.xy, r2.zwzz +mov o18.zw, v[0][18].zzzw +mov o19.xyzw, v[0][19].xyzw +mov o20.xy, v[0][20].xyxx +emit_stream m0 +add [precise(yw)] r0.yw, r0.xxxy, v[0][18].xxxy +mov o0.xyzw, v[0][0].xyzw +mov o1.xyzw, v[0][1].xyzw +mov o2.xyzw, v[0][2].xyzw +mov o3.xyzw, v[0][3].xyzw +mov o4.xyzw, v[0][4].xyzw +mov o5.xyzw, v[0][5].xyzw +mov o6.xyzw, v[0][6].xyzw +mov o7.xyzw, v[0][7].xyzw +mov o8.xyzw, v[0][8].xyzw +mov o9.xyzw, v[0][9].xyzw +mov o10.xyzw, v[0][10].xyzw +mov o11.xyzw, v[0][11].xyzw +mov o12.xyzw, v[0][12].xyzw +mov o13.xyzw, v[0][13].xyzw +mov o14.xyzw, v[0][14].xyzw +mov o15.xyzw, v[0][15].xyzw mov o16.xy, l(1.000000,0,0,0) mov o16.z, v[0][16].z mov o17.xy, v[0][17].xyxx -mov o18.xy, r1.zwzz +mov o18.xy, r0.ywyy +mov o18.zw, v[0][18].zzzw +mov o19.xyzw, v[0][19].xyzw +mov o20.xy, v[0][20].xyxx +emit_stream m0 +mov [precise(z)] r0.z, r1.z +add [precise(xy)] r0.xy, r0.xzxx, v[0][18].xyxx +mov o0.xyzw, v[0][0].xyzw +mov o1.xyzw, v[0][1].xyzw +mov o2.xyzw, v[0][2].xyzw +mov o3.xyzw, v[0][3].xyzw +mov o4.xyzw, v[0][4].xyzw +mov o5.xyzw, v[0][5].xyzw +mov o6.xyzw, v[0][6].xyzw +mov o7.xyzw, v[0][7].xyzw +mov o8.xyzw, v[0][8].xyzw +mov o9.xyzw, v[0][9].xyzw +mov o10.xyzw, v[0][10].xyzw +mov o11.xyzw, v[0][11].xyzw +mov 
o12.xyzw, v[0][12].xyzw +mov o13.xyzw, v[0][13].xyzw +mov o14.xyzw, v[0][14].xyzw +mov o15.xyzw, v[0][15].xyzw +mov o16.xy, l(1.000000,1.000000,0,0) +mov o16.z, v[0][16].z +mov o17.xy, v[0][17].xyxx +mov o18.xy, r0.xyxx mov o18.zw, v[0][18].zzzw mov o19.xyzw, v[0][19].xyzw mov o20.xy, v[0][20].xyxx emit_stream m0 cut_stream m0 ret -// Approximately 116 instruction slots used +// Approximately 118 instruction slots used diff --git a/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl index f9b0e6753..33d5a5c48 100644 --- a/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl +++ b/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl @@ -26,19 +26,22 @@ void main(point XeVertexPreGS xe_in[1], clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) * xe_point_screen_to_ndc * xe_in[0].post_gs.position.w; - xe_out.point_params.xy = float2(0.0, 1.0); - xe_out.position.xy = - xe_in[0].post_gs.position.xy + float2(-1.0, 1.0) * point_size; - xe_stream.Append(xe_out); - xe_out.point_params.xy = float2(1.0, 1.0); - xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; - xe_stream.Append(xe_out); xe_out.point_params.xy = float2(0.0, 0.0); + // TODO(Triang3l): On Vulkan, sign of Y needs to be inverted because of + // upper-left origin. + // TODO(Triang3l): Investigate the true signs of point sprites.
+ xe_out.position.xy = + xe_in[0].post_gs.position.xy + float2(-point_size.x, point_size.y); + xe_stream.Append(xe_out); + xe_out.point_params.xy = float2(0.0, 1.0); xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size; xe_stream.Append(xe_out); xe_out.point_params.xy = float2(1.0, 0.0); + xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; + xe_stream.Append(xe_out); + xe_out.point_params.xy = float2(1.0, 1.0); xe_out.position.xy = - xe_in[0].post_gs.position.xy + float2(1.0, -1.0) * point_size; + xe_in[0].post_gs.position.xy + float2(point_size.x, -point_size.y); xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index 570342646..8884d8efa 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -245,7 +245,7 @@ object_ref KernelState::LaunchModule(object_ref module) { module->entry_point(), 0, X_CREATE_SUSPENDED, true, true)); // We know this is the 'main thread'. - thread->set_name(fmt::format("Main XThread{:08X}", thread->handle())); + thread->set_name("Main XThread"); X_STATUS result = thread->Create(); if (XFAILED(result)) { @@ -340,7 +340,7 @@ void KernelState::SetExecutableModule(object_ref module) { } return 0; })); - dispatch_thread_->set_name("Kernel Dispatch Thread"); + dispatch_thread_->set_name("Kernel Dispatch"); dispatch_thread_->Create(); } } diff --git a/src/xenia/kernel/xam/xam_content.cc b/src/xenia/kernel/xam/xam_content.cc index 7dd874b7b..0e74c4dd8 100644 --- a/src/xenia/kernel/xam/xam_content.cc +++ b/src/xenia/kernel/xam/xam_content.cc @@ -8,6 +8,7 @@ */ #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xam/xam_private.h" @@ -45,14 +46,12 @@ struct DeviceInfo { // they incorrectly only look at the lower 32-bits of free_bytes, // when it is a 64-bit value. 
Which means any size above ~4GB // will not be recognized properly. -// -// NOTE(randprint): you can use 120 GB and 42 GB 'fullness' -// with the proper deviceID feel free to change at your discression #define ONE_GB (1024ull * 1024ull * 1024ull) static const DeviceInfo dummy_device_info_ = { - 0x00000001, 1, // found from debugging / reversing UE3 engine titles - 4ull * ONE_GB, // 4GB - 3ull * ONE_GB, // 3GB, so it looks a little used. + 0x00000001, // id + 1, // 1=HDD + 20ull * ONE_GB, // 20GB + 3ull * ONE_GB, // 3GB, so it looks a little used. u"Dummy HDD", }; #undef ONE_GB @@ -117,7 +116,7 @@ DECLARE_XAM_EXPORT1(XamContentGetDeviceState, kContent, kStub); typedef struct { xe::be device_id; - xe::be unknown; + xe::be device_type; xe::be total_bytes; xe::be free_bytes; xe::be name[28]; @@ -134,7 +133,7 @@ dword_result_t XamContentGetDeviceData( device_data.Zero(); const auto& device_info = dummy_device_info_; device_data->device_id = device_info.device_id; - device_data->unknown = device_id & 0xFFFF; // Fake it. 
+ device_data->device_type = device_info.device_type; device_data->total_bytes = device_info.total_bytes; device_data->free_bytes = device_info.free_bytes; xe::store_and_swap(&device_data->name[0], device_info.name); @@ -223,7 +222,8 @@ dword_result_t XamContentCreateDeviceEnumerator(dword_t content_type, xe::store_and_swap(&dev->device_type, dummy_device_info_.device_type); xe::store_and_swap(&dev->total_bytes, dummy_device_info_.total_bytes); xe::store_and_swap(&dev->free_bytes, dummy_device_info_.free_bytes); - xe::copy_and_swap(dev->name, dummy_device_info_.name, 28); + xe::copy_and_swap(dev->name, dummy_device_info_.name, + xe::countof(dev->name)); } *handle_out = e->handle(); diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index 0589e83a1..a08ab60aa 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -8,6 +8,7 @@ */ #include "xenia/base/logging.h" +#include "xenia/base/string_util.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" #include "xenia/kernel/util/shim_utils.h" @@ -74,15 +75,15 @@ static SYSTEMTIME xeGetLocalSystemTime(uint64_t filetime) { void XamFormatDateString(dword_t unk, qword_t filetime, lpvoid_t output_buffer, dword_t output_count) { - std::memset(output_buffer, 0, output_count * 2); + std::memset(output_buffer, 0, output_count * sizeof(char16_t)); // TODO: implement this for other platforms #if XE_PLATFORM_WIN32 auto st = xeGetLocalSystemTime(filetime); // TODO: format this depending on users locale? 
auto str = fmt::format(u"{:02d}/{:02d}/{}", st.wMonth, st.wDay, st.wYear); - auto copy_length = std::min(size_t(output_count), str.size()) * 2; - xe::copy_and_swap(output_buffer.as(), str.c_str(), copy_length); + xe::string_util::copy_and_swap_truncating(output_buffer.as(), str, + output_count); #else assert_always(); #endif @@ -91,15 +92,15 @@ DECLARE_XAM_EXPORT1(XamFormatDateString, kNone, kImplemented); void XamFormatTimeString(dword_t unk, qword_t filetime, lpvoid_t output_buffer, dword_t output_count) { - std::memset(output_buffer, 0, output_count * 2); + std::memset(output_buffer, 0, output_count * sizeof(char16_t)); // TODO: implement this for other platforms #if XE_PLATFORM_WIN32 auto st = xeGetLocalSystemTime(filetime); // TODO: format this depending on users locale? auto str = fmt::format(u"{:02d}:{:02d}", st.wHour, st.wMinute); - auto copy_count = std::min(size_t(output_count), str.size()); - xe::copy_and_swap(output_buffer.as(), str.c_str(), copy_count); + xe::string_util::copy_and_swap_truncating(output_buffer.as(), str, + output_count); #else assert_always(); #endif @@ -113,7 +114,7 @@ dword_result_t keXamBuildResourceLocator(uint64_t module, uint32_t buffer_count) { std::u16string path; if (!module) { - path = fmt::format(u"file://media:/{0}.xzp#{0}", container, resource); + path = fmt::format(u"file://media:/{}.xzp#{}", container, resource); XELOGD( "XamBuildResourceLocator({0}) returning locator to local file {0}.xzp", xe::to_utf8(container)); @@ -121,8 +122,8 @@ dword_result_t keXamBuildResourceLocator(uint64_t module, path = fmt::format(u"section://{:X},{}#{}", (uint32_t)module, container, resource); } - auto copy_count = std::min(size_t(buffer_count), path.size()); - xe::copy_and_swap(buffer_ptr.as(), path.c_str(), copy_count); + xe::string_util::copy_and_swap_truncating(buffer_ptr.as(), path, + buffer_count); return 0; } diff --git a/src/xenia/kernel/xam/xam_ui.cc b/src/xenia/kernel/xam/xam_ui.cc index 4e5f077aa..4f1348a69 100644 --- 
a/src/xenia/kernel/xam/xam_ui.cc +++ b/src/xenia/kernel/xam/xam_ui.cc @@ -9,6 +9,7 @@ #include "third_party/imgui/imgui.h" #include "xenia/base/logging.h" +#include "xenia/base/string_util.h" #include "xenia/emulator.h" #include "xenia/kernel/kernel_flags.h" #include "xenia/kernel/kernel_state.h" @@ -188,8 +189,8 @@ class KeyboardInputDialog : public xe::ui::ImGuiDialog { *out_text_ = default_text; } text_buffer_.resize(max_length); - std::strncpy(text_buffer_.data(), default_text_.c_str(), - std::min(text_buffer_.size() - 1, default_text_.size())); + xe::string_util::copy_truncating(text_buffer_.data(), default_text_, + text_buffer_.size()); } void OnDraw(ImGuiIO& io) override { diff --git a/src/xenia/kernel/xam/xam_user.cc b/src/xenia/kernel/xam/xam_user.cc index 02dda8d2e..9cc2f1dce 100644 --- a/src/xenia/kernel/xam/xam_user.cc +++ b/src/xenia/kernel/xam/xam_user.cc @@ -10,6 +10,8 @@ #include #include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/string_util.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xam/xam_private.h" @@ -91,7 +93,8 @@ X_HRESULT_result_t XamUserGetSigninInfo(dword_t user_index, dword_t flags, const auto& user_profile = kernel_state()->user_profile(); info->xuid = user_profile->xuid(); info->signin_state = user_profile->signin_state(); - std::strncpy(info->name, user_profile->name().data(), 15); + xe::string_util::copy_truncating(info->name, user_profile->name(), + xe::countof(info->name)); return X_E_SUCCESS; } DECLARE_XAM_EXPORT1(XamUserGetSigninInfo, kUserProfiles, kImplemented); @@ -110,10 +113,8 @@ dword_result_t XamUserGetName(dword_t user_index, lpstring_t buffer, const auto& user_name = user_profile->name(); // Real XAM will only copy a maximum of 15 characters out. 
- size_t copy_length = std::min( - {size_t(15), user_name.size(), static_cast(buffer_len) - 1}); - std::memcpy(buffer, user_name.data(), copy_length); - buffer[copy_length] = '\0'; + xe::string_util::copy_truncating(buffer, user_name, + std::min(buffer_len.value(), uint32_t(15))); return X_ERROR_SUCCESS; } DECLARE_XAM_EXPORT1(XamUserGetName, kUserProfiles, kImplemented); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc index a9aeb36bd..814120140 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc @@ -1,4 +1,4 @@ -/** +/** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** @@ -41,10 +41,23 @@ struct CreateOptions { static bool IsValidPath(const std::string_view s, bool is_pattern) { // TODO(gibbed): validate path components individually + bool got_asterisk = false; for (const auto& c : s) { if (c <= 31 || c >= 127) { return false; } + if (got_asterisk) { + // * must be followed by a . (*.) + // + // Viva Piñata: Party Animals (4D530819) has a bug in its game code where + // it attempts to FindFirstFile() with filters of "Game:\\*_X3.rkv", + // "Game:\\m*_X3.rkv", and "Game:\\w*_X3.rkv" and will infinite loop if + // the path filter is allowed. + if (c != '.') { + return false; + } + got_asterisk = false; + } switch (c) { case '"': // case '*': @@ -59,12 +72,20 @@ static bool IsValidPath(const std::string_view s, bool is_pattern) { case '|': { return false; } - case '*': + case '*': { + // Pattern-specific (for NtQueryDirectoryFile) + if (!is_pattern) { + return false; + } + got_asterisk = true; + break; + } case '?': { // Pattern-specific (for NtQueryDirectoryFile) if (!is_pattern) { return false; } + break; } default: { break; @@ -425,7 +446,7 @@ dword_result_t NtQueryDirectoryFile( // Enforce that the path is ASCII. 
if (!IsValidPath(name, true)) { - return X_STATUS_OBJECT_NAME_INVALID; + return X_STATUS_INVALID_PARAMETER; } if (file) { diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc index 20aef4564..b2aafe294 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc @@ -156,12 +156,15 @@ XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state) // // aomega08 says the value is 0x02000817, bit 27: debug mode on. // When that is set, though, allocs crash in weird ways. + // + // From kernel disassembly, after storage is initialized + // XboxHardwareInfo flags is set with flag 5 (0x20). uint32_t pXboxHardwareInfo = memory_->SystemHeapAlloc(16); auto lpXboxHardwareInfo = memory_->TranslateVirtual(pXboxHardwareInfo); export_resolver_->SetVariableMapping( "xboxkrnl.exe", ordinals::XboxHardwareInfo, pXboxHardwareInfo); - xe::store_and_swap(lpXboxHardwareInfo + 0, 0); // flags - xe::store_and_swap(lpXboxHardwareInfo + 4, 0x06); // cpu count + xe::store_and_swap(lpXboxHardwareInfo + 0, 0x20); // flags + xe::store_and_swap(lpXboxHardwareInfo + 4, 0x06); // cpu count // Remaining 11b are zeroes? // ExConsoleGameRegion, probably same values as keyvault region uses? diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 1e723ff65..34a95dd7c 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -370,10 +370,6 @@ X_STATUS XThread::Create() { pcr->dpc_active = 0; // DPC active bool? - // Assign the thread to the logical processor, and also set up the current CPU - // in KPCR and KTHREAD. - SetActiveCpu(cpu_index); - // Always retain when starting - the thread owns itself until exited. RetainHandle(); @@ -434,6 +430,10 @@ X_STATUS XThread::Create() { thread_->set_priority(creation_params_.creation_flags & 0x20 ?
1 : 0); } + // Assign the newly created thread to the logical processor, and also set up + // the current CPU in KPCR and KTHREAD. + SetActiveCpu(cpu_index); + // Notify processor of our creation. emulator()->processor()->OnThreadCreated(handle(), thread_state_, this); @@ -728,11 +728,12 @@ void XThread::SetActiveCpu(uint8_t cpu_index) { thread_object.current_cpu = cpu_index; } - if (xe::threading::logical_processor_count() < 6) { - XELOGW("Too few processors - scheduling will be wonky"); - } - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(uint64_t(1) << cpu_index); + if (xe::threading::logical_processor_count() >= 6) { + if (!cvars::ignore_thread_affinities) { + thread_->set_affinity_mask(uint64_t(1) << cpu_index); + } + } else { + XELOGW("Too few processor cores - scheduling will be wonky"); } }