diff --git a/.ci/build-mac.sh b/.ci/build-mac.sh index f386866264..9e16fce4ec 100755 --- a/.ci/build-mac.sh +++ b/.ci/build-mac.sh @@ -13,15 +13,16 @@ export HOMEBREW_NO_AUTO_UPDATE=1 export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 export HOMEBREW_NO_ENV_HINTS=1 export HOMEBREW_NO_INSTALL_CLEANUP=1 +brew update brew install -f --overwrite --quiet ccache "llvm@$LLVM_COMPILER_VER" brew link -f --overwrite --quiet "llvm@$LLVM_COMPILER_VER" if [ "$AARCH64" -eq 1 ]; then - brew install -f --overwrite --quiet googletest opencv@4 sdl3 vulkan-headers vulkan-loader molten-vk - brew unlink --quiet ffmpeg fmt qtbase qtsvg qtdeclarative + brew install -f --overwrite --quiet googletest opencv@4 sdl3 vulkan-headers vulkan-loader molten-vk + brew unlink --quiet ffmpeg fmt qtbase qtsvg qtdeclarative protobuf || true else arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /usr/local/bin/brew install -f --overwrite --quiet python@3.14 opencv@4 "llvm@$LLVM_COMPILER_VER" sdl3 vulkan-headers vulkan-loader molten-vk - arch -x86_64 /usr/local/bin/brew unlink --quiet ffmpeg qtbase qtsvg qtdeclarative + arch -x86_64 /usr/local/bin/brew unlink --quiet ffmpeg qtbase qtsvg qtdeclarative protobuf || true fi export CXX=clang++ @@ -122,7 +123,6 @@ cmake .. 
\ -DUSE_SYSTEM_MVK=ON \ -DUSE_SYSTEM_SDL=ON \ -DUSE_SYSTEM_OPENCV=ON \ - -DUSE_SYSTEM_PROTOBUF=ON \ -G Ninja fi diff --git a/.ci/deploy-mac.sh b/.ci/deploy-mac.sh index 70db614b59..819e0725bc 100755 --- a/.ci/deploy-mac.sh +++ b/.ci/deploy-mac.sh @@ -21,7 +21,7 @@ rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ "rpcs3.app/Contents/Frameworks/QtVirtualKeyboard.framework" \ "rpcs3.app/Contents/Plugins/platforminputcontexts" \ "rpcs3.app/Contents/Plugins/virtualkeyboard" \ -"rpcs3.app/Contents/Resources/git" +"rpcs3.app/Contents/Resources/git" || true ../../.ci/optimize-mac.sh rpcs3.app @@ -49,7 +49,7 @@ QT_TRANS="$WORKDIR/qt-downloader/$QT_VER/clang_64/translations" cp $QT_TRANS/qt_*.qm rpcs3.app/Contents/translations cp $QT_TRANS/qtbase_*.qm rpcs3.app/Contents/translations cp $QT_TRANS/qtmultimedia_*.qm rpcs3.app/Contents/translations -rm -f rpcs3.app/Contents/translations/qt_help_*.qm +rm -f rpcs3.app/Contents/translations/qt_help_*.qm || true # Need to do this rename hack due to case insensitive filesystem mv rpcs3.app RPCS3_.app diff --git a/.ci/setup-windows.sh b/.ci/setup-windows.sh index cee6d624de..d8016d8c13 100755 --- a/.ci/setup-windows.sh +++ b/.ci/setup-windows.sh @@ -6,7 +6,7 @@ QT_HOST="http://qt.mirror.constant.com/" QT_URL_VER=$(echo "$QT_VER" | sed "s/\.//g") QT_VER_MSVC_UP=$(echo "${QT_VER_MSVC}" | tr '[:lower:]' '[:upper:]') -QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." +QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}_${QT_VER_MSVC}_64/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." 
QT_PREFIX_2="win64_${QT_VER_MSVC}_64/${QT_VER}-0-${QT_DATE}" QT_SUFFIX="-Windows-Windows_11_24H2-${QT_VER_MSVC_UP}-Windows-Windows_11_24H2-X86_64.7z" QT_BASE_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtbase${QT_SUFFIX}" diff --git a/.github/workflows/rpcs3.yml b/.github/workflows/rpcs3.yml index 740e844d04..f474643e09 100644 --- a/.github/workflows/rpcs3.yml +++ b/.github/workflows/rpcs3.yml @@ -30,23 +30,23 @@ jobs: matrix: include: - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy:1.10" build_sh: "/rpcs3/.ci/build-linux.sh" compiler: clang UPLOAD_COMMIT_HASH: d812f1254a1157c80fd402f94446310560f54e5f UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux" - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy:1.10" build_sh: "/rpcs3/.ci/build-linux.sh" compiler: gcc - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.10" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: clang UPLOAD_COMMIT_HASH: a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1 UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux-arm64" - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.10" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: gcc name: RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} @@ -134,7 +134,7 @@ jobs: runs-on: macos-14 env: CCACHE_DIR: /tmp/ccache_dir - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MAIN: '6' LLVM_COMPILER_VER: '21' RELEASE_MESSAGE: ../GitHubReleaseMessage.txt @@ -213,9 +213,9 @@ jobs: env: COMPILER: msvc QT_VER_MAIN: '6' - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MSVC: 'msvc2022' - QT_DATE: '202601261212' + QT_DATE: '202603180535' LLVM_VER: '19.1.7' VULKAN_VER: '1.3.268.0' VULKAN_SDK_SHA: '8459ef49bd06b697115ddd3d97c9aec729e849cd775f5be70897718a9b3b9db5' diff --git a/.gitignore b/.gitignore index 4688d5fa52..a3911be3a0 100644 --- a/.gitignore 
+++ b/.gitignore @@ -69,6 +69,9 @@ CMakeSettings.json *PVS-Studio* PVS/* +# Zed Editor files +.zed/* + # Ignore other system generated files x64/* rpcs3/x64/* diff --git a/3rdparty/curl/CMakeLists.txt b/3rdparty/curl/CMakeLists.txt index b20763af65..2b725169f9 100644 --- a/3rdparty/curl/CMakeLists.txt +++ b/3rdparty/curl/CMakeLists.txt @@ -18,9 +18,6 @@ else() set(USE_LIBIDN2 OFF CACHE BOOL "Use libidn2 for IDN support") # Disabled because MacOS CI doesn't work otherwise set(CURL_CA_PATH "none" CACHE STRING "Location of default CA path. Set 'none' to disable or 'auto' for auto-detection. Defaults to 'auto'.") option(CURL_DISABLE_INSTALL "Disable installation targets" ON) - if(USE_MSVC_STATIC_CRT) - set(CURL_STATIC_CRT ON CACHE BOOL "Use static crt to build curl") - endif() if(WIN32) set(ENABLE_UNICODE ON CACHE BOOL "enable Unicode") endif() diff --git a/3rdparty/libpng/libpng b/3rdparty/libpng/libpng index c3e304954a..d5515b5b8b 160000 --- a/3rdparty/libpng/libpng +++ b/3rdparty/libpng/libpng @@ -1 +1 @@ -Subproject commit c3e304954a9cfd154bc0dfbfea2b01cd61d6546d +Subproject commit d5515b5b8be3901aac04e5bd8bd5c89f287bcd33 diff --git a/3rdparty/protobuf/CMakeLists.txt b/3rdparty/protobuf/CMakeLists.txt index f8e6bed59b..e1f82f146f 100644 --- a/3rdparty/protobuf/CMakeLists.txt +++ b/3rdparty/protobuf/CMakeLists.txt @@ -20,6 +20,8 @@ else() option(protobuf_DISABLE_RTTI "Remove runtime type information in the binaries" OFF) option(protobuf_FORCE_FETCH_DEPENDENCIES "Force all dependencies to be downloaded from GitHub. Local installations will be ignored." OFF) option(protobuf_LOCAL_DEPENDENCIES_ONLY "Prevent downloading any dependencies from GitHub. If this option is set, the dependency must be available locally as an installed package." 
OFF) + option(protobuf_BUILD_SHARED_LIBS "Build Shared Libraries" OFF) + option(protobuf_MSVC_STATIC_RUNTIME "Link static runtime libraries" OFF) add_subdirectory(protobuf EXCLUDE_FROM_ALL) target_include_directories(3rdparty_protobuf SYSTEM INTERFACE protobuf/src) diff --git a/3rdparty/wolfssl/wolfssl b/3rdparty/wolfssl/wolfssl index b077c81eb6..922d04b356 160000 --- a/3rdparty/wolfssl/wolfssl +++ b/3rdparty/wolfssl/wolfssl @@ -1 +1 @@ -Subproject commit b077c81eb635392e694ccedbab8b644297ec0285 +Subproject commit 922d04b3568c6428a9fb905ddee3ef5a68db3108 diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 55d7353acf..47645d290c 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -6,7 +6,8 @@ if (USE_SYSTEM_ZLIB) target_link_libraries(3rdparty_zlib INTERFACE ZLIB::ZLIB) target_compile_definitions(3rdparty_zlib INTERFACE -DZLIB_CONST=1) else() - option(ZLIB_BUILD_EXAMPLES "Enable Zlib Examples" OFF) + option(ZLIB_BUILD_TESTING "Enable Zlib Examples as tests" OFF) + option(ZLIB_BUILD_SHARED "Enable building zlib shared library" OFF) message(STATUS "RPCS3: Using builtin ZLIB") set(SKIP_INSTALL_ALL ON) add_subdirectory(zlib EXCLUDE_FROM_ALL) diff --git a/BUILDING.md b/BUILDING.md index 597621e810..c1774908fd 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -20,26 +20,26 @@ The following tools are required to build RPCS3 on Windows 10 or later: with standalone **CMake** tool. 
- [Python 3.6+](https://www.python.org/downloads/) (add to PATH) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (see "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. The `sln` solution available only on **Visual Studio** is the preferred building solution. It easily allows to build the **RPCS3** application in `Release` and `Debug` mode. In order to build **RPCS3** with the `sln` solution (with **Visual Studio**), **Qt** libs need to be detected. To detect the libs: -- add and set the `QTDIR` environment variable, e.g. `\6.10.2\msvc2022_64\` +- add and set the `QTDIR` environment variable, e.g. `\6.11.0\msvc2022_64\` - or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2022) **NOTE:** If you have issues with the **Visual Studio Qt Plugin**, you may want to uninstall it and install the [Legacy Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.LEGACYQtVisualStudioTools2022) instead. In order to build **RPCS3** with the `CMake` solution (with both **Visual Studio** and standalone **CMake** tool): -- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. `\6.10.2\msvc2022_64\` +- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. 
`\6.11.0\msvc2022_64\` ### Linux These are the essentials tools to build RPCS3 on Linux. Some of them can be installed through your favorite package manager: - Clang 17+ or GCC 13+ - [CMake 3.28.0+](https://www.cmake.org/download/) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. - [SDL3](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend) @@ -123,7 +123,7 @@ Start **Visual Studio**, click on `Open a project or solution` and select the `r ##### Configuring the Qt Plugin (if used) 1) go to `Extensions->Qt VS Tools->Qt Versions` -2) add the path to your Qt installation with compiler e.g. `\6.10.2\msvc2022_64`, version will fill in automatically +2) add the path to your Qt installation with compiler e.g. `\6.11.0\msvc2022_64`, version will fill in automatically 3) go to `Extensions->Qt VS Tools->Options->Legacy Project Format`. (Only available in the **Legacy Qt Plugin**) 4) set `Build: Run pre-build setup` to `true`. (Only available in the **Legacy Qt Plugin**) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5b77091cb..217c40f341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,50 +86,7 @@ if(CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT MSVC) endif() if(MSVC) - option(USE_MSVC_STATIC_CRT "Use static MSVC C runtime" OFF) - - # TODO(cjj19970505@live.cn) - # DiscordRPC binary in 3rdparty is compiled /MT - # So theoretically we should enable DiscordRPC in Release and static CRT build - # since we might encounter some rumtime issues when more than one CRT version are presented. 
- # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # Add other DiscordRPC binaries(compiled with /MTd, /MD, /MDd) or compile it from source may address this issue. - if(NOT IS_MULTI_CONFIG) - if(NOT(CMAKE_BUILD_TYPE MATCHES "Release" AND USE_MSVC_STATIC_CRT)) - set(USE_DISCORD_RPC OFF CACHE BOOL "Discord RPC is only available in Release and static CRT build." FORCE) - endif() - endif() - - if(USE_MSVC_STATIC_CRT) - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") - else() - # though doc ( https://cmake.org/cmake/help/latest/variable/CMAKE_MSVC_RUNTIME_LIBRARY.html ) - # says if that property is not set then CMake uses the default value MultiThreaded$<$:Debug>DLL - # to select a MSVC runtime library. - # But yaml-cpp set /MT(d) if CMAKE_MSVC_RUNTIME_LIBRARY is undefined - # So we have to define it explicitly - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") - endif() - - # TODO(cjj19970505@live.cn) - # offical QT uses dynamic CRT. - # When building our lib with static CRT and debug build type - # and linking with Qt with dynamic CRT and debug build, - # error is encountered in runtime (which is expected). - # But building our lib with static CRT and release build type, - # and linking with Qt with dynamic CRT and release build seems to be working, - # which is the same config with VS solution. - # (though technically it might still have some hidden errors). - # So we allow static CRT in both relase and debug build, but prompt warning in debug build. 
- # For more info: - # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # https://wiki.qt.io/Technical_FAQ#Why_does_a_statically_built_Qt_use_the_dynamic_Visual_Studio_runtime_libraries_.3F_Do_I_need_to_deploy_those_with_my_application_.3F - if(USE_MSVC_STATIC_CRT) - if(IS_MULTI_CONFIG OR CMAKE_BUILD_TYPE MATCHES "Debug") - message(AUTHOR_WARNING "Debug build currently can not work with static CRT.") - endif() - endif() - add_compile_options(/MP) + add_compile_options("$<$:/MP>") endif() if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -142,7 +99,7 @@ if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") endif() if(MSVC) - add_compile_options(/wd4530 /utf-8) # C++ exception handler used, but unwind semantics are not enabled + add_compile_options("$<$:/wd4530;/utf-8>") # C++ exception handler used, but unwind semantics are not enabled endif() add_subdirectory(3rdparty) @@ -161,10 +118,6 @@ if (NOT FOUND_LTO EQUAL -1) message(FATAL_ERROR "RPCS3 doesn't support building with LTO, use -DDISABLE_LTO=TRUE to force-disable it") endif() -if(NOT WIN32) - add_compile_options(-pthread) -endif() - ## Look for Gamemode if its installed on Linux if(LINUX) ## User chooses whether to Enable GameMode features or not diff --git a/Utilities/Config.h b/Utilities/Config.h index ca9af028a5..4c79cbf31c 100644 --- a/Utilities/Config.h +++ b/Utilities/Config.h @@ -393,7 +393,7 @@ namespace cfg void set(const s64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } @@ -484,7 +484,7 @@ namespace cfg void set(const f64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } @@ 
-571,7 +571,7 @@ namespace cfg void set(const u64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 6395c32505..b22c1aeb52 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -107,7 +107,7 @@ thread_local u64 g_tls_fault_rsx = 0; thread_local u64 g_tls_fault_spu = 0; thread_local u64 g_tls_wait_time = 0; thread_local u64 g_tls_wait_fail = 0; -thread_local bool g_tls_access_violation_recovered = false; +thread_local u64 g_tls_access_violation_recovered = umax; extern thread_local std::string(*g_tls_log_prefix)(); namespace stx @@ -1269,7 +1269,7 @@ namespace rsx extern std::function g_access_violation_handler; } -bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept +bool handle_access_violation(u32 addr, bool is_writing, bool is_exec, ucontext_t* context) noexcept { g_tls_fault_all++; @@ -1305,7 +1305,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } spu_protection{cpu}; - if (addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) + if (!is_exec && addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) { bool state_changed = false; @@ -1371,7 +1371,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe { auto thread = idm::get_unlocked>(spu_thread::find_raw_spu((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); - if (!thread) + if (!thread || is_exec) { break; } @@ -1503,7 +1503,9 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe static_cast(context); #endif /* ARCH_ */ - if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable)) + const auto required_page_perms = (is_writing ? 
vm::page_writable : vm::page_readable) + (is_exec ? vm::page_executable : 0); + + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1511,9 +1513,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Hack: allocate memory in case the emulator is stopping const auto hack_alloc = [&]() { - g_tls_access_violation_recovered = true; - - if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable)) + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1525,17 +1525,45 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe return false; } + extern void ppu_register_range(u32 addr, u32 size); + + bool reprotected = false; + if (vm::writer_lock mlock; area->flags & vm::preallocated || vm::check_addr(addr, 0)) { // For allocated memory with protection lower than required (such as protection::no or read-only while writing to it) utils::memory_protect(vm::base(addr & -0x1000), 0x1000, utils::protection::rw); + reprotected = true; + } + + if (reprotected) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; return true; } - return area->falloc(addr & -0x10000, 0x10000) || vm::check_addr(addr, is_writing ? 
vm::page_writable : vm::page_readable); + const bool allocated = area->falloc(addr & -0x10000, 0x10000); + + if (allocated) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; + return true; + } + + return false; }; - if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu)) + if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu) && !is_exec) { vm::temporary_unlock(*cpu); u32 pf_port_id = 0; @@ -1678,7 +1706,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe if (cpu->get_class() == thread_class::spu) { - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); vm_log.always()("[%s] Access violation %s location 0x%x (%s)", cpu->get_name(), is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); @@ -1714,10 +1742,10 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Note: a thread may access violate more than once after hack_alloc recovery // Do not log any further access violations in this case. - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); - vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (cpu && cpu->get_class() == thread_class::ppu && cpu->get_pc() == addr ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); + vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (is_exec ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? 
"read-only memory" : "unmapped memory"); } while (Emu.IsPausedOrReady()) @@ -1766,8 +1794,13 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } - if (Emu.IsStopped() && !hack_alloc()) + if (Emu.IsStopped()) { + while (!hack_alloc()) + { + thread_ctrl::wait_for(1000); + } + return false; } @@ -1806,6 +1839,7 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && !is_executing) { u32 addr = 0; + bool is_exec = false; if (auto [addr0, ok] = vm::try_get_addr(ptr); ok) { @@ -1813,14 +1847,21 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept } else if (const usz exec64 = (ptr - vm::g_exec_addr) / 2; exec64 <= u32{umax}) { + is_exec = true; addr = static_cast(exec64); } - else + else if (const usz exec64 = (ptr - vm::g_exec_addr - vm::g_exec_addr_seg_offset); exec64 <= u32{umax}) { + is_exec = true; + addr = static_cast(exec64); + } + else + { + std::this_thread::sleep_for(1ms); return EXCEPTION_CONTINUE_SEARCH; } - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, pExp->ContextRecord)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, is_exec, pExp->ContextRecord)) { return EXCEPTION_CONTINUE_EXECUTION; } @@ -2027,12 +2068,13 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept #endif const u64 exec64 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) / 2; + const u64 exec64_2 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) - vm::g_exec_addr_seg_offset; const auto cause = is_executing ? "executing" : is_writing ? 
"writing" : "reading"; if (auto [addr, ok] = vm::try_get_addr(info->si_addr); ok && !is_executing) { // Try to process access violation - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, false, context)) { return; } @@ -2040,7 +2082,14 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept if (exec64 < 0x100000000ull && !is_executing) { - if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, true, context)) + { + return; + } + } + else if (exec64_2 < 0x100000000ull && !is_executing) + { + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64_2), is_writing, true, context)) { return; } @@ -2359,7 +2408,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept g_tls_fault_spu = 0; g_tls_wait_time = 0; g_tls_wait_fail = 0; - g_tls_access_violation_recovered = false; + g_tls_access_violation_recovered = umax; g_tls_log_prefix = []() -> std::string { return {}; }; @@ -2830,6 +2879,16 @@ void thread_base::exec() } } + if (auto [total, current] = utils::get_memory_usage(); total - current <= 256 * 1024 * 1024) + { + if (reason_buf.empty()) + { + reason_buf = std::string{reason}; + } + + fmt::append(reason_buf, " (Possible RAM deficiency: free RAM: %dMB)", (total - current) / (1024 * 1024)); + } + if (!reason_buf.empty()) { reason = reason_buf; diff --git a/bin/GuiConfigs/Classic (Bright).qss b/bin/GuiConfigs/Classic (Bright).qss index 143e42df94..f961e63e00 100644 --- a/bin/GuiConfigs/Classic (Bright).qss +++ b/bin/GuiConfigs/Classic (Bright).qss @@ -37,10 +37,6 @@ QSlider#sizeSlider::handle:horizontal { QLabel#toolbar_icon_color { color: rgba(64,64,64,255); } -/* thumbnail icon color stylesheet */ -QLabel#thumbnail_icon_color { - color: rgba(0,100,231,255); 
-} /* gamelist icon color stylesheet */ QLabel#gamelist_icon_background_color { color: rgba(209,209,209,255); diff --git a/bin/GuiConfigs/Darker Style by TheMitoSan.qss b/bin/GuiConfigs/Darker Style by TheMitoSan.qss index 47270a211e..8797ffd3c9 100644 --- a/bin/GuiConfigs/Darker Style by TheMitoSan.qss +++ b/bin/GuiConfigs/Darker Style by TheMitoSan.qss @@ -237,11 +237,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #262626; -} - /* Set Log colors */ QPlainTextEdit#log_frame { background-color: #000; /* Black */ diff --git a/bin/GuiConfigs/Envy.qss b/bin/GuiConfigs/Envy.qss index a78ba04f58..3e2e1ddc35 100644 --- a/bin/GuiConfigs/Envy.qss +++ b/bin/GuiConfigs/Envy.qss @@ -573,11 +573,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #23262d; -} - /* Log colors */ QPlainTextEdit#log_frame { background-color: #23262d; diff --git a/bin/GuiConfigs/Kuroi (Dark) by Ani.qss b/bin/GuiConfigs/Kuroi (Dark) by Ani.qss index 54c667213b..2cd81267f1 100644 --- a/bin/GuiConfigs/Kuroi (Dark) by Ani.qss +++ b/bin/GuiConfigs/Kuroi (Dark) by Ani.qss @@ -265,11 +265,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #444444; -} - /* Memory Viewer */ QLabel#memory_viewer_address_panel { color: #00cbff; /* Font Color: Blue */ diff --git a/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss b/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss index 410db682f7..96b84a9196 100644 --- a/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss +++ b/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss @@ -244,11 +244,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #262626; -} - /* Set Log colors */ 
QPlainTextEdit#log_frame { background-color: #181d24; /* Black */ diff --git a/bin/GuiConfigs/Nekotekina by GooseWing.qss b/bin/GuiConfigs/Nekotekina by GooseWing.qss index 435d550755..93fadcdd68 100755 --- a/bin/GuiConfigs/Nekotekina by GooseWing.qss +++ b/bin/GuiConfigs/Nekotekina by GooseWing.qss @@ -397,12 +397,6 @@ QLabel#gamelist_icon_background_color { } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #ffd785; -} - - QLabel#log_level_always { color: #00ffff; /* Cyan */ } diff --git a/bin/GuiConfigs/Skyline (Nightfall).qss b/bin/GuiConfigs/Skyline (Nightfall).qss index 625a6a28b6..4729a59173 100644 --- a/bin/GuiConfigs/Skyline (Nightfall).qss +++ b/bin/GuiConfigs/Skyline (Nightfall).qss @@ -656,11 +656,6 @@ QLabel#color_button { background: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #370048; -} - /* Debugger colors */ QLabel#debugger_frame_breakpoint { color: #000; /* Font Color: Black */ diff --git a/bin/GuiConfigs/Skyline.qss b/bin/GuiConfigs/Skyline.qss index ef3c7c6857..1004dc0016 100644 --- a/bin/GuiConfigs/Skyline.qss +++ b/bin/GuiConfigs/Skyline.qss @@ -664,11 +664,6 @@ QLabel#color_button { background: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #8500ae; -} - /* Debugger colors */ QLabel#debugger_frame_breakpoint { color: #000; /* Font Color: Black */ diff --git a/bin/GuiConfigs/YoRHa by Ani.qss b/bin/GuiConfigs/YoRHa by Ani.qss index c772f25196..51f09897b1 100644 --- a/bin/GuiConfigs/YoRHa by Ani.qss +++ b/bin/GuiConfigs/YoRHa by Ani.qss @@ -379,11 +379,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #4d4940; -} - QLabel#log_level_always { color: #00ffff; /* Cyan */ } diff --git a/buildfiles/cmake/ConfigureCompiler.cmake b/buildfiles/cmake/ConfigureCompiler.cmake index be900d13c2..ea8cd3200b 100644 --- 
a/buildfiles/cmake/ConfigureCompiler.cmake +++ b/buildfiles/cmake/ConfigureCompiler.cmake @@ -96,11 +96,6 @@ else() # This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts. add_link_options(-Wl,--exclude-libs,ALL) elseif(WIN32) - add_compile_definitions(__STDC_FORMAT_MACROS=1) - - # Workaround for mingw64 (MSYS2) - add_link_options(-Wl,--allow-multiple-definition) - # Increase stack limit to 8 MB add_link_options(-Wl,--stack -Wl,8388608) endif() diff --git a/buildfiles/cmake/FindWolfSSL.cmake b/buildfiles/cmake/FindWolfSSL.cmake index d2e30be60b..35f316837c 100644 --- a/buildfiles/cmake/FindWolfSSL.cmake +++ b/buildfiles/cmake/FindWolfSSL.cmake @@ -1,4 +1,3 @@ -set(WOLFSSL_LIBRARY ON) -set(WOLFSSL_INCLUDE_DIR ON) -set(WOLFSSL_LIBRARIES wolfssl) +set(WOLFSSL_LIBRARY wolfssl) +set(WOLFSSL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/wolfssl) set(WOLFSSL_FOUND TRUE) diff --git a/buildfiles/cmake/FindZLIB.cmake b/buildfiles/cmake/FindZLIB.cmake index 0a29abafa9..ff5869a5f7 100644 --- a/buildfiles/cmake/FindZLIB.cmake +++ b/buildfiles/cmake/FindZLIB.cmake @@ -3,9 +3,9 @@ if(USE_SYSTEM_ZLIB) find_package(ZLIB) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) else() - add_library(ZLIB::ZLIB INTERFACE IMPORTED) + add_library(ZLIB::ZLIB STATIC IMPORTED) set_target_properties(ZLIB::ZLIB PROPERTIES - INTERFACE_LINK_LIBRARIES zlibstatic + IMPORTED_LOCATION "${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib/libzlibstatic.a" INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/3rdparty/zlib/zlib;${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib") set(ZLIB_FOUND TRUE) endif() diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 2aa8120752..cfc2495f15 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -87,12 +87,6 @@ if (NOT ANDROID) message(FATAL_ERROR "RPCS3 requires either X11 or Wayland (or both) for Vulkan.") endif() - if(UNIX) - set(CMAKE_THREAD_PREFER_PTHREAD TRUE) - find_package(Threads REQUIRED) - target_link_libraries(rpcs3_lib PRIVATE 
Threads::Threads) - endif() - if(WIN32) target_link_libraries(rpcs3_lib PRIVATE ws2_32 Iphlpapi Winmm Psapi gdi32 setupapi) else() diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index e115585db9..d691952fa0 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -645,6 +645,9 @@ if(TARGET 3rdparty_vulkan) endif() endif() +if(NOT WIN32) + set(THREADS_PREFER_PTHREAD_FLAG ON) +endif() find_package(Threads REQUIRED) target_link_libraries(rpcs3_emu diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 3ab011aa04..4bd5fc9157 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -888,6 +888,14 @@ bool cpu_thread::check_state() noexcept store = true; } + if (flags & cpu_flag::req_exit) + { + // A request for the thread to quit has been made + flags -= cpu_flag::req_exit; + flags += cpu_flag::exit; + store = true; + } + // Can't process dbg_step if we only paused temporarily if (cpu_can_stop && flags & cpu_flag::dbg_step) { @@ -1157,13 +1165,13 @@ void cpu_thread::notify() cpu_thread& cpu_thread::operator=(thread_state) { - if (state & cpu_flag::exit) + if (state & (cpu_flag::exit + cpu_flag::req_exit)) { // Must be notified elsewhere or self-raised return *this; } - const auto old = state.fetch_add(cpu_flag::exit); + const auto old = state.fetch_add(cpu_flag::req_exit); if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit)) { @@ -1322,8 +1330,9 @@ extern std::shared_ptr make_disasm(const cpu_thread* cpu, shared_ptr< void cpu_thread::dump_all(std::string& ret) const { std::any func_data; + std::any misc_data; - ret += dump_misc(); + dump_misc(ret, misc_data); ret += '\n'; dump_regs(ret, func_data); ret += '\n'; @@ -1371,9 +1380,9 @@ std::vector> cpu_thread::dump_callstack_list() const return {}; } -std::string cpu_thread::dump_misc() const +void cpu_thread::dump_misc(std::string& ret, std::any& /*custom_data*/) const { - return fmt::format("%s[0x%x]; State: %s\n", 
get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", id, state.load()); + fmt::append(ret, "%s[0x%x]; State: %s\n", get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", id, state.load()); } bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 5e3484f7f5..e723fd2d4b 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -29,6 +29,7 @@ enum class cpu_flag : u32 yield, // Thread is being requested to yield its execution time if it's running preempt, // Thread is being requested to preempt the execution of all CPU threads + req_exit, // Request the thread to exit dbg_global_pause, // Emulation paused dbg_pause, // Thread paused dbg_step, // Thread forced to pause after one step (one instruction, etc) @@ -39,7 +40,7 @@ enum class cpu_flag : u32 // Test stopped state constexpr bool is_stopped(bs_t state) { - return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again)); + return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again + cpu_flag::req_exit)); } // Test paused state @@ -176,7 +177,7 @@ public: virtual std::vector> dump_callstack_list() const; // Get CPU dump of misc information - virtual std::string dump_misc() const; + virtual void dump_misc(std::string& ret, std::any& /*custom_data*/) const; // Thread entry point function virtual void cpu_task() = 0; diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 08e8e9ad30..22413f62b8 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -225,12 +225,66 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", s1, s2); } - if (const auto c1 = llvm::dyn_cast(val)) + if (val->getType() == type) + { + return val; + } + + llvm::CastInst* i; + 
llvm::Value* source_val = val; + + // Try to reuse older bitcasts + while ((i = llvm::dyn_cast_or_null(source_val)) && i->getOpcode() == llvm::Instruction::BitCast) + { + source_val = i->getOperand(0); + + if (source_val->getType() == type) + { + return source_val; + } + } + + for (auto it = source_val->use_begin(); it != source_val->use_end(); ++it) + { + llvm::Value* it_val = *it; + + if (!it_val) + { + continue; + } + + llvm::CastInst* bci = llvm::dyn_cast_or_null(it_val); + + // Walk through bitcasts + while (bci && bci->getOpcode() == llvm::Instruction::BitCast) + { + if (bci->getParent() != m_ir->GetInsertBlock()) + { + break; + } + + if (bci->getType() == type) + { + return bci; + } + + if (bci->use_begin() == bci->use_end()) + { + break; + } + + bci = llvm::dyn_cast_or_null(*bci->use_begin()); + } + } + + // Do bitcast on the source + + if (const auto c1 = llvm::dyn_cast(source_val)) { return ensure(llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c1, type, m_module->getDataLayout())); } - return m_ir->CreateBitCast(val, type); + return m_ir->CreateBitCast(source_val, type); } template <> diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 9b9804fd39..738932808d 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -567,6 +567,32 @@ struct llvm_placeholder_t } }; +template >> +struct llvm_place_stealer_t +{ + // TODO: placeholder extracting actual constant values (u64, f64, vector, etc) + + using type = T; + + static constexpr bool is_ok = true; + + llvm::Value* eval(llvm::IRBuilder<>*) const + { + return nullptr; + } + + std::tuple<> match(llvm::Value*& value, llvm::Module*) const + { + if (value && value->getType() == llvm_value_t::get_type(value->getContext())) + { + return {}; + } + + value = nullptr; + return {}; + } +}; + template struct llvm_const_int { @@ -3227,6 +3253,12 @@ public: return {}; } + template + static llvm_place_stealer_t match_stealer() + { + return {}; + } + template 
requires requires { typename llvm_common_t; } static auto match_expr(llvm::Value* v, llvm::Module* _m, T&& expr) @@ -3951,6 +3983,15 @@ public: erase_stores({args.value...}); } + // Debug breakpoint + void debugtrap() + { + const auto _rty = llvm::Type::getVoidTy(m_context); + const auto type = llvm::FunctionType::get(_rty, {}, false); + const auto func = llvm::cast(m_ir->GetInsertBlock()->getParent()->getParent()->getOrInsertFunction("llvm.debugtrap", type).getCallee()); + m_ir->CreateCall(func); + } + template static auto pshufb(T&& a, U&& b) { diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.cpp b/rpcs3/Emu/Cell/Modules/cellAdec.cpp index dfc91c8d2f..0d0c4481e9 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp @@ -261,7 +261,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_mutex_lock; output_mutex_lock: - error_occurred |= static_cast(sys_mutex_lock(ppu, output_mutex, 0) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex, 0) != CELL_OK); if (ppu.state & cpu_flag::again) { @@ -273,7 +273,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_cond_wait; output_cond_wait: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -287,7 +287,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::queue_mutex_lock; queue_mutex_lock: - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -296,7 +296,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE + 
ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE cellAdec.trace("Command type: %d", static_cast(cmd.type.get())); @@ -307,7 +307,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) { case LpcmDecCmdType::start_seq: // LLE sends a command to the SPU thread. The SPU thread consumes the command without doing anything, however - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; case LpcmDecCmdType::end_seq: @@ -324,11 +324,11 @@ void LpcmDecContext::exec(ppu_thread& ppu) // Doesn't do anything else notify_seq_done.cbFunc(ppu, notify_seq_done.cbArg); - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; } case LpcmDecCmdType::close: - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE return; case LpcmDecCmdType::decode_au: @@ -685,7 +685,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); const vm::var bsi_info{{ lpcm_param->channelNumber, lpcm_param->sampleRate, static_cast(output_size) }}; @@ -703,14 +703,14 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... 
args) { ppu.state += cpu_flag::wait; - if (error_code ret = sys_mutex_lock(ppu, queue_size_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_size_mutex, 0); ret != CELL_OK) { return ret; } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_BUSY; } @@ -720,39 +720,39 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... args) *lpcm_param = { args... }; } - if (error_code ret = sys_mutex_lock(ppu, queue_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_mutex, 0); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } cmd_queue.emplace(type, std::forward(args)...); - if (error_code ret = sys_mutex_unlock(ppu, queue_mutex); ret != CELL_OK + if (error_code ret = lv2_syscall(ppu, queue_mutex); ret != CELL_OK || (ret = cmd_available.release(ppu)) != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } - return sys_mutex_unlock(ppu, queue_size_mutex); + return lv2_syscall(ppu, queue_size_mutex); } inline error_code LpcmDecContext::release_output(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, output_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_mutex, 0); ret != CELL_OK) { return ret; } output_locked = false; - if (error_code ret = sys_cond_signal(ppu, output_consumed); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_consumed); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, output_mutex); + return 
lv2_syscall(ppu, output_mutex); } void lpcmDecEntry(ppu_thread& ppu, vm::ptr lpcm_dec) @@ -820,13 +820,13 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const vm::var queue_mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem06"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec03"_u64 } }}; - error_code ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); + error_code ret = lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); ret = ret ? ret : handle->release_output(ppu); ret = ret ? 
ret : handle->cmd_available.init(ppu, handle.ptr(&LpcmDecContext::cmd_available), 0); ret = ret ? ret : handle->reserved2.init(ppu, handle.ptr(&LpcmDecContext::reserved2), 0); @@ -844,8 +844,8 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const auto entry = g_fxo->get().func_addr(FIND_FUNC(lpcmDecEntry)); ret = ppu_execute<&sys_ppu_thread_create>(ppu, handle.ptr(&LpcmDecContext::thread_id), entry, handle.addr(), +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +_name); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); return ret; } @@ -865,8 +865,8 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cellAdec.notice("_CellAdecCoreOpClose_lpcm(handle=*0x%x)", handle); - if (error_code ret = sys_mutex_lock(ppu, handle->queue_size_mutex, 0); ret != CELL_OK - || (ret = sys_mutex_lock(ppu, handle->queue_mutex, 0)) != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_size_mutex, 0); ret != CELL_OK + || (ret = lv2_syscall(ppu, handle->queue_mutex, 0)) != CELL_OK) { return ret; } @@ -875,14 +875,14 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha { handle->cmd_queue.emplace(LpcmDecCmdType::close); - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } if (error_code ret = handle->cmd_available.release(ppu); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + 
ensure(lv2_syscall(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } } @@ -893,29 +893,29 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cmd.type = LpcmDecCmdType::close; } - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } } - error_code ret = sys_mutex_unlock(ppu, handle->queue_size_mutex); + error_code ret = lv2_syscall(ppu, handle->queue_size_mutex); ret = ret ? ret : handle->release_output(ppu); vm::var thread_ret; - ret = ret ? ret : sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret); + ret = ret ? ret : lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret); - ret = ret ? ret : sys_cond_destroy(ppu, handle->queue_size_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->unk_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_size_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->unk_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); ret = ret ? ret : handle->cmd_available.finalize(ppu); ret = ret ? ret : handle->reserved2.finalize(ppu); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_pop_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_push_mutex); + ret = ret ? 
ret : lv2_syscall(ppu, handle->spurs_queue_pop_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->spurs_queue_push_mutex); return ret; } @@ -1091,11 +1091,11 @@ error_code AdecContext::set_pcm_item(s32 pcm_handle, vm::ptr pcm_addr, u32 error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1115,27 +1115,27 @@ error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) } else { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } if (frames_head == -1 || frames_tail == -1) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } 
@@ -1146,7 +1146,7 @@ error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { if (pcm_handle != frames_tail) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1171,7 +1171,7 @@ error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) frames[prev].next = next; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } @@ -1557,7 +1557,7 @@ error_code adecOpen(ppu_thread& ppu, vm::ptr type, vm::cptr(ppu, _this.ptr(&AdecContext::mutex), _this.ptr(&AdecContext::mutex_attribute)) == CELL_OK); // Error code isn't checked on LLE *handle = _this; @@ -1626,7 +1626,7 @@ error_code cellAdecClose(ppu_thread& ppu, vm::ptr handle) return ret; } - if (error_code ret = sys_mutex_destroy(ppu, handle->mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->mutex); ret != CELL_OK) { return ret; } diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.h b/rpcs3/Emu/Cell/Modules/cellAdec.h index a43c3f0aef..2216561516 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.h +++ b/rpcs3/Emu/Cell/Modules/cellAdec.h @@ -485,6 +485,19 @@ struct AdecFrame CHECK_SIZE(AdecFrame, 0x68); +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... 
args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + class AdecOutputQueue { struct entry @@ -511,10 +524,10 @@ public: this->size = 0; const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem07"_u64 } }}; - ensure(sys_mutex_create(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec05"_u64 } }}; - ensure(sys_cond_create(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE for (s32 i = 0; i < 4; i++) { @@ -524,12 +537,12 @@ public: error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - if (error_code ret = sys_mutex_destroy(ppu, mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex); ret != CELL_OK) { return ret; } @@ -539,11 +552,11 @@ public: error_code push(ppu_thread& ppu, vm::ptr pcm_item, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (entries[back].state != 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return true; // LLE returns the result of the comparison above } @@ -554,13 +567,13 @@ public: back = (back + 1) & 3; size++; - 
ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } const entry* pop(ppu_thread& ppu) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) // Savestate was created while waiting on the mutex { @@ -569,7 +582,7 @@ public: if (entries[front].state == 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return nullptr; } @@ -581,15 +594,15 @@ public: front = (front + 1) & 3; size--; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } const entry& peek(ppu_thread& ppu) const { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE const entry& ret = entries[front]; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } }; @@ -708,39 +721,39 @@ public: const vm::var mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem01"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec01"_u64 } }}; - if (error_code ret = sys_mutex_create(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) { return ret; } - return sys_cond_create(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); + return 
lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); } error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - return sys_mutex_destroy(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code release(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } value++; - if (error_code ret = sys_cond_signal(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code acquire(ppu_thread& ppu, lpcm_dec_state& savestate) @@ -752,7 +765,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_mutex_lock; - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } @@ -767,7 +780,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_cond_wait; cond_wait: - if (error_code ret = sys_cond_wait(ppu, cond, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond, 0); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } @@ -780,7 +793,7 @@ public: value--; - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } }; diff --git a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp index c55cf7b60f..fb3191bcb4 100644 --- a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp @@ -295,7 +295,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) { savestate = atracxdec_state::initial; - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -310,24 +310,24 @@ void 
AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_cmd; label1_wait_for_cmd_state: - ensure(sys_cond_wait(ppu, queue_not_empty, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { return; } - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); savestate = atracxdec_state::checking_run_thread_1; label2_check_run_thread_1_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -336,11 +336,11 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::executing_cmd; label3_execute_cmd_state: @@ -392,7 +392,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) cellAtracXdec.trace("Waiting for output to be consumed..."); - ensure(sys_mutex_lock(ppu, output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -404,7 +404,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_output; label4_wait_for_output_state: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -417,7 +417,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::checking_run_thread_2; label5_check_run_thread_2_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) 
== CELL_OK); if (ppu.state & cpu_flag::again) { @@ -426,12 +426,12 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::decoding; label6_decode_state: @@ -645,7 +645,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); const u32 output_size = decoded_samples_num * (decoder.bw_pcm & 0x7fu) * decoder.nch_out; @@ -680,7 +680,7 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... args) if (!signal) { - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -692,23 +692,23 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... 
args) // Close command is only sent if the queue is empty on LLE if (!cmd_queue.empty()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return {}; } } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return CELL_ADEC_ERROR_ATX_BUSY; } cmd_queue.emplace(std::forward(type), std::forward(args)...); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } - ensure(sys_cond_signal(ppu, queue_not_empty) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -779,25 +779,25 @@ error_code _CellAdecCoreOpOpenExt_atracx(ppu_thread& ppu, vm::ptr mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_atd001"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_atd002"_u64 } }}; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); mutex_attr->name_u64 = "_atd003"_u64; cond_attr->name_u64 = "_atd004"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); 
mutex_attr->name_u64 = "_atd005"_u64; cond_attr->name_u64 = "_atd006"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); const vm::var _name = vm::make_str("HLE ATRAC3plus decoder"); const auto entry = g_fxo->get().func_addr(FIND_FUNC(atracXdecEntry)); @@ -829,26 +829,26 @@ error_code _CellAdecCoreOpClose_atracx(ppu_thread& ppu, vm::ptrrun_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex, 0) == CELL_OK); handle->run_thread = false; - ensure(sys_mutex_unlock(ppu, handle->run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex) == CELL_OK); handle->send_command(ppu); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); vm::var thread_ret; - ensure(sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); + 
ensure(lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); - error_code ret = sys_cond_destroy(ppu, handle->queue_not_empty); - ret = ret ? ret : sys_cond_destroy(ppu, handle->run_thread_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->run_thread_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + error_code ret = lv2_syscall(ppu, handle->queue_not_empty); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); return ret != CELL_OK ? static_cast(CELL_ADEC_ERROR_FATAL) : CELL_OK; } @@ -921,7 +921,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -931,7 +931,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_locked = false; } - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -939,7 +939,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp index 91ee7e2426..9332fc5f1e 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp @@ -10,6 +10,7 @@ #include "cellDmuxPamf.h" #include +#include vm::gvar g_cell_dmux_core_ops_pamf; vm::gvar g_cell_dmux_core_ops_raw_es; @@ -998,6 +999,10 
@@ void dmux_pamf_spu_context::operator()() // cellSpursMain() ensure(stream_info_queue->pop(stream_info)); set_stream({ stream_info.stream_addr.get_ptr(), stream_info.stream_size }, stream_info.continuity); + + // Delay demuxing a bit + // Prevents White Knight Chronicles II FMVs from freezing, since events are otherwise fired before the game has finished initializing FMV playback + thread_ctrl::wait_for(1'500); } process_next_pack(); @@ -1153,6 +1158,19 @@ void dmux_pamf_spu_context::save(utils::serial& ar) // PPU thread +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + template void DmuxPamfContext::send_spu_command_and_wait(ppu_thread& ppu, bool waiting_for_spu_state, auto&&... cmd_params) { @@ -1194,7 +1212,7 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 goto label1_waiting_for_au_released_state; } - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1213,9 +1231,9 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 savestate = dmux_pamf_state::waiting_for_au_released; label1_waiting_for_au_released_state: - if (sys_cond_wait(ppu, cond, 0) != CELL_OK) + if (lv2_syscall(ppu, cond, 0) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1233,13 +1251,13 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 au_released_bitset = 0; - return sys_mutex_unlock(ppu, mutex) != CELL_OK ? static_cast(CELL_DMUX_PAMF_ERROR_FATAL) : CELL_OK; + return lv2_syscall(ppu, mutex) != CELL_OK ? 
static_cast(CELL_DMUX_PAMF_ERROR_FATAL) : CELL_OK; } template error_code DmuxPamfContext::set_au_reset(ppu_thread& ppu) { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1251,7 +1269,7 @@ error_code DmuxPamfContext::set_au_reset(ppu_thread& ppu) std::ranges::for_each(elementary_streams | std::views::filter([](auto es){ return !!es; }), [](auto& reset_next_au) { reset_next_au = reset; }, &DmuxPamfElementaryStream::reset_next_au); - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } template @@ -1357,7 +1375,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::starting_demux_done; label4_starting_demux_done_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::starting_demux_done_mutex_lock_error; label5_starting_demux_done_mutex_lock_error_state: @@ -1369,7 +1387,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) sequence_state = DmuxPamfSequenceState::resetting; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::starting_demux_done_mutex_unlock_error; label6_starting_demux_done_mutex_unlock_error_state: @@ -1422,7 +1440,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) { case DmuxPamfEventType::au_found: { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1437,7 +1455,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) // If the elementary stream of the found access unit is not enabled, don't do anything if (!es || es->_this.get_ptr() != es || es->es_id != event.au_found.user_data) { - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != 
CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1465,7 +1483,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) es->reset_next_au = false; } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1491,7 +1509,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) es->au_specific_info[2] = read_from_ptr>(event.au_found.stream_header_buf, 1); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1536,7 +1554,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::demux_done_mutex_lock; label15_demux_done_mutex_lock_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1551,9 +1569,9 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::demux_done_cond_signal; label16_demux_done_cond_signal_state: - if (sys_cond_signal_all(ppu, cond) != CELL_OK) + if (lv2_syscall(ppu, cond) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1562,7 +1580,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) RETURN_ON_CPU_FLAG_AGAIN(); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1577,7 +1595,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) } case DmuxPamfEventType::flush_done: { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1588,7 +1606,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) DmuxPamfElementaryStream* const es = find_es(event.flush_done.stream_id, event.flush_done.private_stream_id); const bool valid = es && 
es->_this.get_ptr() == es && es->es_id == event.flush_done.user_data; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1631,7 +1649,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::resuming_demux_mutex_lock; label17_resuming_demux_mutex_lock_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1658,7 +1676,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) ensure(cmd_result_queue.pop()); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1964,8 +1982,8 @@ error_code DmuxPamfContext::open(ppu_thread& ppu, const CellDmuxPamfResource& re const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_dxpmtx"_u64 } }}; const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_dxpcnd"_u64 } }}; - if (sys_mutex_create(ppu, _this.ptr(&DmuxPamfContext::mutex), mutex_attr) != CELL_OK - || sys_cond_create(ppu, _this.ptr(&DmuxPamfContext::cond), _this->mutex, cond_attr) != CELL_OK) + if (lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::mutex), mutex_attr) != CELL_OK + || lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::cond), _this->mutex, cond_attr) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2078,8 +2096,8 @@ error_code DmuxPamfContext::close(ppu_thread& ppu) g_fxo->get().free(0x40000); } - if (sys_cond_destroy(ppu, cond) != CELL_OK - || sys_mutex_destroy(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, cond) != CELL_OK + || lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2117,7 +2135,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) switch (savestate) { case 0: - if (sys_mutex_lock(ppu, mutex, 0) != 
CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2130,7 +2148,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) if (sequence_state != DmuxPamfSequenceState::running) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } [[fallthrough]]; @@ -2148,9 +2166,9 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, cond, static_cast(thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, cond, static_cast(thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2160,7 +2178,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) return {}; } - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2215,7 +2233,7 @@ error_code _CellDmuxCoreOpCreateThread(ppu_thread& ppu, vm::ptr(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2226,12 +2244,12 @@ error_code DmuxPamfContext::join_thread(ppu_thread& ppu) send_spu_command_and_wait(ppu, false); - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } - return sys_ppu_thread_join(ppu, static_cast(thread_id), +vm::var{}) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, static_cast(thread_id), +vm::var{}) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpJoinThread(ppu_thread& ppu, vm::ptr handle) @@ -2264,7 +2282,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2279,7 +2297,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr if (!stream_info_queue.emplace(stream_address, stream_size, user_data, !discontinuity, raw_es)) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_BUSY : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_BUSY : CELL_DMUX_PAMF_ERROR_FATAL; } } @@ -2293,7 +2311,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr sequence_state = DmuxPamfSequenceState::running; - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } template @@ -2320,7 +2338,7 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ switch (savestate) { case 0: - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2346,9 +2364,9 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, demuxer->cond, static_cast(demuxer->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, demuxer->cond, static_cast(demuxer->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, demuxer->mutex); + lv2_syscall(ppu, demuxer->mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2358,7 +2376,7 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2461,7 +2479,7 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat return CELL_DMUX_PAMF_ERROR_ARG; } - if (const error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (const error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2476,13 +2494,13 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat if (enabled_es_num == max_enabled_es_num) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_NO_MEMORY : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
CELL_DMUX_PAMF_ERROR_NO_MEMORY : CELL_DMUX_PAMF_ERROR_FATAL; } if (find_es(stream_id, private_stream_id)) { // Elementary stream is already enabled - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; } } @@ -2560,7 +2578,7 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat enabled_es_num++; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2614,7 +2632,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) switch (savestate) { case 0: - if (sys_mutex_lock(ppu, dmux->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, dmux->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2628,7 +2646,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) if (!dmux->find_es(stream_id, private_stream_id)) { // Elementary stream is already disabled - return sys_mutex_unlock(ppu, dmux->mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; } [[fallthrough]]; @@ -2659,9 +2677,9 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, dmux->cond, static_cast(dmux->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, dmux->cond, static_cast(dmux->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, dmux->mutex); + lv2_syscall(ppu, dmux->mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2671,7 +2689,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) return {}; } - return sys_mutex_unlock(ppu, dmux->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2698,7 +2716,7 @@ error_code DmuxPamfElementaryStream::flush_es(ppu_thread& ppu) const if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2718,7 +2736,7 @@ error_code DmuxPamfElementaryStream::flush_es(ppu_thread& ppu) const return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpFlushEs(ppu_thread& ppu, vm::ptr esHandle) @@ -2741,7 +2759,7 @@ error_code DmuxPamfElementaryStream::reset_es(ppu_thread& ppu) const if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2761,7 +2779,7 @@ error_code DmuxPamfElementaryStream::reset_es(ppu_thread& ppu) const return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpResetEs(ppu_thread& ppu, vm::ptr esHandle) @@ -2797,7 +2815,7 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) return {}; } - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2809,9 +2827,9 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) while (sequence_state != DmuxPamfSequenceState::dormant) { - if (sys_cond_wait(ppu, cond, 0) != CELL_OK) + if (lv2_syscall(ppu, cond, 0) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2821,7 +2839,7 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) } } - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpResetStreamAndWaitDone(ppu_thread& ppu, vm::ptr handle) diff --git a/rpcs3/Emu/Cell/Modules/cellGem.cpp b/rpcs3/Emu/Cell/Modules/cellGem.cpp index d45dace1ca..f9f5ea4100 100644 --- a/rpcs3/Emu/Cell/Modules/cellGem.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp @@ -1774,6 +1774,12 @@ public: shared_mutex mutex; + gem_tracker& operator=(thread_state) noexcept + { + wake_up_tracker(); + return *this; + } + private: atomic_t m_wake_up_tracker = 0; atomic_t m_tracker_done = 0; diff --git a/rpcs3/Emu/Cell/Modules/sceNp.h b/rpcs3/Emu/Cell/Modules/sceNp.h index 1bc0a345db..e6b8bff945 100644 --- a/rpcs3/Emu/Cell/Modules/sceNp.h +++ b/rpcs3/Emu/Cell/Modules/sceNp.h @@ -1267,6 +1267,11 @@ struct SceNpOnlineId { char data[SCE_NET_NP_ONLINEID_MAX_LENGTH + 1]; // char term; char dummy[3]; + + bool operator<(const SceNpOnlineId& other) const + { + return memcmp(data, other.data, sizeof(data)) < 0; + } }; // NP ID structure @@ -1283,6 +1288,11 @@ struct SceNpId }; u8 
reserved[8]; + + bool operator<(const SceNpId& other) const + { + return handle < other.handle; + } }; CHECK_SIZE_ALIGN(SceNpId, 0x24, 1); @@ -1689,12 +1699,22 @@ struct SceNpLobbyId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpLobbyId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpRoomId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpRoomId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpMatchingAttr diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 4d690b344d..f5d91cc519 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1286,7 +1286,7 @@ extern bool ppu_patch(u32 addr, u32 value) { if (addr % 4) { - ppu_log.fatal("Patch failed at 0x%x: unanligned memory address.", addr); + ppu_log.fatal("Patch failed at 0x%x: unaligned memory address.", addr); return false; } @@ -1364,9 +1364,7 @@ void ppu_thread::dump_regs(std::string& ret, std::any& custom_data) const u32 preferred_cr_field_index = 7; }; - dump_registers_data_t* func_data = nullptr; - - func_data = std::any_cast(&custom_data); + dump_registers_data_t* func_data = std::any_cast(&custom_data); if (!func_data) { @@ -2039,9 +2037,9 @@ std::vector> ppu_thread::dump_callstack_list() const return call_stack_list; } -std::string ppu_thread::dump_misc() const +void ppu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); if (ack_suspend) { @@ -2096,7 +2094,6 @@ std::string ppu_thread::dump_misc() const { ret += '\n'; } - return ret; } void ppu_thread::dump_all(std::string& ret) const diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 97c705aed5..cf5b91c487 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -145,7 +145,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; 
virtual std::string dump_callstack() const override; virtual std::vector> dump_callstack_list() const override; - virtual std::string dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void dump_all(std::string&) const override; virtual void cpu_task() override final; virtual void cpu_sleep() override; diff --git a/rpcs3/Emu/Cell/SPUAnalyser.h b/rpcs3/Emu/Cell/SPUAnalyser.h index 123a629bed..1598551c7d 100644 --- a/rpcs3/Emu/Cell/SPUAnalyser.h +++ b/rpcs3/Emu/Cell/SPUAnalyser.h @@ -13,6 +13,7 @@ struct spu_itype static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR + static constexpr struct pure_tag{} pure{}; // Instructions that always produce the same values as long as arguments are equal enum class type : unsigned char { @@ -158,6 +159,15 @@ struct spu_itype CUFLT, FRDS, // xfloat_tag last + CFLTS, + CFLTU, + FCEQ, + FCMEQ, + FCGT, + FCMGT, // floating_tag last + FSCRWR, + FSCRRD, + DFA, DFS, DFM, @@ -167,20 +177,11 @@ struct spu_itype DFNMA, FESD, - CFLTS, - CFLTU, - FCEQ, - FCMEQ, - FCGT, - FCMGT, - FSCRWR, - FSCRRD, - DFCEQ, DFCMEQ, DFCGT, DFCMGT, - DFTSV, // floating_tag last + DFTSV, SHLH, // shiftrot_tag first SHLHI, @@ -248,10 +249,10 @@ struct spu_itype return value >= BR && value <= BISL; } - // Test for floating point instruction + // Test for floating point instruction (32-bit float) friend constexpr bool operator &(type value, floating_tag) { - return value >= FMA && value <= DFTSV; + return value >= FMA && value <= FCMGT; } // Test for 4-op instruction @@ -301,8 +302,16 @@ struct spu_itype { return (value >= HEQ && value <= STQR) || (value >= BR && value <= BIHNZ); } + + // Test for instructions which always produce the same values as long as arguments and immediate values are equal + friend 
constexpr bool operator &(type value, pure_tag) + { + return (value >= ILH && value <= CLGTI); + } }; +using spu_itype_t = spu_itype::type; + struct spu_iflag { enum @@ -528,6 +537,8 @@ struct spu_iflag } }; +using spu_iflag_t = spu_iflag::flag; + #define NAME(x) static constexpr const char& x = *#x struct spu_iname diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 1b6003036b..fd0a8c33bf 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -52,6 +52,36 @@ struct span_less template inline constexpr span_less s_span_less{}; +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](spu_recompiler_base::compare_direction arg) + { + switch (arg) + { + case spu_recompiler_base::CMP_SLESS: return "SLT"; + case spu_recompiler_base::CMP_SGREATER: return "SGT"; + case spu_recompiler_base::CMP_EQUAL: return "IEQ"; + case spu_recompiler_base::CMP_LLESS: return "ULT"; + case spu_recompiler_base::CMP_LGREATER: return "UGT"; + case spu_recompiler_base::CMP_SGREATER_EQUAL: return "SGE"; + case spu_recompiler_base::CMP_SLOWER_EQUAL: return "SLE"; + case spu_recompiler_base::CMP_NOT_EQUAL: return "INE"; + case spu_recompiler_base::CMP_LGREATER_EQUAL: return "UGE"; + case spu_recompiler_base::CMP_LLOWER_EQUAL: return "ULE"; + case spu_recompiler_base::CMP_UNKNOWN: + case spu_recompiler_base::CMP_NOT_EQUAL2: + case spu_recompiler_base::CMP_EQUAL2: + default: + { + break; + } + } + + return unknown; + }); +} + // Move 4 args for calling native function from a GHC calling convention function #if defined(ARCH_X64) static u8* move_args_ghc_to_native(u8* raw) @@ -2927,7 +2957,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { // Stop on special instructions (TODO) - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -2948,7 +2978,7 
@@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.error("[0x%x] Invalid interrupt flags (DE)", pos); } - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -3002,7 +3032,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.ra >= 4 && op_next.rb < 10); } @@ -3248,7 +3278,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return) { - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); } else { @@ -3261,6 +3291,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s else { m_targets[pos].push_back(pos + 4); + m_targets[pos].push_back(SPU_LS_SIZE); add_block(pos + 4); } @@ -3308,7 +3339,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); } @@ -3834,17 +3865,25 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + bool removed = false; + for (auto it2 = it->second.begin(); it2 != it->second.end();) { if (*it2 < lsa || *it2 >= limit) { it2 = it->second.erase(it2); + removed = true; continue; } it2++; } + if (removed) + { + it->second.emplace_back(SPU_LS_SIZE); + } + it++; } @@ -3895,7 +3934,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto type = g_spu_itype.decode(op.opcode); - u8 reg_save = 255; + u8 reg_save = s_reg_max; if (type == spu_itype::STQD && 
op.ra == s_reg_sp && !block.reg_mod[op.rt] && !block.reg_use[op.rt]) { @@ -3915,7 +3954,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Register reg use only if it happens before reg mod if (!block.reg_mod[reg]) { - block.reg_use.set(reg); + if (type & spu_itype::floating) + { + block.reg_maybe_float.set(reg); + } + + if (type == spu_itype::SHUFB && reg == op.rc) + { + block.reg_maybe_shuffle_mask.set(reg); + } + + block.reg_use[reg]++; if (reg_save != reg && block.reg_save_dom[reg]) { @@ -3932,7 +3981,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size}) { if (!block.reg_mod[reg]) - block.reg_use.set(reg); + block.reg_use[reg]++; } } @@ -3986,7 +4035,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (i == s_reg_lr || (i >= 2 && i < s_reg_80) || i > s_reg_127) { if (!block.reg_mod[i]) - block.reg_use.set(i); + block.reg_use[i]++; if (!is_tail) { @@ -4863,19 +4912,24 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return map; }; - struct putllc16_statistics_t + struct stats_t { atomic_t all = 0; atomic_t single = 0; - atomic_t nowrite = 0; std::array, 128> breaking_reason{}; }; - struct rchcnt_statistics_t + struct putllc16_statistics_t : stats_t + { + atomic_t nowrite = 0; + }; + + struct rchcnt_statistics_t : stats_t + { + }; + + struct reduced_statistics_t : stats_t { - atomic_t all = 0; - atomic_t single = 0; - std::array, 128> breaking_reason{}; }; // Pattern structures @@ -4987,6 +5041,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // RDCH/RCHCNT Loop analysis tracker rchcnt_loop_t rchcnt_loop{}; + reduced_loop_t reduced_loop{}; + block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept : pc(_pc) , parent_iterator_index(_parent_iterator_index) @@ -4999,6 +5055,7 @@ spu_program 
spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s std::map atomic16_all; // RdAtomicStat location -> atomic loop optimization state std::map rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state + std::map reduced_loop_all; std::map getllar_starts; // True for failed loops std::map run_on_block; std::map logged_block; @@ -5007,6 +5064,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s atomic16_t dummy16{}; rchcnt_loop_t dummy_loop{}; + reduced_loop_t dummy_rloop{}; bool likely_putllc_loop = false; bool had_putllc_evaluation = false; @@ -5053,6 +5111,194 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 iterator_id_alloc = 0; + auto get_block_targets = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).targets; + } + + return {}; + }; + + auto get_block_preds = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).preds; + } + + return {}; + }; + + const auto initiate_patterns = [&](block_reg_state_iterator& block_state_it, u32 bpc, bool is_multi_block) + { + // Initiate patterns (that are initiated on block start) + const auto& bb_body = ::at32(m_bbs, bpc); + + bool invalid = bb_body.size <= 2; + bool valid = true; + + u32 expected_sup_conds = 0; + u32 first_pred_of_loop = SPU_LS_SIZE; + + for (u32 pred : get_block_preds(bpc)) + { + if (is_multi_block ? pred >= bpc : pred == bpc) + { + first_pred_of_loop = std::min(pred, first_pred_of_loop); + } + } + + valid = first_pred_of_loop != SPU_LS_SIZE; + + const auto& bb_connect = ::at32(m_bbs, valid ? 
first_pred_of_loop : bpc); + + invalid = invalid || !valid; + valid = false; + + // Check loop connector block (must jump to block-next or to loop-start) + u32 targets_count = 0; + + for (u32 target : get_block_targets(first_pred_of_loop)) + { + valid = true; + targets_count++; + + if (first_pred_of_loop == bpc) + { + continue; + } + + if (target != bpc) + { + if (target != first_pred_of_loop + bb_connect.size * 4) + { + invalid = true; + } + } + } + + if (targets_count > 2) + { + invalid = true; + } + + const bool is_two_block_loop = targets_count == 1; + + invalid = invalid || !valid; + valid = false; + + // Check loop body block (must jump to last-block or another location) + + for (u32 block_pc = bpc; !invalid;) + { + targets_count = 0; + + const u32 cond_next = block_pc + ::at32(m_bbs, block_pc).size * 4; + valid = false; + + bool is_end = false; + + for (u32 target : get_block_targets(block_pc)) + { + targets_count++; + + if (target == cond_next) + { + // Conditional branch + valid = true; + } + + if (target <= block_pc && target > bpc) + { + // Branch backwards + invalid = true; + } + + if (target == bpc) + { + is_end = true; + } + } + + // if (bpc != block_pc) + // { + // for (u32 pred : get_block_preds(block_pc)) + // { + // if (pred < bpc || pred > first_pred_of_loop + ::at32(m_bbs, first_pred_of_loop).size * 4) + // { + // invalid = true; + // break; + // } + // } + // } + + if (targets_count > 2) + { + invalid = true; + break; + } + + if (cond_next == first_pred_of_loop && is_two_block_loop) + { + valid = true; + break; + } + + if (!valid) + { + break; + } + + if (bpc == first_pred_of_loop || is_end) + { + break; + } + + if (targets_count == 2) + { + expected_sup_conds++; + } + + block_pc = cond_next; + } + + invalid = invalid || !valid; + + if (bb_body.size > 2 && !invalid) + { + // Early filtering of false positives + const spu_opcode_t op{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4 + bb_body.size - 2))}; + const spu_opcode_t 
op2{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4))}; + + switch (g_spu_itype.decode(op.opcode)) + { + case spu_itype::RDCH: invalid = op.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + + switch (g_spu_itype.decode(op2.opcode)) + { + case spu_itype::RDCH: invalid = invalid || op2.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + } + + if (valid && !invalid && !reduced_loop_all.count(bpc) && expected_sup_conds == 0) + { + const auto reduced_loop = &block_state_it.reduced_loop; + reduced_loop->discard(); + reduced_loop->active = true; + reduced_loop->loop_pc = bpc; + reduced_loop->loop_end = first_pred_of_loop; + reduced_loop->expected_sup_conds = expected_sup_conds; + reduced_loop->is_two_block_loop = is_two_block_loop; + } + }; + for (u32 wf = 0, wi = 0, wa = entry_point, bpc = wa; wf <= 1;) { const bool is_form_block = wf == 0; @@ -5121,6 +5367,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby; const auto atomic16 = is_pattern_match ? &::at32(reg_state_it, wi).atomic16 : &dummy16; const auto rchcnt_loop = is_pattern_match ? 
&::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop; + const auto reduced_loop = &::at32(reg_state_it, wi).reduced_loop; const u32 pos = wa; @@ -5244,10 +5491,71 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } }; + const auto break_reduced_loop_pattern = [&](u32 cause, reduced_loop_t previous) + { + if (previous.active && previous.loop_pc != SPU_LS_SIZE && reduced_loop_all.count(previous.loop_pc) == 0) + { + g_fxo->get().breaking_reason[cause]++; + + if (!spu_log.notice) + { + return; + } + + previous.active = false; + previous.failed = true; + + reduced_loop_all[previous.loop_pc] = previous; + + std::string break_error = fmt::format("Reduced loop pattern breakage [%x cause=%u] (read_pc=0x%x)", pos, cause, previous.loop_pc); + + const auto values = sort_breakig_reasons(g_fxo->get().breaking_reason); + + std::string tracing = "Top Breaking Reasons:"; + + usz i = 0; + usz fail_count = 0; + bool switched_to_minimal = false; + + for (auto it = values.begin(); it != values.end(); i++, it++) + { + fail_count += it->second; + + if (i >= 12) + { + continue; + } + + if (i < 8 && it->second > 1) + { + fmt::append(tracing, " [cause=%u, n=%d]", it->first, it->second); + } + else + { + if (!std::exchange(switched_to_minimal, true)) + { + fmt::append(tracing, "; More:"); + } + + fmt::append(tracing, " %u", it->first); + } + } + + fmt::append(tracing, " of %d failures", fail_count); + spu_log.notice("%s\n%s", break_error, tracing); + + std::string block_dump; + this->dump(result, block_dump, previous.loop_pc, previous.loop_end + 1); + + spu_log.notice("SPU Block Dump:\n%s", block_dump); + } + }; + const auto break_all_patterns = [&](u32 cause) { break_putllc16(cause, atomic16->discard()); break_channel_pattern(cause, rchcnt_loop->discard()); + break_reduced_loop_pattern(cause, reduced_loop->discard()); }; const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2) @@ -5309,16 +5617,6 @@ spu_program spu_recompiler_base::analyse(const 
be_t* ls, u32 entry_point, s u32 stackframe_pc = SPU_LS_SIZE; usz entry_index = umax; - auto get_block_targets = [&](u32 pc) -> std::span - { - if (m_block_info[pc / 4] && m_bbs.count(pc)) - { - return m_bbs.at(pc).targets; - } - - return {}; - }; - u32 target_pc = SPU_LS_SIZE; bool insert_entry = false; bool is_code_backdoor = false; @@ -5508,7 +5806,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } - const u32 previous_pc = m_bbs.at(reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; + const u32 previous_pc = ::at32(m_bbs, reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; bool may_return = previous_pc + 4 != entry_point + result.data.size() * 4 && (m_ret_info[(previous_pc / 4) + 1] || m_entry_info[previous_pc / 4]); @@ -5537,6 +5835,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Backup analyser information const auto atomic16_info = reg_state_it[stackframe_it].atomic16; const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop; + const auto reduced_loop_info = reg_state_it[stackframe_it].reduced_loop; // Clean from the back possible because it does not affect old indices // Technically should always do a full cleanup at the moment @@ -5562,6 +5861,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.trace("Emplacing: block_id=%d, pc=0x%x, target_it=%d/%d, new_pc=0x%x (has_it=%d)", reg_state_it[stackframe_it].iterator_id, stackframe_pc, entry_index + 1, target_size, target_pc, atomic16_info.active); auto& next = reg_state_it.emplace_back(target_pc, stackframe_it, 0); + initiate_patterns(next, target_pc, true); + if (!is_code_backdoor) { // Restore analyser information (if not an entry) @@ -5569,6 +5870,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (previous_pc != rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target) next.rchcnt_loop 
= rchcnt_loop_info; + + if (previous_pc + 4 == target_pc && reduced_loop_info.loop_pc != reduced_loop_info.loop_end && reduced_loop_info.active && target_pc <= reduced_loop_info.loop_end) + next.reduced_loop = reduced_loop_info; } else { @@ -5604,15 +5908,30 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!infos.empty()) { - reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++;; + reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++; + + initiate_patterns(reg_state_it.back(), ::at32(infos, entry_point)->pc, true); } } } + const auto prev_wi = wi - 1; + if (prev_wi != umax && ::at32(reg_state_it, prev_wi).reduced_loop.active) + { + const auto reduced_loop = &::at32(reg_state_it, prev_wi).reduced_loop; + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + + } + } + if (wi < reg_state_it.size()) { wa = ::at32(reg_state_it, wi).pc; bpc = wa; + + initiate_patterns(::at32(reg_state_it, wi), bpc, false); } }; @@ -5737,7 +6056,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!is_form_block) { // Call for external code - break_all_patterns(25); + break_putllc16(25, atomic16->discard()); + break_channel_pattern(25, rchcnt_loop->discard()); } } @@ -5762,6 +6082,143 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto op = spu_opcode_t{data}; const auto type = g_spu_itype.decode(data); + if (reduced_loop->active && !(type & spu_itype::zregmod)) + { + const u32 op_rt = type & spu_itype::_quadrop ? 
+op.rt4 : +op.rt; + + u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; + + if (::at32(m_use_ra, pos / 4)) + { + ra = op.ra; + } + + if (::at32(m_use_rb, pos / 4)) + { + rb = op.rb; + } + + if (::at32(m_use_rc, pos / 4)) + { + rc = op.rc; + } + + bool is_move_register_op = false; + + switch (type) + { + case spu_itype::SHLQBYI: + { + is_move_register_op = op.i7 == 0; + break; + } + // Technically only ORI is needed but I am taking into account possible third-party SPU compilers or hand-written assembly + case spu_itype::ORI: + case spu_itype::ORHI: + case spu_itype::ORBI: + case spu_itype::AI: + case spu_itype::AHI: + case spu_itype::XORI: + case spu_itype::XORHI: + case spu_itype::XORBI: + { + is_move_register_op = op.si10 == 0; + break; + } + case spu_itype::ANDI: + case spu_itype::ANDHI: + case spu_itype::ANDBI: + { + is_move_register_op = op.si10 == -1; + break; + } + default: + { + break; + } + } + + u32 reg_pos = SPU_LS_SIZE; + + auto org = reduced_loop->get_reg(op_rt); + + u32 reg_first = s_reg_max; + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max && reg != reg_first) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && arg->modified >= 1) + { + reg_first = reg; + + if (reg_first != s_reg_max && !is_move_register_op) + { + // Multiple origins + org.add_instruction_modifier(spu_itype::UNK, op.opcode); + break; + } + } + } + } + + if (reg_first == s_reg_max) + { + org = {}; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + } + else if (reg_first == rb) + { + std::swap(ra, rb); + } + else if (reg_first == rc) + { + std::swap(ra, rc); + } + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && reg != op_rt) + { + if (reg_first == reg) + { + org = *arg; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + + continue; + } + + org.join_with_this(*arg); + } + else + { + 
org.add_register_origin(reg); + } + } + } + + *ensure(reduced_loop->find_reg(op_rt)) = org; + } + + if (reduced_loop->active && ((type & spu_itype::memory) || type == spu_itype::STOP || type == spu_itype::STOPD)) + { + reduced_loop->is_constant_expression = false; + } + // For debugging if (false && likely_putllc_loop && is_pattern_match) { @@ -5848,12 +6305,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } - if (type == spu_itype::SYNC) - { - // Remember - sync = true; - } - + break_reduced_loop_pattern(19, reduced_loop->discard()); break; } @@ -5880,8 +6332,57 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + case spu_itype::BR: case spu_itype::BRA: { + if (reduced_loop->active) + { + if (!reduced_loop->is_two_block_loop || !reduced_loop->has_cond_state) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + const auto& b = ::at32(m_bbs, reduced_loop->loop_pc); + const auto& b2 = ::at32(m_bbs, bpc); + + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i])) + { + if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i)) + { + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + } + else + { + reduced_loop->loop_args.set(i); + } + } + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + break; } @@ -5891,7 +6392,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const u32 next_pc = spu_branch_target(pos, 1); const u32 target 
= spu_branch_target(pos, op.i16); - if (rchcnt_loop->active) + while (rchcnt_loop->active) { const reg_state_t& rt = vregs[op.rt]; @@ -5907,16 +6408,663 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s rchcnt_loop->conditioned = true; rchcnt_loop->branch_pc = pos; rchcnt_loop->branch_target = rchcnt_loop->product_test_negate != (type == spu_itype::BRZ) ? target : next_pc; - break; } + + break; } - break; + [[fallthrough]]; } - case spu_itype::BR: case spu_itype::BRHZ: case spu_itype::BRHNZ: { + const u32 next_pc = spu_branch_target(pos, 1); + const u32 target = spu_branch_target(pos, op.i16); + + const bool is_u16_jump = type == spu_itype::BRHZ || type == spu_itype::BRHNZ; + const bool is_jump_zero = (type == spu_itype::BRZ || type == spu_itype::BRHZ) ^ reduced_loop->is_two_block_loop; + + while (reduced_loop->active) + { + if (reduced_loop->expected_sup_conds) + { + break_reduced_loop_pattern(50, reduced_loop->discard()); + break; + } + + const u32 op_rt = op.rt; + + const auto reg = reduced_loop->find_reg(op_rt); + + if (!reg/* || reg->modified == 0*/) // See special case regarding branch with direct comparison with 0 + { + break_reduced_loop_pattern(1, reduced_loop->discard()); + break; + } + + bool should_have_argument_dictator = false; + bool should_have_argument_increment = false; + bool cond_val_incr_before_cond = false; + bool ends_with_comparison = false; + + bool pattern_ok1 = true; + + switch (reg->mod1_type) + { + case spu_itype::A: + { + should_have_argument_increment = true; + [[fallthrough]]; + } + case spu_itype::AI: + case spu_itype::AHI: + { + cond_val_incr_before_cond = true; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case 
spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + if (reg->modified == 0) + { + // Special case: target may be sourced from another register which would be the loop dictator + break; + } + + pattern_ok1 = false; + break; + } + } + + if (!pattern_ok1) + { + break_reduced_loop_pattern(9, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + switch (reg->mod2_type) + { + case spu_itype::A: + { + should_have_argument_increment = true; + [[fallthrough]]; + } + case spu_itype::AI: + case spu_itype::AHI: + { + if (cond_val_incr_before_cond) + { + // AI twice + break_reduced_loop_pattern(8, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + cond_val_incr_before_cond = false; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + pattern_ok1 = true; + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + pattern_ok1 = false; + break; + } + } + } + + 
if (!pattern_ok1) + { + break_reduced_loop_pattern(10, reduced_loop->discard()); + break; + } + + bool found_loop_dictator = false; + bool found_loop_argument_for_dictator = false; + u32 null_regs_found = 0; + + for (u32 i = 0; i < reg->regs.size() && reduced_loop->active; i++) + { + if (::at32(reg->regs, i)) + { + if (0) if (i == op_rt || reg->modified == 0) + { + // Special case: direct comparison with zero for 32-bits (the only supported form by SPU) + + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + break_reduced_loop_pattern(22, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_mask = u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_size = u32{umax}; + + auto comp_reg = i == op_rt ? reg : reduced_loop->find_reg(i); + + if (!comp_reg || !comp_reg->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(4, reduced_loop->discard()); + break; + } + + ensure(reg->modified == 1 || i != op_rt); + + reduced_loop->cond_val_incr = static_cast(comp_reg->IMM); + reduced_loop->cond_val_incr_before_cond = reg->modified == 1; + reduced_loop->cond_val_register_idx = i; + reduced_loop->cond_val_compare = CMP_NOT_EQUAL; + reduced_loop->cond_val_is_immediate = true; + + found_loop_dictator = true; + break; + } + + auto reg_org = reduced_loop->find_reg(i); + u32 reg_index = i; + + if (reg_org && !cond_val_incr_before_cond && reg_org->modified == 0 && reg_org->regs.count() - 1u <= 1u && !::at32(reg_org->regs, i)) + { + for (u32 j = 0; j <= s_reg_127; j++) + { + if (::at32(reg_org->regs, j)) + { + if (const auto reg_found = reduced_loop->find_reg(j)) + { + if (reg_found->modified) + { + reg_org = reg_found; + reg_index = j; + break; + } + } + } + } + } + + if (!reg_org || reg_org->is_null(reg_index)) + { + // if (found_loop_dictator && !reduced_loop->cond_val_incr_is_immediate) + // { + // 
ensure(reduced_loop->cond_val_incr < s_reg_max); + + // } + // if (!should_have_argument_dictator) + // { + // break_reduced_loop_pattern(11, reduced_loop->discard()); + // break; + // } + + // if (found_loop_argument_for_dictator) + // { + // break_reduced_loop_pattern(6, reduced_loop->discard()); + // break; + // } + + // found_loop_argument_for_dictator = true; + // reduced_loop->cond_val_is_immediate = false; + + // if (found_loop_dictator) + // { + // ensure(i == reduced_loop->cond_val_register_argument_idx); + // } + // else + // { + // reduced_loop->cond_val_register_argument_idx = i; + // } + + // if (found_loop_dictator && reg->regs.count() == 2) + // { + // break; + // } + + null_regs_found++; + continue; + } + + if (found_loop_dictator) + { + break_reduced_loop_pattern(13, reduced_loop->discard()); + break; + } + + found_loop_dictator = true; + + if (!reg_org->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(7, reduced_loop->discard()); + break; + } + + u32 cond_val_incr = static_cast(reg_org->IMM); + + if (reg_org->mod1_type == spu_itype::AI || reg_org->mod1_type == spu_itype::AHI) + { + reduced_loop->cond_val_incr_is_immediate = true; + reduced_loop->cond_val_incr = static_cast(reg_org->IMM); + } + else if (reg_org->mod1_type == spu_itype::A) + { + reduced_loop->cond_val_incr_is_immediate = false; + + const u32 op_ra = spu_opcode_t{reg->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(25, reduced_loop->discard()); + break; + } + + const u32 incr_arg_reg = reg_index == op_ra ? 
op_rb : op_ra; + + if (!reduced_loop->is_reg_null(incr_arg_reg)) + { + break_reduced_loop_pattern(26, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr = incr_arg_reg; + } + else + { + break_reduced_loop_pattern(28, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr_before_cond = cond_val_incr_before_cond; + + u64 cmp_mask = 0; + compare_direction cmp_direction{}; + + if (!ends_with_comparison) + { + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + cmp_mask = is_u16_jump ? u16{umax} : u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_is_immediate = true; + cmp_direction = CMP_NOT_EQUAL; + } + else if (!should_have_argument_dictator) + { + reduced_loop->cond_val_min = reg->IMM; + reduced_loop->cond_val_is_immediate = true; + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + cmp_direction = CMP_LGREATER; + break; + } + default: + { + break_reduced_loop_pattern(21, reduced_loop->discard()); + } + } + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CGTI: + case spu_itype::CLGTI: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTHI: + case spu_itype::CEQHI: + case spu_itype::CGTHI: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQBI: + case spu_itype::CGTBI: + case spu_itype::CLGTBI: + { + cmp_mask = u8{umax}; + break; + } + default: break_reduced_loop_pattern(21, reduced_loop->discard()); + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ 
CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + } + else + { + const u32 op_ra = spu_opcode_t{reg->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + cmp_direction = CMP_LGREATER; + break; + } + default: ensure(false); + } + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CGT: + case spu_itype::CLGT: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTH: + case spu_itype::CEQH: + case spu_itype::CGTH: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQB: + case spu_itype::CGTB: + case spu_itype::CLGTB: + { + cmp_mask = u8{umax}; + break; + } + default: ensure(false); + } + + if (op_ra != i) + { + // Compare is on the oppsoite direction + // This variation exists only via register mode (due to lack of SPU opcodes) + cmp_direction = compare_direction{cmp_direction ^ CMP_TURNAROUND_FLAG}; + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + + // The loop dictator 
is the register that is not the argument + const u32 loop_arg_reg = reg_index == op_ra ? op_rb : op_ra; + const u32 loop_dict_reg = reg_index == op_ra ? op_ra : op_rb; + reduced_loop->cond_val_is_immediate = false; + + if (found_loop_argument_for_dictator) + { + ensure(loop_arg_reg == reduced_loop->cond_val_register_argument_idx); + } + else + { + reduced_loop->cond_val_register_argument_idx = loop_arg_reg; + } + + if (!reduced_loop->is_reg_null(loop_arg_reg)) + { + break_reduced_loop_pattern(27, reduced_loop->discard()); + break; + } + + found_loop_argument_for_dictator = true; + } + + if (cmp_direction == CMP_EQUAL) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + + if (cmp_mask == u16{umax} && !is_u16_jump) + { + break_reduced_loop_pattern(14, reduced_loop->discard()); + break; + } + + if (cmp_mask == u8{umax}) + { + bool instructions_ok = false; + + if (is_u16_jump) + { + // If ANDI(0xff) is used, although unlikely, it fine as well for 16-bits + instructions_ok = FN(x == spu_itype::XSBH || x == spu_itype::ANDI)(!cond_val_incr_before_cond ? reg->mod2_type : reg->mod3_type); + } + else + { + instructions_ok = FN(x == spu_itype::ANDI)(!cond_val_incr_before_cond ? 
reg->mod2_type : reg->mod3_type); + } + + if (!instructions_ok) + { + break_reduced_loop_pattern(15, reduced_loop->discard()); + break; + } + } + + reduced_loop->cond_val_compare = cmp_direction; + reduced_loop->cond_val_mask = cmp_mask; + reduced_loop->cond_val_register_idx = reg_index; + + // if (!should_have_argument_dictator && reg->regs.count() == 1) + // { + // break; + // } + + // if (found_loop_argument_for_dictator && reg->regs.count() == 2) + // { + // break; + // } + } + } + + if (!found_loop_dictator) + { + break_reduced_loop_pattern(16, reduced_loop->discard()); + } + + if (should_have_argument_dictator && !found_loop_argument_for_dictator) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + + if (reduced_loop->active) + { + ensure(reduced_loop->cond_val_register_idx != umax); + + if (reduced_loop->is_two_block_loop) + { + reduced_loop->has_cond_state = true; + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + const auto& b = ::at32(m_bbs, reduced_loop->loop_pc); + const auto& b2 = ::at32(m_bbs, bpc); + + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i])) + { + if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i)) + { + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + } + else + { + reduced_loop->loop_args.set(i); + } + } + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + + break; + } + break; } @@ -5929,17 +7077,49 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::HLGTI: case spu_itype::LNOP: case 
spu_itype::NOP: - case spu_itype::MTSPR: case spu_itype::FSCRWR: { // Do nothing break; } - + + case spu_itype::MTSPR: + { + break_all_patterns(99); + break; + } + case spu_itype::WRCH: { break_channel_pattern(56, rchcnt_loop->discard()); + if (reduced_loop->active) + { + switch (op.ra) + { + case MFC_EAL: + case MFC_LSA: + case MFC_TagID: + case MFC_Size: + case MFC_EAH: + case SPU_WrDec: + case SPU_WrSRR0: + case SPU_WrEventAck: + case SPU_Set_Bkmk_Tag: + case SPU_PM_Start_Ev: + case SPU_PM_Stop_Ev: + case MFC_WrTagMask: + { + break; + } + default: + { + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + } + } + switch (op.ra) { case MFC_EAL: @@ -6202,6 +7382,14 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const bool is_read = type == spu_itype::RDCH; bool invalidate = true; + if (!is_read || op.ra != SPU_RdDec) + { + if (reduced_loop->active) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + } + const auto it = rchcnt_loop_all.find(pos); if (it != rchcnt_loop_all.end()) @@ -7111,17 +8299,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; - if (m_use_ra.test(pos / 4)) + if (::at32(m_use_ra, pos / 4)) { ra = op.ra; } - if (m_use_rb.test(pos / 4)) + if (::at32(m_use_rb, pos / 4)) { rb = op.rb; } - if (type & spu_itype::_quadrop && m_use_rc.test(pos / 4)) + if (::at32(m_use_rc, pos / 4)) { rc = op.rc; } @@ -7169,6 +8357,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { for (u32 next_target : ::at32(m_targets, pos)) { + if (next_target == SPU_LS_SIZE) + { + continue; + } + add_block(next_target); } @@ -7353,6 +8546,66 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (const auto& [loop_pc, pattern] : reduced_loop_all) + { + if (!pattern.active || pattern.loop_pc == SPU_LS_SIZE) + { + continue; + } + + if (inst_attr attr = 
m_inst_attrs[(loop_pc - entry_point) / 4]; attr == inst_attr::none) + { + const u64 hash = loop_pc / 4 + read_from_ptr>(func_hash.data()); + + add_pattern(inst_attr::reduced_loop, loop_pc - result.entry_point, 0, std::make_shared(pattern)); + + std::string regs = "{"; + + for (const auto& [reg_num, reg] : pattern.regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u", reg_num); + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (::at32(pattern.loop_writes, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-w", i); + } + + if (::at32(pattern.loop_args, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-r", i); + } + } + + regs += " }"; + + spu_log.success("Reduced Loop Pattern Detected! (REGS: %s, DICT: r%d, ARG: %s, Incr: %s (%s), CMP/Size: %s/%u, loop_pc=0x%x, 0x%x-%s)", regs, pattern.cond_val_register_idx + , pattern.cond_val_is_immediate ? fmt::format("0x%x", pattern.cond_val_min) : fmt::format("r%d", pattern.cond_val_register_argument_idx) + , pattern.cond_val_incr_is_immediate ? fmt::format("%d", static_cast(pattern.cond_val_incr)) : fmt::format("r%d", pattern.cond_val_incr), pattern.cond_val_incr_before_cond ? "BEFORE" : "AFTER" + , pattern.cond_val_compare, std::popcount(pattern.cond_val_mask), loop_pc, entry_point, func_hash); + } + } + if (likely_putllc_loop && !had_putllc_evaluation) { spu_log.notice("Likely missed PUTLLC16 patterns. 
(entry=0x%x)", entry_point); @@ -7363,7 +8616,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } - if (!m_patterns.empty()) + if (!m_patterns.empty() && g_cfg.core.spu_debug) { std::string out_dump; dump(result, out_dump); @@ -7386,7 +8639,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return result; } -void spu_recompiler_base::dump(const spu_program& result, std::string& out) +void spu_recompiler_base::dump(const spu_program& result, std::string& out, u32 block_min, u32 block_max) { SPUDisAsm dis_asm(cpu_disasm_mode::dump, reinterpret_cast(result.data.data()), result.lower_bound); @@ -7409,10 +8662,18 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out) hash = "N/A"; } - fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + if (block_min == 0) + { + fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + } for (auto& bb : m_bbs) { + if (bb.first < block_min || bb.first >= block_max) + { + continue; + } + if (m_block_info[bb.first / 4]) { fmt::append(out, "A: [0x%05x] %s [%s]\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? 
"Chunk" : "Entry") : "Block", spu_block_hash{(hash_start & -65536) + bb.first / 4}); @@ -8435,9 +9696,9 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info) +void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr) { - m_patterns[start] = pattern_info{info}; + m_patterns[start] = pattern_info{info, info_ptr}; m_inst_attrs[start / 4] = attr; } diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 856a039e5e..b13c27e376 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -132,6 +132,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator llvm::MDNode* m_md_unlikely; llvm::MDNode* m_md_likely; + llvm::MDNode* m_md_spu_memory_domain; + llvm::MDNode* m_md_spu_context_domain; struct block_info { @@ -139,7 +141,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator spu_recompiler_base::block_info* bb{}; // Current block's entry block - llvm::BasicBlock* block; + llvm::BasicBlock* block{}; // Final block (for PHI nodes, set after completion) llvm::BasicBlock* block_end{}; @@ -155,6 +157,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Store instructions std::array store{}; + bool block_wide_reg_store_elimination = false; // Store reordering/elimination protection std::array store_context_last_id = fill_array(0); // Protects against illegal forward ordering @@ -174,6 +177,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const usz first_id = store_context_first_id[i]; return counter != 1 && first_id != umax && counter < first_id; } + + bool is_gpr_not_NaN_hint(u32 i) const noexcept + { + return block_wide_reg_store_elimination && ::at32(bb->reg_maybe_float, i) && ::at32(bb->reg_use, i) >= 3 && !::at32(bb->reg_mod, i); + 
} }; struct function_info @@ -364,7 +372,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { if (i != s_reg_lr && i != s_reg_sp && (i < s_reg_80 || i > s_reg_127)) { - m_block->reg[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i)); + m_block->reg[i] = get_reg_fixed(i, get_reg_type(i)); } } @@ -549,6 +557,40 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return _ptr(m_thread, ::offset32(offset_args...)); } + template + T* spu_mem_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + + return inst; + } + + template + T* spu_context_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + + return inst; + } + // Return default register type llvm::Type* get_reg_type(u32 index) { @@ -709,8 +751,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!reg) { + if (m_block && m_block->block_wide_reg_store_elimination) + { + fmt::throw_exception("Unexpected load: [%s] at 0x%x (gpr=r%d)", m_hash, m_pos, index); + } + // Load register value if necessary reg = m_finfo && m_finfo->load[index] ? 
m_finfo->load[index] : m_ir->CreateLoad(get_reg_type(index), init_reg_fixed(index)); + spu_context_attr(reg); } if (reg->getType() == get_type()) @@ -920,6 +968,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (m_block) { + if (m_block->block_wide_reg_store_elimination) + { + // Don't save registers for the current block iteration + // Affected optimizations: + // 1. Single-block reduced loop + return; + } + // Keep the store's location in history of gpr preservaions m_block->store_context_last_id[index] = m_block->store_context_ctr[index]; m_block->store_context_first_id[index] = std::min(m_block->store_context_first_id[index], m_block->store_context_ctr[index]); @@ -935,7 +991,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } // Write register to the context - _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_reg_type(index)), addr); + _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : bitcast(value, get_reg_type(index)), addr); + + spu_context_attr(_store); } template @@ -1046,7 +1104,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Update PC for current or explicitly specified instruction address void update_pc(u32 target = -1) { - m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? 
target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc)))->setVolatile(true); } // Call cpu_thread::check_state if necessary and return or continue (full check) @@ -1055,7 +1113,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto pstate = spu_ptr(&spu_thread::state); const auto _body = llvm::BasicBlock::Create(m_context, "", m_function); const auto check = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(get_type(), pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely); + m_ir->CreateCondBr(m_ir->CreateICmpEQ(spu_context_attr(m_ir->CreateLoad(get_type(), pstate, true)), m_ir->getInt32(0)), _body, check, m_md_likely); m_ir->SetInsertPoint(check); update_pc(addr); @@ -1066,14 +1124,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateCall(m_test_state, {m_thread}); if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateBr(_body); @@ -1509,6 +1567,16 @@ public: m_md_likely = llvm::MDTuple::get(m_context, {md_name, md_high, md_low}); m_md_unlikely = llvm::MDTuple::get(m_context, {md_name, md_low, md_high}); + const auto domain = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_mem")}); + const auto scope = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_mem_scope"), domain}); + + m_md_spu_memory_domain = llvm::MDNode::get(m_context, scope); + + const auto domain2 = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_ctx")}); + const 
auto scope2 = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_ctx_scope"), domain2}); + + m_md_spu_context_domain = llvm::MDNode::get(m_context, scope2); + // Initialize transform passes clear_transforms(); #ifdef ARCH_ARM64 @@ -1678,7 +1746,7 @@ public: // Emit state check const auto pstate = spu_ptr(&spu_thread::state); - m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type(), pstate), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); + m_ir->CreateCondBr(m_ir->CreateICmpNE(spu_context_attr(m_ir->CreateLoad(get_type(), pstate)), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); // Emit code check u32 check_iterations = 0; @@ -2059,6 +2127,43 @@ public: bool need_check = false; m_block->bb = &bb; + // [1gJ45f2-0x00a40]: 16.4982% (113258) + // [ZsQTud1-0x0924c]: 6.1202% (42014) + // [ZsQTud1-0x08e54]: 5.6610% (38862) + // [0000000-0x3fffc]: 4.3764% (30043) + // [Zh4tpJM-0x00bcc]: 3.7908% (26023) + // [CFt8hXu-0x063b8]: 3.6177% (24835) + // [8YJCUjv-0x0ad18]: 3.2417% (22254) + // [Try3XHn-0x0f018]: 2.3721% (16284) + // [s6ti9iu-0x07678]: 1.8464% (12675) + // [oyxkAPv-0x0c22c]: 1.7776% (12203) + // [Q0jLqH4-0x00324]: 1.6015% (10994) + static const std::array, 4> to_nop + { + { } + }; + + bool found_block = false; + + for (auto& [hash, pos] : to_nop) + { + if (m_hash.find(hash) <= 2 && baddr == pos) + { + found_block = true; + break; + } + } + + if (found_block) + { + for (u32 i = 0; i < 100; i++) + { + auto value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::last_getllar_lsa)); + auto mod_val = m_ir->CreateFDiv(value, llvm::ConstantFP::get(value->getType(), 1.1 + i)); + m_ir->CreateStore(value, spu_ptr(&spu_thread::last_getllar_lsa)); + } + } + if (!bb.preds.empty()) { // Initialize registers and build PHI nodes if necessary @@ -2174,6 +2279,490 @@ public: check_state(baddr); } + const bool is_reduced_loop = m_inst_attrs[(baddr - start) / 4] == inst_attr::reduced_loop; + const auto reduced_loop_info = 
is_reduced_loop ? std::static_pointer_cast(ensure(m_patterns.at(baddr - start).info_ptr)) : nullptr; + + BasicBlock* block_optimization_phi_parent = nullptr; + const auto block_optimization_inner = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-it-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_exit_early = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-exit-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_next = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b2-0x%x", m_pos), m_function) : nullptr; + + std::array reduced_loop_phi_nodes{}; + std::array reduced_loop_init_regs{}; + + auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time, u32 reserve_iterations) + { + llvm::ICmpInst::Predicate compare{}; + + switch (reduced_loop_info->cond_val_compare) + { + case CMP_SLESS: compare = ICmpInst::ICMP_SLT; break; + case CMP_SGREATER: compare = ICmpInst::ICMP_SGT; break; + case CMP_EQUAL: compare = ICmpInst::ICMP_EQ; break; + case CMP_LLESS: compare = ICmpInst::ICMP_ULT; break; + case CMP_LGREATER: compare = ICmpInst::ICMP_UGT; break; + case CMP_SGREATER_EQUAL: compare = ICmpInst::ICMP_SGE; break; + case CMP_SLOWER_EQUAL: compare = ICmpInst::ICMP_SLE; break; + case CMP_NOT_EQUAL: compare = ICmpInst::ICMP_NE; break; + case CMP_LGREATER_EQUAL: compare = ICmpInst::ICMP_UGE; break; + case CMP_LLOWER_EQUAL: compare = ICmpInst::ICMP_ULE; break; + { + break; + } + case CMP_UNKNOWN: + case CMP_NOT_EQUAL2: + case CMP_EQUAL2: + default: + { + ensure(false); + break; + } + } + + llvm::Value* loop_dictator_before_adjustment{}; + llvm::Value* loop_dictator_after_adjustment{}; + + spu_opcode_t reg_target{}; + reg_target.rt = static_cast(reduced_loop_info->cond_val_register_idx); + + if (reg_target.rt != reduced_loop_info->cond_val_register_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", 
reduced_loop_info->cond_val_register_idx); + } + + if (!m_block->reg[reg_target.rt]) + { + m_block->reg[reg_target.rt] = reduced_loop_init_regs[reg_target.rt]; + } + + switch (reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + default: + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", reduced_loop_info->cond_val_mask); + } + } + + const u32 type_bits = std::popcount(reduced_loop_info->cond_val_mask); + + llvm::Value* cond_val_incr = nullptr; + + if (reduced_loop_info->cond_val_incr_is_immediate) + { + cond_val_incr = m_ir->getIntN(type_bits, reduced_loop_info->cond_val_incr & reduced_loop_info->cond_val_mask); + } + else + { + spu_opcode_t reg_incr{}; + reg_incr.rt = static_cast(reduced_loop_info->cond_val_incr); + + if (reg_incr.rt != reduced_loop_info->cond_val_incr) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", reduced_loop_info->cond_val_incr); + } + switch (reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + } + } + + if (reduced_loop_info->cond_val_incr_before_cond && 
!reduced_loop_info->cond_val_incr_before_cond_taken_in_account) + { + loop_dictator_after_adjustment = m_ir->CreateAdd(loop_dictator_before_adjustment, cond_val_incr); + } + else + { + loop_dictator_after_adjustment = loop_dictator_before_adjustment; + } + + llvm::Value* loop_argument = nullptr; + + if (reduced_loop_info->cond_val_is_immediate) + { + loop_argument = m_ir->CreateTrunc(m_ir->getInt64(reduced_loop_info->cond_val_min & reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType()); + } + else + { + spu_opcode_t reg_target2{}; + reg_target2.rt = static_cast(reduced_loop_info->cond_val_register_argument_idx); + + if (reg_target2.rt != reduced_loop_info->cond_val_register_argument_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", reduced_loop_info->cond_val_register_argument_idx); + } + + switch (reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + } + } + + llvm::Value* condition = nullptr; + + if (reserve_iterations == 1) + { + condition = m_ir->CreateICmp(compare, loop_dictator_after_adjustment, loop_argument); + } + // else if ((reduced_loop_info->cond_val_compare == CMP_LGREATER || (reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && reduced_loop_info->cond_val_is_immediate && reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0) + // { + // const auto cond_val_incr_multiplied = m_ir->CreateMul(cond_val_incr, reserve_iterations - 1); + // condition = m_ir->CreateICmp(compare, select(m_ir->CreateICmpUGE(cond_val_incr_multiplied, 
loop_dictator_after_adjustment), m_ir->CreateAdd(loop_dictator_after_adjustment, cond_val_incr_multiplied), m_ir->getIntN(type_bits, 0)), loop_argument); + // } + else + { + //debugtrap(); + + llvm::Value* prev_it = loop_dictator_after_adjustment; + + for (u32 i = 0; i < reserve_iterations; i++) + { + if (i) + { + prev_it = m_ir->CreateAdd(prev_it, cond_val_incr); + } + + const auto also_cond = m_ir->CreateICmp(compare, prev_it, loop_argument); + condition = condition ? m_ir->CreateAnd(condition, also_cond) : also_cond; + } + } + + if (!is_second_time) + { + for (u32 i = 0, count = 0, prev_i = umax;; i++) + { + const bool is_last = !(count <= 20 && i < s_reg_max); + + if (is_last || m_block->is_gpr_not_NaN_hint(i)) + { + count++; + + if (prev_i == umax) + { + if (!is_last) + { + prev_i = i; + continue; + } + + break; + } + + auto access_gpr = [&](u32 index) + { + spu_opcode_t op_arg{}; + op_arg.ra = index; + return get_vr(op_arg.ra); + }; + + // OR LSB to convert infinity to NaN + llvm::Value* arg1 = bitcast(access_gpr(prev_i) | splat(1)).eval(m_ir); + llvm::Value* arg2 = is_last ? arg1 : bitcast(access_gpr(i) | splat(1)).eval(m_ir); + + llvm::Value* acc = m_ir->CreateSExt(m_ir->CreateFCmpUNO(arg1, arg2), get_type()); + + // Pattern for PTEST + acc = m_ir->CreateBitCast(acc, get_type()); + + llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0}); + + for (u64 i = 1; i < 2; i++) + { + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i)); + } + + // Compare result with zero + const auto cond_nans = m_ir->CreateICmpEQ(elem, m_ir->getInt64(0)); + condition = m_ir->CreateAnd(cond_nans, condition); + prev_i = umax; + } + + if (is_last) + { + break; + } + } + } + + //condition = m_ir->getInt1(0); + + m_ir->CreateCondBr(condition, optimization_block, block_optimization_next); + }; + + if (is_reduced_loop) + { + for (u32 i = 0; i < s_reg_max; i++) + { + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? 
get_type() : get_reg_type(i); + + if (i < reduced_loop_info->loop_dicts.size() && (reduced_loop_info->loop_dicts.test(i) || reduced_loop_info->loop_writes.test(i))) + { + // Connect registers which are used and then modified by the block + auto value = m_block->reg[i]; + + if (!value || value->getType() != type) + { + value = get_reg_fixed(i, type); + } + + reduced_loop_init_regs[i] = value; + } + else if (i < reduced_loop_info->loop_dicts.size() && reduced_loop_info->loop_args.test(i)) + { + // Load registers used as arguments of the loop + if (!m_block->reg[i]) + { + m_block->reg[i] = get_reg_fixed(i, type); + } + } + } + + const auto prev_insert_block = m_ir->GetInsertBlock(); + + block_optimization_phi_parent = prev_insert_block; + + make_reduced_loop_condition(block_optimization_inner, false, 2); + m_ir->SetInsertPoint(block_optimization_inner); + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto init_val = reduced_loop_init_regs[i]) + { + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? 
get_type() : get_reg_type(i); + + const auto _phi = m_ir->CreatePHI(init_val->getType(), 2, fmt::format("reduced_0x%05x_r%u", baddr, i)); + _phi->addIncoming(init_val, prev_insert_block); + + reduced_loop_phi_nodes[i] = _phi; + m_block->reg[i] = _phi; + } + } + + m_block->block_wide_reg_store_elimination = true; + } + + // Instructions emitting optimizations: Loop iteration is not the last + m_pos = baddr; + + // Masked opcodde -> register modification times + std::map>> masked_times; + std::array reg_states{}; + u32 s_reg_state{1}; + + for (u32 iteration_emit = 0; is_reduced_loop; m_pos += 4) + { + if (m_pos != baddr && m_block_info[m_pos / 4] && reduced_loop_info->loop_end < m_pos) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(1) too early at 0x%x", m_pos); + } + + if (!(m_pos >= start && m_pos < end)) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(2) too early at 0x%x", m_pos); + } + + if (m_ir->GetInsertBlock()->getTerminator()) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(3) too early at 0x%x", m_pos); + } + + const u32 op = std::bit_cast>(func.data[(m_pos - start) / 4]); + const auto itype = g_spu_itype.decode(op); + + if (itype & spu_itype::branch) + { + bool branches_back = false; + + for (u32 dest : op_branch_targets(m_pos, spu_opcode_t{op})) + { + branches_back = branches_back || dest == baddr; + } + + if (!branches_back) + { + continue; + } + + iteration_emit++; + + if (iteration_emit < 2) + { + // Reset mpos (with fixup) + m_pos = baddr - 4; + continue; + } + + // Optimization block body + const auto block_inner = m_ir->GetInsertBlock(); + + std::array block_reg_results{}; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto phi = reduced_loop_phi_nodes[i]) + { + const auto type = phi->getType() == get_type() ? 
get_type() : get_reg_type(i); + block_reg_results[i] = ensure(get_reg_fixed(i, type)); + phi->addIncoming(block_reg_results[i], block_inner); + } + } + + ensure(!!m_block->reg[reduced_loop_info->cond_val_register_idx]); + make_reduced_loop_condition(block_optimization_inner, true, 2); + m_ir->SetInsertPoint(block_optimization_next); + m_block->block_wide_reg_store_elimination = false; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (const auto loop_value = block_reg_results[i]) + { + const auto phi = m_ir->CreatePHI(loop_value->getType(), 2, fmt::format("redres_0x%05x_r%u", baddr, i)); + + phi->addIncoming(loop_value, block_inner); + phi->addIncoming(reduced_loop_init_regs[i], block_optimization_phi_parent); + m_block->reg[i] = phi; + } + } + + + break; + } + + if (!op) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: [%s] Unexpected fallthrough to 0x%x (chunk=0x%x, entry=0x%x)", m_hash, m_pos, m_entry, m_function_queue[0]); + } + + const auto [reg_rt, reg_access, masked_op] = op_register_targets(m_pos, spu_opcode_t{op}); + + bool erased = false; + + const auto inst_times = std::array{reg_states[reg_access[0]], reg_states[reg_access[1]], reg_states[reg_access[2]]}; + + // Try to reuse the reult of the previous iteration (if argumnent registers have not been modified) + if (reg_rt < 128 && masked_times.count(masked_op) && masked_times[masked_op].first && m_inst_attrs[(m_pos - start) / 4] == inst_attr::none) + { + auto times = masked_times[masked_op].second; + + bool is_ok = true; + for (u32 regi = 0; regi < 3; regi++) + { + if (reg_access[regi] < 128 && times[regi] != inst_times[regi]) + { + is_ok = false; + } + } + + if (is_ok) + { + m_block->reg[reg_rt] = masked_times[masked_op].first; + erased = true; + } + } + + if (reg_rt < 128) + { + reg_states[reg_rt] = s_reg_state++; + } + + if (erased) + { + continue; + } + + m_next_op = 0; + + masked_times[masked_op] = {}; + + switch (m_inst_attrs[(m_pos - start) / 4]) + { + case inst_attr::putllc0: + { + 
putllc0_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::putllc16: + { + putllc16_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::omit: + { + // TODO + continue; + } + default: break; + } + + // Execute recompiler function (TODO) + (this->*decode(op))({op}); + + if (reg_rt < 128 && itype & spu_itype::pure && reg_rt != reg_access[0] && reg_rt != reg_access[1] && reg_rt != reg_access[2]) + { + masked_times[masked_op] = {ensure(m_block->reg[reg_rt]), inst_times}; + } + } + // Emit instructions for (m_pos = baddr; m_pos >= start && m_pos < end && !m_ir->GetInsertBlock()->getTerminator(); m_pos += 4) { @@ -2640,6 +3229,8 @@ public: m_ir->SetInsertPoint(ins); auto si = llvm::cast(m_ir->Insert(bs->clone())); + spu_context_attr(si); + if (b2->store[i] == nullptr) { // Protect against backwards ordering now @@ -2705,7 +3296,7 @@ public: continue; m_ir->SetInsertPoint(ins); - m_ir->Insert(bs->clone()); + m_ir->Insert(spu_context_attr(bs->clone())); } bs->eraseFromParent(); @@ -5756,11 +6347,59 @@ public: void CEQI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = s16[8]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() == match())) << 16 >> 16); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 16 >> 16); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(MT == MT)); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + })) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void CEQHI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = 
s8[16]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() == match())) << 8 >> 8); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 8 >> 8); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(match() == match())); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + })) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } @@ -6337,8 +6976,13 @@ public: return eval(bitcast(min(bitcast(v),splat(0xff7fffff)))); } - value_t clamp_smax(value_t v) + value_t clamp_smax(value_t v, u32 gpr = s_reg_max) { + if (m_block && gpr < s_reg_max && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(gpr)) + { + return v; + } + if (m_use_avx512) { if (is_input_positive(v)) @@ -6358,16 +7002,6 @@ public: return eval(clamp_positive_smax(clamp_negative_smax(v))); } - // FMA favouring zeros - value_t xmuladd(value_t a, value_t b, value_t c) - { - const auto ma = eval(sext(fcmp_uno(a != fsplat(0.)))); - const auto mb = eval(sext(fcmp_uno(b != fsplat(0.)))); - const auto ca = eval(bitcast(bitcast(a) & mb)); - const auto cb = eval(bitcast(bitcast(b) & ma)); - return eval(fmuladd(ca, cb, c)); - } - // Checks for postive and negative zero, or Denormal (treated as zero) // If sign is +-1 check equality againts all sign bits bool is_spu_float_zero(v128 a, int sign = 0) @@ -6454,12 +7088,6 @@ public: set_vr(op.rt, frsqest(get_vr(op.ra))); } - template - static llvm_calli fcgt(T&& a, U&& b) - { - return {"spu_fcgt", {std::forward(a), std::forward(b)}}; - } - void FCGT(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6468,11 +7096,8 @@ public: return; } - register_intrinsic("spu_fcgt", [&](llvm::CallInst* ci) + const auto fcgt = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - const value_t ab[2]{a, b}; std::bitset<2> safe_int_compare(0); @@ -6504,6 
+7129,16 @@ public: } } + if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra)) + { + safe_finite_compare.set(0); + } + + if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb)) + { + safe_finite_compare.set(1); + } + if (safe_int_compare.any()) { return eval(sext(bitcast(a) > bitcast(b))); @@ -6523,7 +7158,7 @@ public: const auto bi = eval(bitcast(b)); return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); - }); + }; set_vr(op.rt, fcgt(get_vr(op.ra), get_vr(op.rb))); } @@ -6620,12 +7255,6 @@ public: set_vr(op.rt, fa(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fs(T&& a, U&& b) - { - return {"spu_fs", {std::forward(a), std::forward(b)}}; - } - void FS(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6634,29 +7263,26 @@ public: return; } - register_intrinsic("spu_fs", [&](llvm::CallInst* ci) + const auto fs = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - const auto bc = clamp_smax(b); // for #4478 + const auto bc = clamp_smax(b, op.rb); // for #4478 return eval(a - bc); } else { return eval(a - b); } - }); + }; set_vr(op.rt, fs(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fm(T&& a, U&& b) + template , typename W = llvm_place_stealer_t> + static auto fm(T&& a, U&& b, V&& a_not_nan = match_stealer(), W&& b_not_nan = match_stealer()) { - return llvm_calli{"spu_fm", {std::forward(a), std::forward(b)}}.set_order_equality_hint(1, 1); + return llvm_calli{"spu_fm", {std::forward(a), std::forward(b), a_not_nan, b_not_nan}}.set_order_equality_hint(1, 1, 2, 3); } void FM(spu_opcode_t op) @@ -6671,14 +7297,27 @@ public: { const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); + const bool a_notnan = 
llvm::cast(ci->getOperand(2))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(3))->getZExtValue() != 0; if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - if (a.value == b.value) + if (a.value == b.value || (a_notnan && b_notnan)) { return eval(a * b); } + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & ma)); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & mb)); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); return eval(bitcast(bitcast(a * b) & ma & mb)); @@ -6689,10 +7328,13 @@ public: } }); + const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; + if (op.ra == op.rb && !m_interp_magn) { const auto a = get_vr(op.ra); - set_vr(op.rt, fm(a, a)); + set_vr(op.rt, fm(a, a, splat(a_notnan), splat(a_notnan))); return; } @@ -6731,7 +7373,7 @@ public: } } - set_vr(op.rt, fm(a, b)); + set_vr(op.rt, fm(a, b, splat(a_notnan), splat(b_notnan))); } template @@ -7024,10 +7666,10 @@ public: set_vr(op.rt4, fnms(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc))); } - template - static llvm_calli fma(T&& a, U&& b, V&& c) + template , typename X = llvm_place_stealer_t> + static llvm_calli fma(T&& a, U&& b, V&& c, W&& d = match_stealer(), X&& e = match_stealer()) { - return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c)}}.set_order_equality_hint(1, 1, 0); + return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c), std::forward(d), std::forward(e)}}.set_order_equality_hint(1, 1, 2, 3, 4); } template @@ -7046,14 +7688,35 @@ public: return; } + register_intrinsic("spu_fma", [&](llvm::CallInst* ci) { const auto a = 
value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - + const bool a_notnan = llvm::cast(ci->getOperand(3))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(4))->getZExtValue() != 0; + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { + if (a.value == b.value || (a_notnan && b_notnan)) + { + return fma32x4(a, b, c); + } + + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + const auto cb = bitcast(bitcast(b) & ma); + return fma32x4(a, eval(cb), c); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + const auto ca = bitcast(bitcast(a) & mb); + return fma32x4(eval(ca), b, c); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); const auto ca = bitcast(bitcast(a) & mb); @@ -7102,6 +7765,9 @@ public: const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = match(); + const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 
1 : 0; + auto check_sqrt_pattern_for_float = [&](f32 float_value) -> bool { auto match_fnms = [&](f32 float_value) @@ -7297,7 +7963,13 @@ public: spu_log.todo("[%s:0x%05x] Unmatched spu_rsqrte(c) found in FMA", m_hash, m_pos); } - set_vr(op.rt4, fma(a, b, c)); + if (!m_interp_magn && op.ra == op.rb) + { + set_vr(op.rt4, fma(a, a, c, splat(a_notnan), splat(a_notnan))); + return; + } + + set_vr(op.rt4, fma(a, b, c, splat(a_notnan), splat(b_notnan))); } template @@ -7720,13 +8392,13 @@ public: void make_store_ls(value_t addr, value_t data) { const auto bswapped = byteswap(data); - m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value)); + spu_mem_attr(m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value))); } auto make_load_ls(value_t addr) { value_t data; - data.value = m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value)); + data.value = spu_mem_attr(m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value))); return byteswap(data); } @@ -7741,12 +8413,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); make_store_ls(addr, get_vr(op.rt)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + make_store_ls(addr, get_vr(op.rt)); + return; + } } } @@ -7765,12 +8443,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); set_vr(op.rt, make_load_ls(addr)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + set_vr(op.rt, make_load_ls(addr)); + return; + } } } @@ -7824,13 +8508,51 @@ public: } } - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 
0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x, y] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + + if (auto [ok2, data] = get_const_vector(y.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); make_store_ls(addr, get_vr(op.rt)); } void LQD(spu_opcode_t op) { - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x1, y1] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x1.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + + if (auto [ok2, data] = get_const_vector(y1.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); set_vr(op.rt, make_load_ls(addr)); } diff --git a/rpcs3/Emu/Cell/SPUOpcodes.h b/rpcs3/Emu/Cell/SPUOpcodes.h index cea4513e3f..42d76792a2 100644 --- a/rpcs3/Emu/Cell/SPUOpcodes.h +++ b/rpcs3/Emu/Cell/SPUOpcodes.h @@ -24,6 +24,20 @@ union spu_opcode_t bf_t i16; // 9..24 bf_t si16; // 9..24, signed bf_t i18; // 7..24 + + // For 16-bit instructions in the context of 
32-bits + u32 duplicate_si10() const + { + const u32 _16 = static_cast(static_cast(si10)); + return (_16 << 16) | _16; + } + + // For 8-bit instructions in the context of 32-bits + u32 duplicate_duplicate_si10() const + { + const u32 _8 = static_cast(si10 & 0xff); + return (_8 << 24) | (_8 << 16) | (_8 << 8) | _8; + } }; constexpr u32 spu_branch_target(u32 pc, u32 imm = 0) @@ -42,6 +56,7 @@ constexpr u32 spu_decode(u32 inst) } std::array op_branch_targets(u32 pc, spu_opcode_t op); +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op); // SPU decoder object. D provides functions. T is function pointer type returned. template diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 57d842e69d..54ddcb2f1e 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -4,12 +4,24 @@ #include "Utilities/lockless.h" #include "Utilities/address_range.h" #include "SPUThread.h" +#include "SPUAnalyser.h" #include #include #include #include #include +// std::bitset +template + requires requires(std::remove_cvref_t& x, T&& y) { x.count(); x.test(y); x.flip(y); } +[[nodiscard]] constexpr bool at32(CT&& container, T&& index, std::source_location src_loc = std::source_location::current()) +{ + const usz csv = container.size(); + if (csv <= std::forward(index)) [[unlikely]] + fmt::raw_range_error(src_loc, format_object_simplified(index), csv); + return container[std::forward(index)]; +} + // Helper class class spu_cache { @@ -201,6 +213,25 @@ public: __bitset_enum_max }; + enum compare_direction : u32 + { + CMP_TURNAROUND_FLAG = 0x1, + CMP_NEGATE_FLAG = 0x100, + CMP_SLESS = 0, + CMP_SGREATER = CMP_SLESS | CMP_TURNAROUND_FLAG, + CMP_EQUAL, + CMP_EQUAL2 = CMP_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LLESS, + CMP_LGREATER = CMP_LLESS | CMP_TURNAROUND_FLAG, + CMP_SGREATER_EQUAL = CMP_SLESS | CMP_NEGATE_FLAG, + CMP_SLOWER_EQUAL = CMP_SGREATER | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL = CMP_EQUAL | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL2 = 
CMP_NOT_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LGREATER_EQUAL = CMP_LLESS | CMP_NEGATE_FLAG, + CMP_LLOWER_EQUAL = CMP_LGREATER | CMP_NEGATE_FLAG, + CMP_UNKNOWN, + }; + struct reg_state_t { bs_t flag{+vf::is_null}; @@ -273,6 +304,399 @@ public: static u32 alloc_tag(bool reset = false) noexcept; }; + struct reduced_loop_t + { + bool active = false; // Single block loop detected + bool failed = false; + u32 loop_pc = SPU_LS_SIZE; + u32 loop_end = SPU_LS_SIZE; + + // False: single-block loop + // True: loop with a trailing block of aftermath (iteration update) stuff (like for (u32 i = 0; i < 10; /*update*/ i++)) + bool is_two_block_loop = false; + bool has_cond_state = false; + + // Loop stay-in state requirement + u64 cond_val_mask = umax; + u64 cond_val_min = 0; + u64 cond_val_size = 0; + compare_direction cond_val_compare{}; + u64 cond_val_incr = 0; + bool cond_val_incr_is_immediate = false; + u64 cond_val_register_argument_idx = umax; + u64 cond_val_register_idx = umax; + bool cond_val_incr_before_cond = false; + bool cond_val_incr_before_cond_taken_in_account = false; + bool cond_val_is_immediate = false; + + // Loop attributes + bool is_constant_expression = false; + bool is_secret = false; + + struct supplemental_condition_t + { + u64 immediate_value = umax; + u64 type_size = 0; + compare_direction val_compare{}; + }; + + // Supplemental loop condition: + // Inner conditions that depend on external values (not produced inside the loop) + // all should evaluate to false in order for the optimization to work (at the moment) + // So succeeding can be treated linearly + u64 expected_sup_conds = 0; + u64 current_sup_conds_index = 0; + std::vector sup_conds; + + void take_cond_val_incr_before_cond_into_account() + { + if (cond_val_is_immediate && cond_val_incr_before_cond && !cond_val_incr_before_cond_taken_in_account) + { + cond_val_min -= cond_val_incr; + cond_val_min &= cond_val_mask; + cond_val_incr_before_cond_taken_in_account = true; + } + } + +
std::bitset loop_args; + std::bitset loop_dicts; + std::bitset loop_writes; + + struct origin_t + { + std::bitset regs{}; + u32 modified = 0; + spu_itype_t mod1_type = spu_itype::UNK; + spu_itype_t mod2_type = spu_itype::UNK; + spu_itype_t mod3_type = spu_itype::UNK; + u32 IMM = 0; + +private: + // Internal, please access using fixed order + spu_itype_t access_type(u32 i) const + { + if (i > modified) + { + return spu_itype::UNK; + } + + switch (i) + { + case 1: return mod1_type; + case 2: return mod2_type; + case 3: return mod3_type; + default: return spu_itype::UNK; + } + + return spu_itype::UNK; + } +public: + + spu_itype_t reverse1_type() + { + return access_type(modified); + } + + spu_itype_t reverse2_type() + { + return access_type(modified - 1); + } + + spu_itype_t reverse3_type() + { + return access_type(modified - 2); + } + + origin_t& join_with_this(const origin_t& rhs) + { + regs |= rhs.regs; + return *this; + } + + origin_t& join_with_this(u32 rhs) + { + regs.set(rhs); + return *this; + } + + origin_t& add_register_origin(u32 reg_val) + { + regs.set(reg_val); + return *this; + } + + bool is_single_reg_access(u32 reg_val) const + { + if (!modified) + { + return true; + } + + return regs.count() == 1 && ::at32(regs, reg_val); + } + + bool is_loop_dictator(u32 reg_val, bool test_predictable = false, bool should_predictable = true) const + { + if (!modified) + { + return false; + } + + if (regs.count() >= 1 && ::at32(regs, reg_val)) + { + if (!test_predictable) + { + return true; + } + + if (modified > 1) + { + return should_predictable ^ true; + } + + switch (mod1_type) + { + case spu_itype::A: + { + if (regs.count() == 2) + { + return should_predictable; + } + + return should_predictable ^ true; + } + case spu_itype::AI: + case spu_itype::AHI: + { + if (IMM && regs.count() == 1) + { + return should_predictable; + } + + return should_predictable ^ true; + } + default: break; + } + + return should_predictable ^ true; + } + + return false; + } + + bool 
is_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, true); + } + + bool is_non_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, false); + } + + bool is_null(u32 reg_val) const noexcept + { + if (modified) + { + return false; + } + + if (regs.count() - (::at32(regs, reg_val) ? 1 : 0)) + { + return false; + } + + return true; + } + + origin_t& add_instruction_modifier(spu_itype_t inst_type, u32 imm = 0) + { + if (inst_type == spu_itype::UNK) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + if (modified) + { + if (modified == 3) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + bool is_ok = false; + switch (inst_type) + { + case spu_itype::XSBH: + { + const auto prev_type = modified == 1 ? mod1_type : mod2_type; + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + break; + } + case spu_itype::ANDI: + { + const auto prev_type = modified == 1 ?
mod1_type : mod2_type; + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + is_ok &= (spu_opcode_t{imm}.si10 & 0xff) == 0xff; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = imm; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = spu_opcode_t{imm}.si10; + break; + } + } + + if (!is_ok) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + (modified == 1 ? 
mod2_type : mod3_type) = inst_type; + modified++; + return *this; + } + + mod1_type = inst_type; + modified = 1; + + switch (inst_type) + { + case spu_itype::AHI: + { + IMM = spu_opcode_t{imm}.duplicate_si10(); + return *this; + } + case spu_itype::AI: + case spu_itype::ORI: + case spu_itype::XORI: + case spu_itype::ANDI: + + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + IMM = spu_opcode_t{imm}.si10; + return *this; + } + case spu_itype::ILA: + { + IMM = spu_opcode_t{imm}.i18; + return *this; + } + case spu_itype::IOHL: + case spu_itype::ILH: + case spu_itype::ILHU: + { + IMM = spu_opcode_t{imm}.i16; + return *this; + } + default: + { + IMM = imm; + break; + } + } + + return *this; + } + }; + + static origin_t make_reg(u32 reg_val) noexcept + { + origin_t org{}; + org.add_register_origin(reg_val); + return org; + } + + const origin_t* find_reg(u32 reg_val) const noexcept + { + for (auto& pair : regs) + { + if (pair.first == reg_val) + { + return &pair.second; + } + } + + return nullptr; + } + + origin_t* find_reg(u32 reg_val) noexcept + { + return const_cast(std::as_const(*this).find_reg(reg_val)); + } + + bool is_reg_null(u32 reg_val) const noexcept + { + if (const auto reg_found = find_reg(reg_val)) + { + return reg_found->is_null(reg_val); + } + + return true; + } + + origin_t get_reg(u32 reg_val) noexcept + { + const auto org = find_reg(reg_val); + return org ? 
*org : regs.emplace_back(reg_val, std::remove_reference_t{}).second; + } + + std::vector> regs; + + // Return old state for error reporting + reduced_loop_t discard() + { + const reduced_loop_t old = *this; + *this = reduced_loop_t{}; + return old; + } + }; + protected: spu_runtime* m_spurt{}; @@ -326,8 +750,14 @@ protected: // Set if the initial register value in this block may be xfloat std::bitset reg_maybe_xf{}; - // Bit mask of the registers used (before modified) - std::bitset reg_use{}; + // Set if register is used in floating pont instruction + std::bitset reg_maybe_float{}; + + // Set if register is used as shuffle mask + std::bitset reg_maybe_shuffle_mask{}; + + // Number of times registers are used (before modified) + std::array reg_use{}; // Bit mask of the trivial (u32 x 4) constant value resulting in this block std::bitset reg_const{}; @@ -391,18 +821,23 @@ protected: putllc16, putllc0, rchcnt_loop, + reduced_loop, }; std::vector m_inst_attrs; struct pattern_info { - u64 info; + // Info via integral + u64 info{}; + + // Info via additional erased-typed pointer + std::shared_ptr info_ptr; }; - std::unordered_map m_patterns; + std::map m_patterns; - void add_pattern(inst_attr attr, u32 start, u64 info); + void add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr = nullptr); private: // For private use @@ -435,7 +870,7 @@ public: spu_program analyse(const be_t* ls, u32 entry_point, std::map>* out_target_list = nullptr); // Print analyser internal state - void dump(const spu_program& result, std::string& out); + void dump(const spu_program& result, std::string& out, u32 block_min = 0, u32 block_max = SPU_LS_SIZE); // Get SPU Runtime spu_runtime& get_runtime() diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index b9a77d7696..60e0f99cca 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -495,7 +495,8 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write); extern 
thread_local u64 g_tls_fault_spu; -const spu_decoder s_spu_itype; +const extern spu_decoder g_spu_itype; +const extern spu_decoder g_spu_iflag; namespace vm { @@ -598,7 +599,7 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) { std::array res{spu_branch_target(pc + 4), umax}; - switch (const auto type = s_spu_itype.decode(op.opcode)) + switch (const auto type = g_spu_itype.decode(op.opcode)) { case spu_itype::BR: case spu_itype::BRA: @@ -639,6 +640,54 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) return res; } +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op) +{ + std::tuple, u32> result{u32{umax}, std::array{128, 128, 128}, op.opcode}; + + const auto type = g_spu_itype.decode(op.opcode); + + if (type & spu_itype::zregmod) + { + std::get<2>(result) = 0; + return result; + } + + std::get<0>(result) = type & spu_itype::_quadrop ? op.rt4 : op.rt; + + spu_opcode_t op_masked = op; + + if (type & spu_itype::_quadrop) + { + op_masked.rt4 = 0; + } + else + { + op_masked.rt = 0; + } + + std::get<2>(result) = op_masked.opcode; + + if (auto iflags = g_spu_iflag.decode(op.opcode)) + { + if (+iflags & +spu_iflag::use_ra) + { + std::get<1>(result)[0] = op.ra; + } + + if (+iflags & +spu_iflag::use_rb) + { + std::get<1>(result)[1] = op.rb; + } + + if (+iflags & +spu_iflag::use_rc) + { + std::get<1>(result)[2] = op.rc; + } + } + + return result; +} + void spu_int_ctrl_t::set(u64 ints) { // leave only enabled interrupts @@ -988,7 +1037,7 @@ std::vector> spu_thread::dump_callstack_list() const passed[i / 4] = true; const spu_opcode_t op{_ref(i)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (start == 0 && type == spu_itype::STQD && op.ra == 1u && op.rt == 0u) { @@ -1090,11 +1139,62 @@ std::vector> spu_thread::dump_callstack_list() const return call_stack_list; } -std::string spu_thread::dump_misc() const +void spu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - 
std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); - fmt::append(ret, "Block Weight: %u (Retreats: %u)", block_counter, block_failure); + struct dump_misc_data_t + { + u32 cpu_id = umax; + u64 last_read_time = umax; + u64 last_block_counter = umax; + u64 update_count = 0; + + std::pair update(u64 current_block_counter, u64 current_timestamp = get_system_time()) + { + const u64 diff_time = current_timestamp <= last_read_time ? 0 : current_timestamp - last_read_time; + const u64 diff_block = current_block_counter <= last_block_counter ? 0 : current_block_counter - last_block_counter; + + if (last_read_time == umax || update_count >= 1000) + { + last_read_time = current_timestamp; + last_block_counter = current_block_counter; + update_count = 0; + } + else if (diff_time >= 100000 && diff_block >= 100) + { + // Update values to measure rate (but not fully so rate can be measured later) + last_read_time += diff_time / 10 * 9; + last_block_counter += diff_block / 10 * 9; + update_count++; + } + + return {diff_time, diff_block}; + } + }; + + dump_misc_data_t* func_data = std::any_cast(&custom_data); + + if (!func_data) + { + custom_data.reset(); + custom_data = std::make_any(); + func_data = ensure(std::any_cast(&custom_data)); + } + + if (func_data->cpu_id != this->id) + { + *func_data = {}; + func_data->cpu_id = this->id; + } + + const u64 current_block_counter = atomic_storage::load(block_counter); + + const auto [diff_time, diff_block] = func_data->update(current_block_counter); + + const u64 rate_of_diff = diff_block ? 
std::max(1, utils::rational_mul(diff_block, 1'000'000, std::max(diff_time, 1))) : 0; + + fmt::append(ret, "Block Weight: log10(%u/second): %.1f (Retreats: %u)", rate_of_diff, std::log10(std::max(rate_of_diff, 10)), block_failure); if (u64 hash = atomic_storage::load(block_hash)) { @@ -1145,8 +1245,6 @@ std::string spu_thread::dump_misc() const break; } } - - return ret; } void spu_thread::cpu_on_stop() @@ -3761,7 +3859,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add const u32 addr0 = spu_branch_target(addr); const spu_opcode_t op{read_from_ptr>(ls_ptr, addr0 - base_addr)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (type == spu_itype::UNK || !op.opcode) { @@ -3907,7 +4005,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add // Test the validity of a single instruction of the optional target // This function can't be too slow and is unlikely to improve results by a great deal const u32 op0 = read_from_ptr>(ls_ptr, route_pc - base_addr); - const spu_itype::type type0 = s_spu_itype.decode(op0); + const spu_itype::type type0 = g_spu_itype.decode(op0); if (type0 == spu_itype::UNK || !op0) { @@ -6878,7 +6976,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span>(all_data, pc0 - 4); // Try to find function entry (if they are placed sequentially search for BI $LR of previous function) - if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK) + if (!op || op == 0x35000000u || g_spu_itype.decode(op) == spu_itype::UNK) { if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE })) break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 9596f7b006..889d6f291c 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -630,7 +630,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; virtual std::string dump_callstack() const override; virtual std::vector> 
dump_callstack_list() const override; - virtual std::string dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void cpu_task() override final; virtual void cpu_on_stop() override; virtual void cpu_return() override; diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index ffd31227a8..beec0fc4e4 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -900,10 +900,8 @@ lv2_file::open_raw_result_t lv2_file::open_raw(const std::string& local_path, s3 switch (auto error = fs::g_tls_error) { case fs::error::noent: return {CELL_ENOENT}; - default: sys_fs.error("lv2_file::open(): unknown error %s", error); break; + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO}; } if (flags & CELL_FS_O_MSELF && !verify_mself(file)) @@ -1374,8 +1372,7 @@ error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) } default: { - sys_fs.error("sys_fs_opendir(): unknown error %s", error); - return {CELL_EIO, path}; + fmt::throw_exception("unknown error %s", error); } } } @@ -1597,8 +1594,7 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr } default: { - sys_fs.error("sys_fs_stat(): unknown error %s", error); - return {CELL_EIO, path}; + fmt::throw_exception("unknown error %s", error); } } } @@ -1732,10 +1728,8 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { return {sys_fs.warning, CELL_EEXIST, path}; } - default: sys_fs.error("sys_fs_mkdir(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO, path}; // ??? 
} sys_fs.notice("sys_fs_mkdir(): directory %s created", path); @@ -1797,10 +1791,8 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to { case fs::error::noent: return {CELL_ENOENT, from}; case fs::error::exist: return {CELL_EEXIST, to}; - default: sys_fs.error("sys_fs_rename(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO, from}; // ??? } sys_fs.notice("sys_fs_rename(): %s renamed to %s", from, to); @@ -1852,10 +1844,8 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { case fs::error::noent: return {CELL_ENOENT, path}; case fs::error::notempty: return {CELL_ENOTEMPTY, path}; - default: sys_fs.error("sys_fs_rmdir(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO, path}; // ??? } sys_fs.notice("sys_fs_rmdir(): directory %s removed", path); @@ -1910,10 +1900,8 @@ error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_unlink(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO, path}; // ??? } sys_fs.notice("sys_fs_unlink(): file %s deleted", path); @@ -2632,10 +2620,8 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr switch (auto error = fs::g_tls_error) { case fs::error::inval: return {CELL_EINVAL, "fd=%u, offset=0x%x, whence=%d", fd, offset, whence}; - default: sys_fs.error("sys_fs_lseek(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return CELL_EIO; // ??? } lock.unlock(); @@ -2751,10 +2737,8 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr(ppu.test_stopped()); @@ -2809,10 +2793,8 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) { return {mp == &g_mp_sys_dev_hdd1 ? 
sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_truncate(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return {CELL_EIO, path}; // ??? } return CELL_OK; @@ -2858,10 +2840,8 @@ error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) switch (auto error = fs::g_tls_error) { case fs::error::ok: - default: sys_fs.error("sys_fs_ftruncate(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return CELL_EIO; // ??? } return CELL_OK; @@ -3057,10 +3037,8 @@ error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr g_prx_list { "libddpdec.sprx", 0 }, { "libdivxdec.sprx", 0 }, { "libdmux.sprx", 0 }, - { "libdmuxpamf.sprx", 0 }, + { "libdmuxpamf.sprx", 1 }, { "libdtslbrdec.sprx", 0 }, { "libfiber.sprx", 0 }, { "libfont.sprx", 0 }, diff --git a/rpcs3/Emu/Io/LogitechG27.cpp b/rpcs3/Emu/Io/LogitechG27.cpp index 7ae996c864..3f07e92560 100644 --- a/rpcs3/Emu/Io/LogitechG27.cpp +++ b/rpcs3/Emu/Io/LogitechG27.cpp @@ -884,7 +884,7 @@ static s16 fetch_sdl_as_axis(SDL_Joystick* joystick, const sdl_mapping& mapping) return 0; } -static s16 fetch_sdl_axis_avg(std::map>& joysticks, const sdl_mapping& mapping) +static s16 fetch_sdl_axis_avg(const std::map>& joysticks, const sdl_mapping& mapping) { constexpr s16 MAX = 0x7FFF; constexpr s16 MIN = -0x8000; @@ -910,7 +910,7 @@ static s16 fetch_sdl_axis_avg(std::map>& joystic return std::clamp(sdl_joysticks_total_value / static_cast(joysticks_of_type->second.size()), MIN, MAX); } -static bool sdl_to_logitech_g27_button(std::map>& joysticks, const sdl_mapping& mapping) +static bool sdl_to_logitech_g27_button(const std::map>& joysticks, const sdl_mapping& mapping) { auto joysticks_of_type = joysticks.find(mapping.device_type_id); if (joysticks_of_type == joysticks.end()) @@ -931,21 +931,21 @@ static bool sdl_to_logitech_g27_button(std::map> return pressed; } -static u16 
sdl_to_logitech_g27_steering(std::map>& joysticks, const sdl_mapping& mapping) +static u16 sdl_to_logitech_g27_steering(const std::map>& joysticks, const sdl_mapping& mapping) { const s16 avg = fetch_sdl_axis_avg(joysticks, mapping); const u16 unsigned_avg = avg + 0x8000; return unsigned_avg * (0xFFFF >> 2) / 0xFFFF; } -static u8 sdl_to_logitech_g27_pedal(std::map>& joysticks, const sdl_mapping& mapping) +static u8 sdl_to_logitech_g27_pedal(const std::map>& joysticks, const sdl_mapping& mapping) { const s16 avg = fetch_sdl_axis_avg(joysticks, mapping); const u16 unsigned_avg = avg + 0x8000; return unsigned_avg * 0xFF / 0xFFFF; } -void usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFEX_data data{}; ensure(buf_size >= sizeof(data)); @@ -979,7 +979,7 @@ void usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFP_data data{}; ensure(buf_size >= sizeof(data)); @@ -1015,7 +1015,7 @@ void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* t std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFGT_data data{}; ensure(buf_size >= sizeof(data)); @@ -1057,7 +1057,7 @@ void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) const { G25_data data{}; 
ensure(buf_size >= sizeof(data)); @@ -1105,7 +1105,7 @@ void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* t std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) const { G27_data data{}; ensure(buf_size >= sizeof(data)); diff --git a/rpcs3/Emu/Io/LogitechG27.h b/rpcs3/Emu/Io/LogitechG27.h index dc68db68b9..ccb2f58908 100644 --- a/rpcs3/Emu/Io/LogitechG27.h +++ b/rpcs3/Emu/Io/LogitechG27.h @@ -121,11 +121,11 @@ public: private: void sdl_refresh(); void set_personality(logitech_personality personality, bool reconnect = false); - void transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer); + void transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) const; u32 m_controller_index = 0; @@ -134,7 +134,7 @@ private: logitech_g27_sdl_mapping m_mapping {}; bool m_reverse_effects = false; - std::mutex m_sdl_handles_mutex; + mutable std::mutex m_sdl_handles_mutex; SDL_Joystick* m_led_joystick_handle = nullptr; SDL_Haptic* m_haptic_handle = nullptr; std::map> m_joysticks; diff --git a/rpcs3/Emu/Io/PadHandler.cpp b/rpcs3/Emu/Io/PadHandler.cpp index 1e22da3fc8..ccd81d2805 100644 --- a/rpcs3/Emu/Io/PadHandler.cpp +++ b/rpcs3/Emu/Io/PadHandler.cpp @@ -11,30 +11,15 @@ PadHandlerBase::PadHandlerBase(pad_handler type) : 
m_type(type) { } -std::vector> PadHandlerBase::find_key_combos(const std::unordered_map& map, const std::string& cfg_string, const std::string& fallback) +std::vector> PadHandlerBase::find_key_combos(const std::unordered_map& map, const std::string& cfg_string) { std::vector> key_codes; - const std::vector> combos = cfg_pad::get_buttons(cfg_string); - u32 def_code = umax; + const std::vector combos = cfg_pad::get_combos(cfg_string); - for (const std::vector& names : combos) + for (const pad::combo& combo : combos) { - std::set keys; - - for (const std::string& nam : names) - { - for (const auto& [code, name] : map) - { - if (name == nam) - { - keys.insert(code); - } - - if (!fallback.empty() && name == fallback) - def_code = code; - } - } + std::set keys = find_key_codes(map, combo); if (!keys.empty()) { @@ -42,39 +27,18 @@ std::vector> PadHandlerBase::find_key_combos(const std::unordered_ } } - if (!key_codes.empty()) - { - return key_codes; - } - - if (!fallback.empty()) - { - if (!combos.empty()) - input_log.error("FindKeyCode for [name = %s] returned with [def_code = %d] for [fallback = %s]", cfg_string, def_code, fallback); - - if (def_code != umax) - { - return {{ def_code }}; - } - } - - return {}; + return key_codes; } -std::vector> PadHandlerBase::find_key_combos(const std::unordered_map& map, const cfg::string& cfg_string, bool fallback) -{ - return find_key_combos(map, cfg_string.to_string(), fallback ? 
cfg_string.def : ""); -} - -std::set PadHandlerBase::find_key_codes(const std::unordered_map& map, const std::vector& names) +std::set PadHandlerBase::find_key_codes(const std::unordered_map& map, const pad::combo& combo) { std::set key_codes; - for (const std::string& name : names) + for (const std::string& button_name : combo.buttons()) { - for (const auto& [code, nam] : map) + for (const auto& [code, name] : map) { - if (nam == name) + if (button_name == name) { key_codes.insert(code); break; @@ -82,12 +46,7 @@ std::set PadHandlerBase::find_key_codes(const std::unordered_map m_pad_for_pad_settings; - // Search an unordered map for a string value and return the found combo - static std::vector> find_key_combos(const std::unordered_map& map, const std::string& cfg_string, const std::string& fallback); + // Search an unordered map for a string value and return the found combos + static std::vector> find_key_combos(const std::unordered_map& map, const std::string& cfg_string); - // Search an unordered map for a string value and return the found combo - static std::vector> find_key_combos(const std::unordered_map& map, const cfg::string& cfg_string, bool fallback = true); - - // Search an unordered map for string values and return the found key codes - static std::set find_key_codes(const std::unordered_map& map, const std::vector& names); + // Search an unordered map for a combo and return the found key codes + static std::set find_key_codes(const std::unordered_map& map, const pad::combo& combo); // Get normalized trigger value based on the range defined by a threshold u16 NormalizeTriggerInput(u16 value, u32 threshold) const; diff --git a/rpcs3/Emu/Io/pad_config.cpp b/rpcs3/Emu/Io/pad_config.cpp index ce64513659..7d042bddbc 100644 --- a/rpcs3/Emu/Io/pad_config.cpp +++ b/rpcs3/Emu/Io/pad_config.cpp @@ -5,15 +5,15 @@ extern std::string g_input_config_override; -std::vector> cfg_pad::get_buttons(std::string_view str) +std::vector cfg_pad::get_combos(std::string_view 
button_string) { - if (str.empty()) + if (button_string.empty()) return {}; // Handle special case: string contains separator itself as configured value (it's why I don't use fmt::split here) const auto split = [](std::string_view str, char sep) { - std::vector vec; + std::set buttons; bool was_sep = true; usz btn_start = 0ULL; usz i = 0ULL; @@ -27,7 +27,7 @@ std::vector> cfg_pad::get_buttons(std::string_view str) if (!was_sep) { was_sep = true; - vec.push_back(std::string(str.substr(btn_start, i - btn_start))); + buttons.insert(std::string(str.substr(btn_start, i - btn_start))); continue; } } @@ -40,54 +40,68 @@ std::vector> cfg_pad::get_buttons(std::string_view str) if (i == (str.size() - 1)) { - vec.push_back(std::string(str.substr(btn_start, i - btn_start + 1))); + buttons.insert(std::string(str.substr(btn_start, i - btn_start + 1))); } } - // Remove duplicates - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); - - return vec; + return buttons; }; - std::vector> res; + std::vector combos; // Get all combos (seperated by ',') - const std::vector vec = split(str, ','); + const std::set combo_strings = split(button_string, ','); - for (const std::string& combo : vec) + for (const std::string& combo_string : combo_strings) { // Get all keys for this combo (seperated by '&') - std::vector keys = split(combo, '&'); - if (!keys.empty()) + std::set combo = split(combo_string, '&'); + if (!combo.empty()) { - res.push_back(std::move(keys)); + combos.push_back(pad::combo{std::move(combo)}); } } - return res; + return combos; } -std::string cfg_pad::get_buttons(std::vector> vec) +std::string cfg_pad::get_button_string(std::vector& combos) { - std::vector combos; + std::vector combo_strings; // Remove duplicates - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); + std::sort(combos.begin(), combos.end()); + combos.erase(std::unique(combos.begin(), combos.end()), combos.end()); - for 
(std::vector& combo : vec) + for (const pad::combo& combo : combos) { - std::sort(combo.begin(), combo.end()); - combo.erase(std::unique(combo.begin(), combo.end()), combo.end()); - // Merge all keys for this combo (seperated by '&') - combos.push_back(fmt::merge(combo, "&")); + combo_strings.push_back(combo.to_string()); } // Merge combos (seperated by ',') - return fmt::merge(combos, ","); + return fmt::merge(combo_strings, ","); +} + +std::string cfg_pad::make_button_string(const std::unordered_map& button_list, const std::vector>& button_combos) +{ + std::vector combos; + + for (const std::set& button_combo : button_combos) + { + if (button_combo.empty()) continue; + + pad::combo combo {}; + + for (u32 button : button_combo) + { + combo.add_button(::at32(button_list, button)); + } + + combos.push_back(std::move(combo)); + } + + return get_button_string(combos); } u8 cfg_pad::get_motor_speed(VibrateMotor& motor, f32 multiplier) const diff --git a/rpcs3/Emu/Io/pad_config.h b/rpcs3/Emu/Io/pad_config.h index 07de4a7299..86779c0859 100644 --- a/rpcs3/Emu/Io/pad_config.h +++ b/rpcs3/Emu/Io/pad_config.h @@ -5,10 +5,47 @@ #include "Utilities/Config.h" #include +#include namespace pad { constexpr static std::string_view keyboard_device_name = "Keyboard"; + + struct combo + { + public: + combo() = default; + combo(std::set buttons) : m_buttons(std::move(buttons)) {} + + const std::set& buttons() const + { + return m_buttons; + } + + void add_button(const std::string& button) + { + if (button.empty()) return; + m_buttons.insert(button); + } + + std::string to_string() const + { + return fmt::merge(m_buttons, "&"); + } + + bool operator==(const combo& other) const + { + return m_buttons == other.m_buttons; + } + + bool operator<(const combo& other) const + { + return m_buttons < other.m_buttons; + } + + private: + std::set m_buttons; + }; } struct cfg_sensor final : cfg::node @@ -25,8 +62,9 @@ struct cfg_pad final : cfg::node cfg_pad() {}; cfg_pad(node* owner, const 
std::string& name) : cfg::node(owner, name) {} - static std::vector> get_buttons(std::string_view str); - static std::string get_buttons(std::vector> vec); + static std::vector get_combos(std::string_view button_string); + static std::string get_button_string(std::vector& combos); + static std::string make_button_string(const std::unordered_map& button_list, const std::vector>& button_combos); u8 get_motor_speed(VibrateMotor& motor, f32 multiplier) const; u8 get_large_motor_speed(std::array& motors) const; diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h index 127d24015f..ef73149f5e 100644 --- a/rpcs3/Emu/Io/recording_config.h +++ b/rpcs3/Emu/Io/recording_config.h @@ -13,13 +13,13 @@ struct cfg_recording final : cfg::node node_video(cfg::node* _this) : cfg::node(_this, "Video") {} cfg::uint<0, 60> framerate{this, "Framerate", 30}; - cfg::uint<0, 7680> width{this, "Width", 1280}; - cfg::uint<0, 4320> height{this, "Height", 720}; + cfg::uint<640, 7680> width{this, "Width", 1280}; + cfg::uint<360, 4320> height{this, "Height", 720}; cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 - cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; - cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; - cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + cfg::uint<1'000'000, 60'000'000> video_bps{this, "Video Bitrate", 4'000'000}; + cfg::uint<0, 3> max_b_frames{this, "Max B-Frames", 2}; + cfg::uint<1, 120> gop_size{this, "Group of Pictures Size", 30}; } video{ this }; @@ -28,7 +28,7 @@ struct cfg_recording final : cfg::node node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {} cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86018}; // AVCodecID::AV_CODEC_ID_AAC - cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000}; + cfg::uint<64'000, 320'000> audio_bps{this, 
"Audio Bitrate", 192'000}; } audio{ this }; diff --git a/rpcs3/Emu/NP/np_cache.cpp b/rpcs3/Emu/NP/np_cache.cpp index 4731e0cede..4d70cacaa0 100644 --- a/rpcs3/Emu/NP/np_cache.cpp +++ b/rpcs3/Emu/NP/np_cache.cpp @@ -125,7 +125,7 @@ namespace np rooms[room_id].opt_param = *sce_opt_param; } - std::pair> cache_manager::get_slots(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_slots(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -134,7 +134,7 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - const auto& room = rooms[room_id]; + const auto& room = ::at32(rooms, room_id); SceNpMatching2RoomSlotInfo slots{}; @@ -166,7 +166,7 @@ namespace np return {CELL_OK, slots}; } - std::pair> cache_manager::get_memberids(u64 room_id, s32 sort_method) + std::pair> cache_manager::get_memberids(u64 room_id, s32 sort_method) const { std::lock_guard lock(mutex); @@ -175,7 +175,7 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - const auto& room = rooms[room_id]; + const auto& room = ::at32(rooms, room_id); std::vector vec_memberids; @@ -211,7 +211,7 @@ namespace np return {CELL_OK, vec_memberids}; } - std::pair> cache_manager::get_password(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_password(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -220,15 +220,17 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - if (!rooms[room_id].owner) + const auto& room = ::at32(rooms, room_id); + + if (!room.owner) { return {SCE_NP_MATCHING2_ERROR_NOT_ALLOWED, {}}; } - return {CELL_OK, rooms[room_id].password}; + return {CELL_OK, room.password}; } - std::pair> cache_manager::get_opt_param(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_opt_param(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -237,10 +239,10 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - return {CELL_OK, rooms[room_id].opt_param}; + return 
{CELL_OK, ::at32(rooms, room_id).opt_param}; } - error_code cache_manager::get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) + error_code cache_manager::get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) const { std::lock_guard lock(mutex); @@ -249,7 +251,7 @@ namespace np return SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND; } - if (!rooms[room_id].members.contains(member_id)) + if (!::at32(rooms, room_id).members.contains(member_id)) { return SCE_NP_MATCHING2_ERROR_ROOM_MEMBER_NOT_FOUND; } @@ -352,7 +354,7 @@ namespace np return not_an_error(needed_data_size); } - std::pair> cache_manager::get_npid(u64 room_id, u16 member_id) + std::pair> cache_manager::get_npid(u64 room_id, u16 member_id) const { std::lock_guard lock(mutex); @@ -371,7 +373,7 @@ namespace np return {CELL_OK, ::at32(::at32(rooms, room_id).members, member_id).userInfo.npId}; } - std::optional cache_manager::get_memberid(u64 room_id, const SceNpId& npid) + std::optional cache_manager::get_memberid(u64 room_id, const SceNpId& npid) const { std::lock_guard lock(mutex); diff --git a/rpcs3/Emu/NP/np_cache.h b/rpcs3/Emu/NP/np_cache.h index 8870f169a8..bd0bd8a736 100644 --- a/rpcs3/Emu/NP/np_cache.h +++ b/rpcs3/Emu/NP/np_cache.h @@ -74,16 +74,16 @@ namespace np void update_password(SceNpMatching2RoomId room_id, const std::optional& password); void update_opt_param(SceNpMatching2RoomId room_id, const SceNpMatching2SignalingOptParam* sce_opt_param); - std::pair> get_slots(SceNpMatching2RoomId room_id); - std::pair> get_memberids(u64 room_id, s32 sort_method); - std::pair> get_password(SceNpMatching2RoomId room_id); - 
std::pair> get_opt_param(SceNpMatching2RoomId room_id); - error_code get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl); - std::pair> get_npid(u64 room_id, u16 member_id); - std::optional get_memberid(u64 room_id, const SceNpId& npid); + std::pair> get_slots(SceNpMatching2RoomId room_id) const; + std::pair> get_memberids(u64 room_id, s32 sort_method) const; + std::pair> get_password(SceNpMatching2RoomId room_id) const; + std::pair> get_opt_param(SceNpMatching2RoomId room_id) const; + error_code get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) const; + std::pair> get_npid(u64 room_id, u16 member_id) const; + std::optional get_memberid(u64 room_id, const SceNpId& npid) const; private: - shared_mutex mutex; + mutable shared_mutex mutex; std::map rooms; }; } // namespace np diff --git a/rpcs3/Emu/NP/np_contexts.cpp b/rpcs3/Emu/NP/np_contexts.cpp index f43d02c362..739a46368b 100644 --- a/rpcs3/Emu/NP/np_contexts.cpp +++ b/rpcs3/Emu/NP/np_contexts.cpp @@ -25,7 +25,7 @@ generic_async_transaction_context::~generic_async_transaction_context() } } -std::optional generic_async_transaction_context::get_transaction_status() +std::optional generic_async_transaction_context::get_transaction_status() const { std::lock_guard lock(mutex); return result; diff --git a/rpcs3/Emu/NP/np_contexts.h b/rpcs3/Emu/NP/np_contexts.h index 20e123730b..98ed36fa0a 100644 --- a/rpcs3/Emu/NP/np_contexts.h +++ b/rpcs3/Emu/NP/np_contexts.h @@ -20,12 +20,12 @@ struct generic_async_transaction_context generic_async_transaction_context(const SceNpCommunicationId& communicationId, const 
SceNpCommunicationPassphrase& passphrase, u64 timeout); - std::optional get_transaction_status(); + std::optional get_transaction_status() const; void abort_transaction(); error_code wait_for_completion(); void set_result_and_wake(error_code err); - shared_mutex mutex; + mutable shared_mutex mutex; std::condition_variable_any wake_cond, completion_cond; std::optional result; SceNpCommunicationId communicationId; diff --git a/rpcs3/Emu/NP/np_gui_cache.cpp b/rpcs3/Emu/NP/np_gui_cache.cpp index fee08356d5..1b85275d24 100644 --- a/rpcs3/Emu/NP/np_gui_cache.cpp +++ b/rpcs3/Emu/NP/np_gui_cache.cpp @@ -63,7 +63,7 @@ namespace np np_gui_cache.error("Cache mismatch: tried to remove a member but it wasn't in the room"); } - error_code gui_cache_manager::get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) + error_code gui_cache_manager::get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) const { std::lock_guard lock(mutex); diff --git a/rpcs3/Emu/NP/np_gui_cache.h b/rpcs3/Emu/NP/np_gui_cache.h index d742cc5749..431259df6b 100644 --- a/rpcs3/Emu/NP/np_gui_cache.h +++ b/rpcs3/Emu/NP/np_gui_cache.h @@ -45,10 +45,10 @@ namespace np void add_member(const SceNpRoomId& room_id, const SceNpMatchingRoomMember* user_info, bool new_member); void del_member(const SceNpRoomId& room_id, const SceNpMatchingRoomMember* user_info); - error_code get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data); + error_code get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) const; private: - shared_mutex mutex; + mutable shared_mutex mutex; std::map rooms; }; } // namespace np diff --git a/rpcs3/Emu/NP/np_handler.cpp b/rpcs3/Emu/NP/np_handler.cpp index f1fda0752c..3eb2bef0ed 100644 --- a/rpcs3/Emu/NP/np_handler.cpp +++ b/rpcs3/Emu/NP/np_handler.cpp @@ -1228,16 +1228,22 @@ namespace np } auto messages = rpcn->get_new_messages(); - if (basic_handler_registered) + + for (const auto msg_id : messages) { - for (const 
auto msg_id : messages) + const auto opt_msg = rpcn->get_message(msg_id); + + if (!opt_msg) + { + continue; + } + + const auto& msg = opt_msg.value(); + const localized_string_id loc_id = (msg->second.mainType == SCE_NP_BASIC_MESSAGE_MAIN_TYPE_INVITE) ? localized_string_id::CELL_NP_MESSAGE_INVITE_RECEIVED : localized_string_id::CELL_NP_MESSAGE_OTHER_RECEIVED; + rsx::overlays::queue_message(get_localized_string(loc_id, msg->first.c_str()), 6'000'000); + + if (basic_handler_registered) { - const auto opt_msg = rpcn->get_message(msg_id); - if (!opt_msg) - { - continue; - } - const auto& msg = opt_msg.value(); if (strncmp(msg->second.commId.data, basic_handler.context.data, sizeof(basic_handler.context.data) - 1) == 0) { u32 event; @@ -1441,7 +1447,7 @@ namespace np return req_id; } - u32 np_handler::get_players_history_count(u32 options) + u32 np_handler::get_players_history_count(u32 options) const { const bool all_history = (options == SCE_NP_BASIC_PLAYERS_HISTORY_OPTIONS_ALL); @@ -1459,7 +1465,7 @@ namespace np })); } - bool np_handler::get_player_history_entry(u32 options, u32 index, SceNpId* npid) + bool np_handler::get_player_history_entry(u32 options, u32 index, SceNpId* npid) const { const bool all_history = (options == SCE_NP_BASIC_PLAYERS_HISTORY_OPTIONS_ALL); diff --git a/rpcs3/Emu/NP/np_handler.h b/rpcs3/Emu/NP/np_handler.h index 62b69a73ac..7e43bbf3cd 100644 --- a/rpcs3/Emu/NP/np_handler.h +++ b/rpcs3/Emu/NP/np_handler.h @@ -261,8 +261,8 @@ namespace np ticket get_clan_ticket() const; void add_player_to_history(const SceNpId* npid, const char* description); u32 add_players_to_history(const SceNpId* npids, const char* description, u32 count); - u32 get_players_history_count(u32 options); - bool get_player_history_entry(u32 options, u32 index, SceNpId* npid); + u32 get_players_history_count(u32 options) const; + bool get_player_history_entry(u32 options, u32 index, SceNpId* npid) const; SceNpMatching2MemoryInfo get_memory_info() const; error_code 
abort_request(u32 req_id); @@ -518,7 +518,7 @@ namespace np player_history& get_player_and_set_timestamp(const SceNpId& npid, u64 timestamp); void save_players_history(); - shared_mutex mutex_history; + mutable shared_mutex mutex_history; std::map players_history; // npid / history struct diff --git a/rpcs3/Emu/NP/rpcn_client.cpp b/rpcs3/Emu/NP/rpcn_client.cpp index 56cfabd855..96ab505abf 100644 --- a/rpcs3/Emu/NP/rpcn_client.cpp +++ b/rpcs3/Emu/NP/rpcn_client.cpp @@ -3180,7 +3180,7 @@ namespace rpcn } } - std::optional>> rpcn_client::get_message(u64 id) + std::optional>> rpcn_client::get_message(u64 id) const { { std::lock_guard lock(mutex_messages); @@ -3238,21 +3238,21 @@ namespace rpcn active_messages.erase(id); } - u32 rpcn_client::get_num_friends() + u32 rpcn_client::get_num_friends() const { std::lock_guard lock(mutex_friends); return ::size32(friend_infos.friends); } - u32 rpcn_client::get_num_blocks() + u32 rpcn_client::get_num_blocks() const { std::lock_guard lock(mutex_friends); return ::size32(friend_infos.blocked); } - std::optional rpcn_client::get_friend_by_index(u32 index) + std::optional rpcn_client::get_friend_by_index(u32 index) const { std::lock_guard lock(mutex_friends); @@ -3270,7 +3270,7 @@ namespace rpcn return it->first; } - std::optional> rpcn_client::get_friend_presence_by_index(u32 index) + std::optional> rpcn_client::get_friend_presence_by_index(u32 index) const { std::lock_guard lock(mutex_friends); @@ -3284,7 +3284,7 @@ namespace rpcn return std::optional(*it); } - std::optional> rpcn_client::get_friend_presence_by_npid(const std::string& npid) + std::optional> rpcn_client::get_friend_presence_by_npid(const std::string& npid) const { std::lock_guard lock(mutex_friends); const auto it = friend_infos.friends.find(npid); diff --git a/rpcs3/Emu/NP/rpcn_client.h b/rpcs3/Emu/NP/rpcn_client.h index 377cd9c898..daa60d90e7 100644 --- a/rpcs3/Emu/NP/rpcn_client.h +++ b/rpcs3/Emu/NP/rpcn_client.h @@ -242,7 +242,7 @@ namespace rpcn std::mutex 
mutex_packets_to_send; // Friends related - shared_mutex mutex_friends; + mutable shared_mutex mutex_friends; std::set> friend_cbs; friend_data friend_infos; @@ -304,11 +304,11 @@ namespace rpcn std::optional add_friend(const std::string& friend_username); bool remove_friend(const std::string& friend_username); - u32 get_num_friends(); - u32 get_num_blocks(); - std::optional get_friend_by_index(u32 index); - std::optional> get_friend_presence_by_index(u32 index); - std::optional> get_friend_presence_by_npid(const std::string& npid); + u32 get_num_friends() const; + u32 get_num_blocks() const; + std::optional get_friend_by_index(u32 index) const; + std::optional> get_friend_presence_by_index(u32 index) const; + std::optional> get_friend_presence_by_npid(const std::string& npid) const; std::vector>> get_notifications(); std::map>> get_replies(); @@ -316,7 +316,7 @@ namespace rpcn std::map get_presence_states(); std::vector get_new_messages(); - std::optional>> get_message(u64 id); + std::optional>> get_message(u64 id) const; std::vector>>> get_messages_and_register_cb(SceNpBasicMessageMainType type, bool include_bootable, message_cb_func cb_func, void* cb_param); void remove_message_cb(message_cb_func cb_func, void* cb_param); void mark_message_used(u64 id); @@ -445,7 +445,7 @@ namespace rpcn return (void_cb_func < void_other_cb_func) || ((!(void_other_cb_func < void_cb_func)) && (cb_param < other.cb_param)); } }; - shared_mutex mutex_messages; + mutable shared_mutex mutex_messages; std::set message_cbs; std::unordered_map>> messages; // msg id / (sender / message) std::set active_messages; // msg id of messages that have not been discarded diff --git a/rpcs3/Emu/NP/signaling_handler.cpp b/rpcs3/Emu/NP/signaling_handler.cpp index 85c9321e28..a4c59763cb 100644 --- a/rpcs3/Emu/NP/signaling_handler.cpp +++ b/rpcs3/Emu/NP/signaling_handler.cpp @@ -832,7 +832,7 @@ u32 signaling_handler::init_sig2(const SceNpId& npid, u64 room_id, u16 member_id return conn_id; } 
-std::optional signaling_handler::get_conn_id_from_npid(const SceNpId& npid) +std::optional signaling_handler::get_conn_id_from_npid(const SceNpId& npid) const { std::lock_guard lock(data_mutex); @@ -843,7 +843,7 @@ std::optional signaling_handler::get_conn_id_from_npid(const SceNpId& npid) return std::nullopt; } -std::optional signaling_handler::get_sig_infos(u32 conn_id) +std::optional signaling_handler::get_sig_infos(u32 conn_id) const { std::lock_guard lock(data_mutex); if (sig_peers.contains(conn_id)) @@ -852,7 +852,7 @@ std::optional signaling_handler::get_sig_infos(u32 conn_id) return std::nullopt; } -std::optional signaling_handler::get_conn_id_from_addr(u32 addr, u16 port) +std::optional signaling_handler::get_conn_id_from_addr(u32 addr, u16 port) const { std::lock_guard lock(data_mutex); diff --git a/rpcs3/Emu/NP/signaling_handler.h b/rpcs3/Emu/NP/signaling_handler.h index a3eb027fe1..c97c049d36 100644 --- a/rpcs3/Emu/NP/signaling_handler.h +++ b/rpcs3/Emu/NP/signaling_handler.h @@ -63,9 +63,9 @@ public: u32 init_sig1(const SceNpId& npid); u32 init_sig2(const SceNpId& npid, u64 room_id, u16 member_id); - std::optional get_sig_infos(u32 conn_id); - std::optional get_conn_id_from_npid(const SceNpId& npid); - std::optional get_conn_id_from_addr(u32 addr, u16 port); + std::optional get_sig_infos(u32 conn_id) const; + std::optional get_conn_id_from_npid(const SceNpId& npid) const; + std::optional get_conn_id_from_addr(u32 addr, u16 port) const; void add_sig_ctx(u32 ctx_id); void remove_sig_ctx(u32 ctx_id); @@ -128,7 +128,7 @@ private: void retire_all_packets(std::shared_ptr& si); void stop_sig_nl(u32 conn_id, bool forceful); - shared_mutex data_mutex; + mutable shared_mutex data_mutex; atomic_t wakey = 0; signaling_packet sig_packet{}; diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 4476930607..4b3aaa0605 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -132,7 +132,7 
@@ namespace rsx free_rsx_memory(Traits::get(sink)); } - Traits::clone_surface(cmd, sink, region.source, new_address, region); + Traits::clone_surface(cmd, sink, region.source, new_address, region, region.source->resolution_scaling_config); allocate_rsx_memory(Traits::get(sink)); if (invalidated) [[unlikely]] @@ -398,6 +398,7 @@ namespace rsx surface_antialiasing antialias, usz width, usz height, usz pitch, u8 bpp, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { surface_storage_type old_surface_storage; @@ -448,7 +449,7 @@ namespace rsx } } - if (Traits::surface_matches_properties(surface, format, width, height, antialias)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config)) { if (!pitch_compatible) { @@ -495,7 +496,7 @@ namespace rsx for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &surface = *It; - if (Traits::surface_matches_properties(surface, format, width, height, antialias, true)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config, true)) { new_surface_storage = std::move(surface); Traits::notify_surface_reused(new_surface_storage); @@ -531,7 +532,7 @@ namespace rsx if (!new_surface) { ensure(store); - new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, std::forward(extra_params)...); + new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, scaling_config, std::forward(extra_params)...); new_surface = Traits::get(new_surface_storage); Traits::prepare_surface_for_drawing(command_list, new_surface); allocate_rsx_memory(new_surface); @@ -842,11 +843,13 @@ namespace rsx surface_color_format color_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... 
extra_params) { return bind_surface_address( command_list, address, color_format, antialias, width, height, pitch, get_format_block_size_in_bytes(color_format), + scaling_config, std::forward(extra_params)...); } @@ -857,12 +860,14 @@ namespace rsx surface_depth_format2 depth_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { return bind_surface_address( command_list, address, depth_format, antialias, width, height, pitch, get_format_block_size_in_bytes(depth_format), + scaling_config, std::forward(extra_params)...); } @@ -969,6 +974,7 @@ namespace rsx surface_raster_type raster_type, const std::array &surface_addresses, u32 address_z, const std::array &surface_pitch, u32 zeta_pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { u32 clip_width = clip_horizontal_reg; @@ -998,7 +1004,7 @@ namespace rsx m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, - clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); + clip_width, clip_height, surface_pitch[surface_index], scaling_config, std::forward(extra_params)...)); m_bound_render_target_ids.push_back(surface_index); } @@ -1014,7 +1020,7 @@ namespace rsx { m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, - clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); + clip_width, clip_height, zeta_pitch, scaling_config, std::forward(extra_params)...)); } else { @@ -1463,5 +1469,113 @@ namespace rsx } } } + + void sync_scaling_config(command_list_type cmd, const rsx::surface_scaling_config_t& active_config) + { + auto process_list_function = [&](surface_ranged_map& data, const utils::address_range32& range) + { + std::vector 
surfaces_to_clone; + + for (auto It = data.begin_range(range); It != data.end();) + { + auto surface = Traits::get(It->second); + if (surface->get_resolution_scaling_config() == active_config) + { + ++It; + continue; + } + + // Perform a test scaling and check if anything is different after scaling + // There are many cases where this will avoid creating new surfaces + const auto [new_w, new_h] = rsx::apply_resolution_scale( + active_config, + surface->template get_surface_width<>(), + surface->template get_surface_height<>()); + + if (new_w == surface->width() && new_h == surface->height()) + { + // Not affected by resolution scale. Just update the details and move on. + surface->resolution_scaling_config = active_config; + ++It; + continue; + } + + surfaces_to_clone.push_back(surface); + + // Invalidate the previous surface + invalidate(It->second); + It = data.erase(It); + } + + for (auto& surface : surfaces_to_clone) + { + // Enqueue the memory transfer + surface_storage_type sink{}; + deferred_clipped_region copy{}; + copy.width = surface->template get_surface_width<>(); + copy.height = surface->template get_surface_height<>(); + copy.transfer_scale_x = 1.f; + copy.transfer_scale_y = 1.f; + copy.target = nullptr; + copy.source = surface; + + Traits::clone_surface(cmd, sink, surface, surface->base_addr, copy, active_config); + allocate_rsx_memory(Traits::get(sink)); + + // Replace with the new one + auto new_surface = Traits::get(sink); + ensure(copy.target == new_surface); + data.emplace(surface->get_memory_range(), std::move(sink)); + + // Force barrier to reduce VRAM pressure + new_surface->memory_barrier(cmd, rsx::surface_access::memory_read); + } + }; + + const auto rtt_bind_backup = m_bound_render_targets; + const auto dsv_bind_backup = m_bound_depth_stencil; + + // Unbind everything. 
We'll restore it later + for (auto& rtt_bind : m_bound_render_targets) + { + rtt_bind = {}; + } + + m_bound_depth_stencil = {}; + + process_list_function(m_render_targets_storage, m_render_targets_memory_range); + process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range); + + // Restore bindings. + for (int i = 0; i < 4; ++i) + { + const auto address = rtt_bind_backup[i].first; + if (!address) + { + continue; + } + + auto rtt = m_render_targets_storage.find(address); + ensure(rtt != m_render_targets_storage.end()); + + m_bound_render_targets[i] = + { + address, + Traits::get(rtt->second) + }; + } + + if (const auto ds_address = dsv_bind_backup.first) + { + auto ds = m_depth_stencil_storage.find(ds_address); + ensure(ds != m_depth_stencil_storage.end()); + + m_bound_depth_stencil = + { + ds_address, + Traits::get(ds->second) + }; + } + } }; } diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index bf7dee2db3..8ec97600bc 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -88,18 +88,18 @@ namespace rsx auto dst_h = std::get<3>(region); // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) - { - auto src = static_cast(source); + auto src = static_cast(source); + std::tie(src_w, src_h) = rsx::apply_resolution_scale( + src->resolution_scaling_config, + src_w, src_h, + src->template get_surface_width(), + src->template get_surface_height()); - std::tie(src_w, src_h) = rsx::apply_resolution_scale(src_w, src_h, - src->template get_surface_width(), - src->template get_surface_height()); - - std::tie(dst_w, dst_h) = rsx::apply_resolution_scale(dst_w, dst_h, - target_surface->template get_surface_width(), - target_surface->template get_surface_height()); - } + std::tie(dst_w, dst_h) = rsx::apply_resolution_scale( + target_surface->resolution_scaling_config, + dst_w, dst_h, + target_surface->template get_surface_width(), + 
target_surface->template get_surface_height()); width = src_w; height = src_h; @@ -146,6 +146,9 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; + // Scaling configuration + surface_scaling_config_t resolution_scaling_config; + rsx::address_range32 memory_range; std::unique_ptr> resolve_surface; @@ -303,6 +306,11 @@ namespace rsx format_info.gcm_depth_format = format; } + void set_resolution_scaling_config(const surface_scaling_config_t& config) + { + resolution_scaling_config = config; + } + inline rsx::surface_color_format get_surface_color_format() const { return format_info.gcm_color_format; @@ -323,6 +331,11 @@ namespace rsx ); } + inline const rsx::surface_scaling_config_t& get_resolution_scaling_config() const + { + return resolution_scaling_config; + } + inline bool dirty() const { return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); @@ -541,10 +554,16 @@ namespace rsx } // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) + if (resolution_scaling_config.scale_percent != 100 || + region.source->resolution_scaling_config.scale_percent != 100) { - auto [src_width, src_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.source->width(), slice.source->height()); - auto [dst_width, dst_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.target->width(), slice.target->height()); + const auto& src_res_scale = region.source->resolution_scaling_config; + const auto& dst_res_scale = resolution_scaling_config; + const auto src_surface = ensure(dynamic_cast(slice.source)); + const auto dst_surface = ensure(dynamic_cast(slice.target)); + + auto [src_width, src_height] = rsx::apply_resolution_scale(src_res_scale, slice.width, slice.height, src_surface->get_surface_width(), src_surface->get_surface_height()); + auto [dst_width, dst_height] = rsx::apply_resolution_scale(dst_res_scale, slice.width, slice.height, dst_surface->get_surface_width(), 
dst_surface->get_surface_height()); slice.transfer_scale_x *= f32(dst_width) / src_width; slice.transfer_scale_y *= f32(dst_height) / src_height; @@ -552,8 +571,8 @@ namespace rsx slice.width = src_width; slice.height = src_height; - std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(slice.src_x, slice.src_y, slice.source->width(), slice.source->height()); - std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(slice.dst_x, slice.dst_y, slice.target->width(), slice.target->height()); + std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(src_res_scale, slice.src_x, slice.src_y, src_surface->get_surface_width(), src_surface->get_surface_height()); + std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(dst_res_scale, slice.dst_x, slice.dst_y, dst_surface->get_surface_width(), dst_surface->get_surface_height()); } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index dfe8043bd3..8aed0ccc34 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2413,9 +2413,13 @@ namespace rsx // 2. 
The image has to have been generated on the GPU (fbo or blit target only) rsx::simple_array sections; - const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100); + const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage); + auto to_surface_type = [](const copy_region_descriptor& rgn) -> typename surface_store_type::surface_type + { + return static_cast(rgn.src); + }; - if (!helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] + if (!helpers::append_mipmap_level(to_surface_type, sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] { // Abort if mip0 is not compatible return result; @@ -2445,7 +2449,7 @@ namespace rsx options, range, extended_dimension, m_rtts, std::forward(extras)...); if (!ret.validate() || - !helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes)) + !helpers::append_mipmap_level(to_surface_type, sections, ret, attr2, subsurface, use_upscaling, attributes)) { // Abort break; @@ -2778,7 +2782,7 @@ namespace rsx surf->template get_surface_height() != surf->height()) { // Must go through a scaling operation due to resolution scaling being present - ensure(g_cfg.video.resolution_scale_percent != 100); + ensure(src_subres.surface->resolution_scaling_config.scale_percent != 100); use_null_region = false; } } @@ -3389,8 +3393,8 @@ namespace rsx { const auto surface_width = src_subres.surface->template get_surface_width(); const auto surface_height = src_subres.surface->template get_surface_height(); - std::tie(src_area.x1, src_area.y1) = rsx::apply_resolution_scale(src_area.x1, src_area.y1, surface_width, surface_height); - std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_area.x2, src_area.y2, surface_width, surface_height); + std::tie(src_area.x1, src_area.y1) = 
rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x1, src_area.y1, surface_width, surface_height); + std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x2, src_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area); @@ -3400,8 +3404,8 @@ namespace rsx { const auto surface_width = dst_subres.surface->template get_surface_width(); const auto surface_height = dst_subres.surface->template get_surface_height(); - std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_area.x1, dst_area.y1, surface_width, surface_height); - std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_area.x2, dst_area.y2, surface_width, surface_height); + std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x1, dst_area.y1, surface_width, surface_height); + std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x2, dst_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area); diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 1560f40ad4..8898830034 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -357,11 +357,11 @@ namespace rsx const auto surface_width = section.surface->template get_surface_width(); const auto surface_height = section.surface->template get_surface_height(); - const auto [src_width, src_height] = rsx::apply_resolution_scale(section.src_area.width, h, surface_width, surface_height); - const auto [dst_width, 
dst_height] = rsx::apply_resolution_scale(section.dst_area.width, h, attr.width, attr.height); + const auto [src_width, src_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.src_area.width, h, surface_width, surface_height); + const auto [dst_width, dst_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.dst_area.width, h, attr.width, attr.height); - std::tie(src_x, src_y) = rsx::apply_resolution_scale(src_x, src_y, surface_width, surface_height); - std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(dst_x, dst_y, attr.width, attr.height); + std::tie(src_x, src_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, src_x, src_y, surface_width, surface_height); + std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, dst_x, dst_y, attr.width, attr.height); section.surface->memory_barrier(cmd, rsx::surface_access::transfer_read); @@ -430,8 +430,10 @@ namespace rsx if (scaling) { // Since output is upscaled, also upscale on dst - const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, attr.height); - const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(dst_w, height, attr.width, attr.height); + + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(scaling_config, static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, attr.height); + const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(scaling_config, dst_w, height, attr.width, attr.height); out.push_back ({ @@ -660,10 +662,10 @@ namespace rsx bool is_depth = texptr->is_depth_surface(); auto attr2 = attr; - if (rsx::get_resolution_scale_percent() != 100) + if (texptr->resolution_scaling_config.scale_percent != 100) { - const auto [scaled_w, scaled_h] = 
rsx::apply_resolution_scale(attr.width, attr.height, surface_width, surface_height); - const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, attr.width, attr.height, surface_width, surface_height); + const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); attr2.width = scaled_w; attr2.height = scaled_h; attr2.slice_h = scaled_slice_h; @@ -841,7 +843,8 @@ namespace rsx } // If this method was called, there is no easy solution, likely means atlas gather is needed - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(attr2.width, attr2.height); + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(scaling_config, attr2.width, attr2.height); const auto format_class = classify_format(attr2.gcm_format); const auto upload_context = (fbos.empty()) ? 
texture_upload_context::shader_read : texture_upload_context::framebuffer_storage; @@ -892,14 +895,15 @@ namespace rsx return result; } - template + template bool append_mipmap_level( + to_surface_type_converter&& as_surface_type, // Cast function to surface type rsx::simple_array& sections, // Destination list - const sampled_image_descriptor& level, // Descriptor for the image level being checked - const image_section_attributes_t& attr, // Attributes of image level - u8 mipmap_level, // Level index - bool apply_upscaling, // Whether to upscale the results or not - const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level + const sampled_image_descriptor& level, // Descriptor for the image level being checked + const image_section_attributes_t& attr, // Attributes of image level + u8 mipmap_level, // Level index + bool apply_upscaling, // Whether to upscale the results or not + const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level { if (level.image_handle) { @@ -916,7 +920,8 @@ namespace rsx // Calculate transfer dimensions from attr if (level.upload_context == rsx::texture_upload_context::framebuffer_storage) [[likely]] { - std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(attr.width, attr.height); + auto rtv = as_surface_type(mip); + std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(rtv->resolution_scaling_config, attr.width, attr.height); } else { @@ -964,7 +969,9 @@ namespace rsx if (apply_upscaling) { auto& mip = sections.back(); - std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale(mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); + std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale( + as_surface_type(mip)->resolution_scaling_config, + mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); } return true; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp index e263a945ef..f86c6dea04 100644 --- 
a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp @@ -49,7 +49,7 @@ namespace rsx } } - std::string framebuffer_statistics_t::to_string(bool squash) const + std::string framebuffer_statistics_t::to_string(const surface_scaling_config_t& scaling_config, bool squash) const { // Format is sorted by sample count struct sorted_message_t @@ -70,7 +70,7 @@ namespace rsx for (const auto& [aa_mode, stat] : data) { auto real_stat = stat; - std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(stat.width, stat.height); + std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(scaling_config, stat.width, stat.height); real_stats.push_back(real_stat); sorted_message_t msg; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.h b/rpcs3/Emu/RSX/Core/RSXDisplay.h index 77407f2f19..6a04374e1b 100644 --- a/rpcs3/Emu/RSX/Core/RSXDisplay.h +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.h @@ -12,6 +12,8 @@ namespace rsx { enum class surface_antialiasing : u8; + struct surface_scaling_config_t; + struct framebuffer_dimensions_t { u16 width; @@ -42,7 +44,7 @@ namespace rsx void add(u16 width, u16 height, rsx::surface_antialiasing aa); // Returns a formatted string representing the statistics collected over the frame. - std::string to_string(bool squash) const; + std::string to_string(const surface_scaling_config_t& scaling_config, bool squash) const; }; struct frame_statistics_t diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 53f6ce31e7..8ef45e1f06 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -699,7 +699,9 @@ namespace rsx const auto window_origin = REGS(m_ctx)->shader_window_origin(); const u32 window_height = REGS(m_ctx)->shader_window_height(); const auto pixel_center = REGS(m_ctx)->pixel_center(); - const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 
1.f : rsx::get_resolution_scale(); + const f32 resolution_scale = (window_height <= RSX(m_ctx)->resolution_scaling_config.min_scalable_dimension) + ? 1.f + : RSX(m_ctx)->resolution_scaling_config.scale_factor(); payload.wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); payload.wpos_bias[0] = 0.f; diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 3de2585523..d0c2e233e9 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -227,7 +227,7 @@ void GLGSRender::update_draw_state() case rsx::primitive_type::lines: case rsx::primitive_type::line_loop: case rsx::primitive_type::line_strip: - gl_state.line_width(rsx::method_registers.line_width() * rsx::get_resolution_scale()); + gl_state.line_width(rsx::method_registers.line_width() * resolution_scaling_config.scale_factor()); gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH); break; default: diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 4ad9619d7f..c1acabd601 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -73,6 +73,7 @@ void GLGSRender::set_viewport() { // NOTE: scale offset matrix already contains the viewport transformation const auto [clip_width, clip_height] = rsx::apply_resolution_scale( + resolution_scaling_config, rsx::method_registers.surface_clip_width(), rsx::method_registers.surface_clip_height()); glViewport(0, 0, clip_width, clip_height); @@ -138,8 +139,7 @@ void GLGSRender::on_init_thread() gl::init(); gl::set_command_context(gl_state); - // Enable adaptive vsync if vsync is requested - gl::set_swapinterval(g_cfg.video.vsync ? 
-1 : 0); + update_swap_interval(); if (g_cfg.video.debug_output) gl::enable_debugging(); @@ -580,6 +580,33 @@ void GLGSRender::on_exit() gl::set_primary_context_thread(false); } +void GLGSRender::update_swap_interval() +{ + const vsync_mode current_mode = g_cfg.video.vsync; + if (current_mode == m_vsync_mode) + { + return; + } + + // Enable adaptive vsync if vsync is requested + int swap_interval = 0; + switch (current_mode) + { + default: + case vsync_mode::off: + break; + case vsync_mode::adaptive: + swap_interval = -1; + break; + case vsync_mode::full: + swap_interval = 1; + break; + } + + gl::set_swapinterval(swap_interval); + m_vsync_mode = current_mode; +} + void GLGSRender::clear_surface(u32 arg) { if (skip_current_frame) return; @@ -910,7 +937,7 @@ void GLGSRender::load_program_env() m_draw_processor.fill_scale_offset_data(buf, false); m_draw_processor.fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 68)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * rsx::get_resolution_scale(); + *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * resolution_scaling_config.scale_factor(); *(reinterpret_cast(buf + 76)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 80)) = rsx::method_registers.clip_max(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index a05eb0bf3d..779519fee7 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -186,6 +186,8 @@ private: gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf& avconfig); + void update_swap_interval(); + public: void set_viewport(); void set_scissor(bool clip_viewport); diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 9e588d5df2..a758804e4f 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -399,8 +399,15 @@ namespace gl void 
ui_overlay_renderer::run(gl::command_context& cmd_, const areau& viewport, GLuint target, rsx::overlays::overlay& ui, bool flip_vertically) { - program_handle.uniforms["viewport"] = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); - program_handle.uniforms["ui_scale"] = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + ui.set_render_viewport( + static_cast(std::min(viewport.width(), std::numeric_limits::max())), + static_cast(std::min(viewport.height(), std::numeric_limits::max())) + ); + const auto ui_scale = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + const auto ui_viewport = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); + + program_handle.uniforms["viewport"] = ui_viewport; + program_handle.uniforms["ui_scale"] = ui_scale; saved_sampler_state save_30(30, m_sampler); saved_sampler_state save_31(31, m_sampler); @@ -458,12 +465,24 @@ namespace gl .texture_mode(texture_mode) .clip_fragments(cmd.config.clip_region) .pulse_glow(cmd.config.pulse_glow) + .set_sdf(cmd.config.sdf_config.func) .get(); program_handle.uniforms["timestamp"] = cmd.config.get_sinus_value(); program_handle.uniforms["albedo"] = cmd.config.color; program_handle.uniforms["clip_bounds"] = cmd.config.clip_rect; program_handle.uniforms["blur_intensity"] = static_cast(cmd.config.blur_strength); + + if (cmd.config.sdf_config.func != rsx::overlays::sdf_function::none) + { + auto sdf_config = cmd.config.sdf_config; + sdf_config.transform(static_cast(viewport).flipped_vertical(), {ui_scale.x, ui_scale.y}); + + program_handle.uniforms["sdf_params"] = color4f(sdf_config.hx, sdf_config.hy, sdf_config.br, sdf_config.bw); + program_handle.uniforms["sdf_origin"] = color2f(sdf_config.cx, sdf_config.cy); + program_handle.uniforms["sdf_border_color"] = sdf_config.border_color; + } + 
overlay_pass::run(cmd_, viewport, target, gl::image_aspect::color, true); } diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index 4c241d7d33..2aa11868ee 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -95,6 +95,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons image = section.surface->get_surface(rsx::surface_access::transfer_read); std::tie(info->width, info->height) = rsx::apply_resolution_scale( + resolution_scaling_config, std::min(surface_width, info->width), std::min(surface_height, info->height)); } @@ -225,7 +226,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) if (avconfig.stereo_enabled) [[unlikely]] { - const auto [unused, min_expected_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); + const auto [unused, min_expected_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); if (image_to_flip->height() < min_expected_height) { // Get image for second eye @@ -240,7 +241,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) else { // Account for possible insets - const auto [unused2, scaled_buffer_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height); + const auto [unused2, scaled_buffer_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height); buffer_height = std::min(image_to_flip->height() - min_expected_height, scaled_buffer_height); } } @@ -252,6 +253,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) if (info.emu_flip) { evaluate_cpu_usage_reduction_limits(); + update_swap_interval(); } // Get window state @@ -315,9 +317,11 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) // Lock to avoid modification during run-update chain std::lock_guard lock(*m_overlay_manager); + const areau display_area = {0, 0, 
static_cast(m_frame->client_width()), static_cast(m_frame->client_height())}; for (const auto& view : m_overlay_manager->get_views()) { - m_ui_renderer.run(cmd, aspect_ratio, target, *view.get(), flip_vertically); + const areau render_area = view->use_window_space ? display_area : aspect_ratio; + m_ui_renderer.run(cmd, render_area, target, *view.get(), flip_vertically); } } }; @@ -476,7 +480,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) "Texture uploads: %11u (%u from CPU - %02u%%, %u copies avoided)\n" "Vertex cache hits: %9u/%u (%u%%)\n" "Program cache lookup ellision: %u/%u (%u%%)", - info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa), + info.stats.framebuffer_stats.to_string(resolution_scaling_config, !backend_config.supports_hw_msaa), get_load(), info.stats.draw_calls, info.stats.setup_time, info.stats.vertex_upload_time, info.stats.textures_upload_time, info.stats.draw_exec_time, num_dirty_textures, texture_memory_size, num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate, @@ -513,6 +517,19 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) m_frame->flip(m_context); rsx::thread::flip(info); + // Data sync + const rsx::surface_scaling_config_t active_res_scaling_config = + { + .scale_percent = static_cast(g_cfg.video.resolution_scale_percent), + .min_scalable_dimension = static_cast(g_cfg.video.min_scalable_dimension), + }; + + if (active_res_scaling_config != this->resolution_scaling_config) + { + m_rtts.sync_scaling_config(cmd, active_res_scaling_config); + this->resolution_scaling_config = active_res_scaling_config; + } + // Cleanup m_gl_texture_cache.on_frame_end(); m_vertex_cache->purge(); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index a61fb4b365..34c2ca72d3 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -141,7 +141,8 @@ void 
GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /* m_framebuffer_layout.width, m_framebuffer_layout.height, m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, m_framebuffer_layout.raster_type, m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address, - m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch); + m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch, + resolution_scaling_config); std::array color_targets; GLuint depth_stencil_target; @@ -448,7 +449,7 @@ void gl::render_target::load_memory(gl::command_context& cmd) subres.data = { vm::get_super_ptr(base_addr), static_cast::size_type>(rsx_pitch * surface_height * samples_y) }; // TODO: MSAA support - if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] + if (resolution_scaling_config.scale_percent == 100 && spp == 1) [[likely]] { gl::upload_texture(cmd, this, get_gcm_format(), is_swizzled, { subres }); } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index ee3bc03956..f5e2252b92 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -98,7 +98,7 @@ namespace gl bool matches_dimensions(u16 _width, u16 _height) const { //Use forward scaling to account for rounding and clamping errors - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(_width, _height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(resolution_scaling_config, _width, _height); return (scaled_w == width()) && (scaled_h == height()); } @@ -138,11 +138,12 @@ struct gl_render_target_traits u32 address, rsx::surface_color_format surface_color_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& resolution_scaling_config ) { auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); - const auto [width_, 
height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -162,6 +163,7 @@ struct gl_render_target_traits result->set_name(fmt::format("RTV_%u@0x%x", result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_color_format) * result->samples_x); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_color_format); @@ -182,11 +184,12 @@ struct gl_render_target_traits u32 address, rsx::surface_depth_format2 surface_depth_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& resolution_scaling_config ) { auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); - const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -206,6 +209,7 @@ struct gl_render_target_traits result->set_name(fmt::format("DSV_%u@0x%x", result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_depth_format); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_depth_format) * result->samples_x); @@ -225,13 +229,17 @@ struct gl_render_target_traits void clone_surface( gl::command_context& cmd, std::unique_ptr& 
sink, gl::render_target* ref, - u32 address, barrier_descriptor_t& prev) + u32 address, barrier_descriptor_t& prev, + const rsx::surface_scaling_config_t& scaling_config) { if (!sink) { auto internal_format = static_cast(ref->get_internal_format()); - const auto [new_w, new_h] = rsx::apply_resolution_scale(prev.width, prev.height, - ref->get_surface_width(), ref->get_surface_height()); + const auto [new_w, new_h] = rsx::apply_resolution_scale( + scaling_config, + prev.width, prev.height, + ref->get_surface_width(), + ref->get_surface_height()); sink = std::make_unique(new_w, new_h, ref->samples(), internal_format, ref->format_class()); sink->add_ref(); @@ -240,6 +248,9 @@ struct gl_render_target_traits sink->state_flags = rsx::surface_state_flags::erase_bkgnd; sink->format_info = ref->format_info; + sink->sample_layout = ref->sample_layout; + sink->resolution_scaling_config = scaling_config; + sink->set_name(fmt::format("SINK_%u@0x%x", sink->id(), address)); sink->set_spp(ref->get_spp()); sink->set_native_pitch(static_cast(prev.width) * ref->get_bpp() * ref->samples_x); @@ -375,6 +386,7 @@ struct gl_render_target_traits gl::texture::internal_format format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { if (check_refs && surface->has_refs()) @@ -382,7 +394,8 @@ struct gl_render_target_traits return surface->get_internal_format() == format && surface->get_spp() == get_format_sample_count(antialias) && - surface->matches_dimensions(static_cast(width), static_cast(height)); + surface->matches_dimensions(static_cast(width), static_cast(height)) && + surface->resolution_scaling_config == scaling_config; } static @@ -391,10 +404,11 @@ struct gl_render_target_traits rsx::surface_color_format format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs=false) { const auto internal_fmt = 
rsx::internals::surface_color_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, scaling_config, check_refs); } static @@ -403,10 +417,11 @@ struct gl_render_target_traits rsx::surface_depth_format2 format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { const auto internal_fmt = rsx::internals::surface_depth_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, scaling_config, check_refs); } static diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 64e451e579..6f566b9a82 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -717,169 +717,168 @@ namespace gl } } } + + return; } - else + + std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; + image_memory_requirements mem_info; + pixel_buffer_layout mem_layout; + + std::span dst_buffer = staging_buffer; + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); + u64 image_linear_size = staging_buffer.size(); + + const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); + + if (driver_caps.ARB_compute_shader_supported) { - std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; - image_memory_requirements mem_info; - pixel_buffer_layout mem_layout; - - std::span dst_buffer = staging_buffer; - u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - u64 image_linear_size = staging_buffer.size(); - - const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); - - if 
(driver_caps.ARB_compute_shader_supported) + if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) { - if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) - { - g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); - } - - if (g_compute_decode_buffer.size() < min_required_buffer_size) - { - g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } + g_upload_transfer_buffer.remove(); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); } - for (const rsx::subresource_layout& layout : input_layouts) + if (g_compute_decode_buffer.size() < min_required_buffer_size) { - if (driver_caps.ARB_compute_shader_supported) + g_compute_decode_buffer.remove(); + g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + } + + for (const rsx::subresource_layout& layout : input_layouts) + { + if (driver_caps.ARB_compute_shader_supported) + { + u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + + // We're in the "else" branch, so "is_compressed_host_format()" is always false. + // Handle emulated compressed formats with host unpack (R8G8 compressed) + row_pitch = std::max(row_pitch, dst->pitch()); + + // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. 
+ image_linear_size = row_pitch * layout.height_in_texel * layout.depth; + + compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; + compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); + + g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); + upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); + dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + } + + rsx::io_buffer io_buf = dst_buffer; + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); + auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + + // Define upload region + coord3u region; + region.x = 0; + region.y = 0; + region.z = layout.layer; + region.width = layout.width_in_texel; + region.height = layout.height_in_texel; + region.depth = layout.depth; + + if (!driver_caps.ARB_compute_shader_supported) + { + unpack_settings.swap_bytes(op.require_swap); + dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + continue; + } + + // 0. Preconf + mem_layout.alignment = static_cast(caps.alignment); + mem_layout.swap_bytes = op.require_swap; + mem_layout.format = gl_format; + mem_layout.type = gl_type; + mem_layout.block_size = block_size_in_bytes; + + // 2. 
Upload memory to GPU + if (!op.require_deswizzle) + { + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + } + else + { + // 2.1 Copy data to deswizzle buf + if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) { - u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); - - // We're in the "else" branch, so "is_compressed_host_format()" is always false. - // Handle emulated compressed formats with host unpack (R8G8 compressed) - row_pitch = std::max(row_pitch, dst->pitch()); - - // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. - image_linear_size = row_pitch * layout.height_in_texel * layout.depth; - - compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; - compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); - - g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); - upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); - dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + g_deswizzle_scratch_buffer.remove(); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); } - rsx::io_buffer io_buf = dst_buffer; - caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); - auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - // Define upload region - coord3u 
region; - region.x = 0; - region.y = 0; - region.z = layout.layer; - region.width = layout.width_in_texel; - region.height = layout.height_in_texel; - region.depth = layout.depth; + // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem + const auto block_size = op.element_size * op.block_length; - if (driver_caps.ARB_compute_shader_supported) + if (op.require_swap) { - // 0. Preconf - mem_layout.alignment = static_cast(caps.alignment); - mem_layout.swap_bytes = op.require_swap; - mem_layout.format = gl_format; - mem_layout.type = gl_type; - mem_layout.block_size = block_size_in_bytes; + mem_layout.swap_bytes = false; - // 2. Upload memory to GPU - if (!op.require_deswizzle) + switch (op.element_size) { - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } - else - { - // 2.1 Copy data to deswizzle buf - if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) - { - 
g_deswizzle_scratch_buffer.remove(); - g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } - - u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - - // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem - const auto block_size = op.element_size * op.block_length; - - if (op.require_swap) - { - mem_layout.swap_bytes = false; - - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - else - { - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - 
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - - // Barrier - g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); - } - - // 3. Update configuration - mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; - mem_info.image_size_in_bytes = image_linear_size; - mem_info.memory_required = 0; - - // 4. Dispatch compute routines - copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); - - // Barrier - g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } else { - unpack_settings.swap_bytes(op.require_swap); - dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + switch (op.element_size) + { + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, 
block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); + } } + + // Barrier + g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); } + + // 3. Update configuration + mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; + mem_info.image_size_in_bytes = image_linear_size; + mem_info.memory_required = 0; + + // 4. Dispatch compute routines + copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); + + // Barrier + g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 4a17dae081..6d0bbb18a3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -14,20 +14,20 @@ namespace gl { struct pixel_buffer_layout { - GLenum format; - GLenum type; - u32 row_length; - u8 block_size; - bool swap_bytes; - u8 alignment; + GLenum format = GL_RGBA; + GLenum type = GL_UNSIGNED_BYTE; + u32 row_length = 0; + u8 block_size = 0; + bool swap_bytes = false; + u8 alignment = 0; u8 reserved; }; struct image_memory_requirements { - u64 image_size_in_texels; - u64 image_size_in_bytes; - u64 memory_required; + u64 image_size_in_texels = 0; + u64 image_size_in_bytes = 0; + u64 memory_required = 0; }; struct clear_cmd_info diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index a4b931186f..27b455374e 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -287,6 +287,7 @@ namespace gl u32 transfer_width = width; u32 transfer_height = height; u32 transfer_x = 0, transfer_y = 0; + u16 
resolution_scale_percent = 100; if (context == rsx::texture_upload_context::framebuffer_storage) { @@ -295,9 +296,10 @@ namespace gl target_texture = surface->get_surface(rsx::surface_access::transfer_read); transfer_width *= surface->samples_x; transfer_height *= surface->samples_y; + resolution_scale_percent = surface->resolution_scaling_config.scale_percent; } - if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) || + if ((resolution_scale_percent != 100 && context == rsx::texture_upload_context::framebuffer_storage) || (vram_texture->pitch() != rsx_pitch)) { areai src_area = { 0, 0, 0, 0 }; diff --git a/rpcs3/Emu/RSX/GSRender.cpp b/rpcs3/Emu/RSX/GSRender.cpp index e9e859952e..f09f4704da 100644 --- a/rpcs3/Emu/RSX/GSRender.cpp +++ b/rpcs3/Emu/RSX/GSRender.cpp @@ -13,6 +13,8 @@ GSRender::GSRender(utils::serial* ar) noexcept : rsx::thread(ar) { m_frame = nullptr; } + + m_vsync_mode = g_cfg.video.vsync; } GSRender::~GSRender() diff --git a/rpcs3/Emu/RSX/GSRender.h b/rpcs3/Emu/RSX/GSRender.h index d2a6fd9c5f..f597b5562b 100644 --- a/rpcs3/Emu/RSX/GSRender.h +++ b/rpcs3/Emu/RSX/GSRender.h @@ -23,6 +23,8 @@ protected: draw_context_t m_context = nullptr; bool m_continuous_mode = false; + vsync_mode m_vsync_mode{}; + public: ~GSRender() override; diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h index bd9b0e1068..88e1c53324 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h @@ -16,7 +16,7 @@ namespace rsx { static constexpr u16 menu_entry_height = 40; static constexpr u16 menu_entry_margin = 30; - static constexpr u16 menu_checkbox_size = 20; + static constexpr u16 menu_checkbox_size = 24; static constexpr u16 element_height = 25; enum class page_navigation diff --git 
a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_main_menu.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_main_menu.cpp index 9d239355c9..8f27e4172d 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_main_menu.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_main_menu.cpp @@ -194,6 +194,7 @@ namespace rsx label_widget->set_size(m_sidebar->w, 60); label_widget->set_font("Arial", 16); label_widget->back_color.a = 0.f; + label_widget->set_margin(8, 0); label_widget->set_padding(16, 4, 16, 4); label_widget->auto_resize(); label_widget->set_size(label_widget->w, 60); @@ -213,6 +214,7 @@ namespace rsx auto icon_view = std::make_unique(); icon_view->set_raw_image(icon_info); icon_view->set_size(42, 60); + icon_view->set_margin(8, 0); icon_view->set_padding(18, 0, 18, 18); const u16 packed_width = icon_view->padding_left + icon_view->w + label_widget->w + 18; // rpad diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp index b359fb562c..4d0681193f 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp @@ -26,7 +26,7 @@ namespace rsx { if (!suspend_mode) { - Emu.after_kill_callback = []() { Emu.Restart(); }; + Emu.after_kill_callback = []() { Emu.Restart(true, false); }; // Make sure we keep the game window opened Emu.SetContinuousMode(true); diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp index 71b78fe1d8..9147b7e5c5 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp @@ -152,14 +152,16 @@ namespace rsx home_menu_settings_video::home_menu_settings_video(s16 x, s16 y, u16 width, u16 height, bool use_separators, home_menu_page* parent) : home_menu_settings_page(x, y, width, 
height, use_separators, parent, get_localized_string(localized_string_id::HOME_MENU_SETTINGS_VIDEO)) { + add_unsigned_slider(&g_cfg.video.resolution_scale_percent, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RESOLUTION_SCALE_PERCENT, "%", 25); + add_unsigned_slider(&g_cfg.video.min_scalable_dimension, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RESOLUTION_SCALE_THRESHOLD, "px", 1); + + add_dropdown(&g_cfg.video.vsync, localized_string_id::HOME_MENU_SETTINGS_VIDEO_VSYNC); + add_dropdown(&g_cfg.video.frame_limit, localized_string_id::HOME_MENU_SETTINGS_VIDEO_FRAME_LIMIT); add_unsigned_slider(&g_cfg.video.anisotropic_level_override, localized_string_id::HOME_MENU_SETTINGS_VIDEO_ANISOTROPIC_OVERRIDE, "x", 2, {{0, "Auto"}}, {14}); add_dropdown(&g_cfg.video.output_scaling, localized_string_id::HOME_MENU_SETTINGS_VIDEO_OUTPUT_SCALING); - if (g_cfg.video.renderer == video_renderer::vulkan && g_cfg.video.output_scaling == output_scaling_mode::fsr) - { - add_unsigned_slider(&g_cfg.video.rcas_sharpening_intensity, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RCAS_SHARPENING, " %", 1); - } + add_unsigned_slider(&g_cfg.video.rcas_sharpening_intensity, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RCAS_SHARPENING, " %", 1); add_checkbox(&g_cfg.video.stretch_to_display_area, localized_string_id::HOME_MENU_SETTINGS_VIDEO_STRETCH_TO_DISPLAY); @@ -183,6 +185,7 @@ namespace rsx add_unsigned_slider(&g_cfg.video.driver_wakeup_delay, localized_string_id::HOME_MENU_SETTINGS_ADVANCED_DRIVER_WAKE_UP_DELAY, " µs", 20, {}, {}, g_cfg.video.driver_wakeup_delay.min, 800); add_signed_slider(&g_cfg.video.vblank_rate, localized_string_id::HOME_MENU_SETTINGS_ADVANCED_VBLANK_FREQUENCY, " Hz", 30, {}, 30); add_checkbox(&g_cfg.video.vblank_ntsc, localized_string_id::HOME_MENU_SETTINGS_ADVANCED_VBLANK_NTSC); + add_checkbox(&g_cfg.video.handle_tiled_memory, localized_string_id::HOME_MENU_SETTINGS_ADVANCED_RSX_MEMORY_TILING); apply_layout(); } @@ -218,6 +221,7 @@ namespace rsx 
home_menu_settings_overlays::home_menu_settings_overlays(s16 x, s16 y, u16 width, u16 height, bool use_separators, home_menu_page* parent) : home_menu_settings_page(x, y, width, height, use_separators, parent, get_localized_string(localized_string_id::HOME_MENU_SETTINGS_OVERLAYS)) { + add_checkbox(&g_cfg.misc.play_music_during_boot, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_PLAY_MUSIC_DURING_BOOT); add_checkbox(&g_cfg.misc.show_trophy_popups, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_TROPHY_POPUPS); add_checkbox(&g_cfg.misc.show_rpcn_popups, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_RPCN_POPUPS); add_checkbox(&g_cfg.misc.show_shader_compilation_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_SHADER_COMPILATION_HINT); @@ -226,6 +230,7 @@ namespace rsx add_checkbox(&g_cfg.misc.show_pressure_intensity_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_PRESSURE_INTENSITY_TOGGLE_HINT); add_checkbox(&g_cfg.misc.show_analog_limiter_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_ANALOG_LIMITER_TOGGLE_HINT); add_checkbox(&g_cfg.misc.show_mouse_and_keyboard_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_MOUSE_AND_KB_TOGGLE_HINT); + add_checkbox(&g_cfg.misc.show_fatal_error_hints, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_FATAL_ERROR_HINTS); add_checkbox(&g_cfg.video.record_with_overlays, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_RECORD_WITH_OVERLAYS); apply_layout(); @@ -249,10 +254,11 @@ namespace rsx add_dropdown(&g_cfg.video.perf_overlay.position, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_POSITION); add_checkbox(&g_cfg.video.perf_overlay.center_x, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_CENTER_X); add_checkbox(&g_cfg.video.perf_overlay.center_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_CENTER_Y); - add_unsigned_slider(&g_cfg.video.perf_overlay.margin_x, 
localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_X, " px", 1); - add_unsigned_slider(&g_cfg.video.perf_overlay.margin_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_Y, " px", 1); + add_float_slider(&g_cfg.video.perf_overlay.margin_x, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_X, " %", 0.25f); + add_float_slider(&g_cfg.video.perf_overlay.margin_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_Y, " %", 0.25f); add_unsigned_slider(&g_cfg.video.perf_overlay.font_size, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_FONT_SIZE, " px", 1); add_unsigned_slider(&g_cfg.video.perf_overlay.opacity, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_OPACITY, " %", 1); + add_checkbox(&g_cfg.video.perf_overlay.perf_overlay_use_window_space, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_USE_WINDOW_SPACE); apply_layout(); } diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h index 13f47eb41e..ba8b730d44 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h @@ -10,8 +10,6 @@ namespace rsx { namespace overlays { - void play_sound(sound_effect sound, std::optional volume); - struct home_menu_settings : public home_menu_page { public: diff --git a/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp b/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp index f7cb19237e..304c07a1f6 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp @@ -84,7 +84,7 @@ namespace rsx::overlays void switchbox::set_size(u16 w, u16 h) { - const u16 dim = std::min(w, h); + const u16 dim = std::max(std::min(w, h), 14); box_layout::set_size(w, h); clear_items(); @@ -93,21 +93,17 @@ namespace rsx::overlays m_back_ellipse = nullptr; m_front_circle = nullptr; - if (dim < 4) - { - return; - 
} - auto ellipse_part = std::make_unique(); - auto circle_part = std::make_unique(); + auto circle_part = std::make_unique(); - ellipse_part->set_size(dim * 2, dim / 2); - ellipse_part->set_pos(0, dim / 4); - ellipse_part->radius = dim / 4; + ellipse_part->set_size(dim * 2, dim); + ellipse_part->set_padding(1); + ellipse_part->set_pos(0, 0); + ellipse_part->border_radius = (dim - 4) / 2; // Avoid perfect capsule shape since we want a border and perfect capsules can have a false border along the midline due to subgroup shenanigans circle_part->set_size(dim, dim); + circle_part->set_padding(4); circle_part->set_pos(0, 0); - circle_part->radius = dim / 2; m_back_ellipse = add_element(ellipse_part); m_front_circle = add_element(circle_part); @@ -130,13 +126,18 @@ namespace rsx::overlays if (m_is_checked) { - m_back_ellipse->back_color = this->fore_color * 0.5f; + m_back_ellipse->border_color.a = 0.f; + m_back_ellipse->border_size = 0; + m_back_ellipse->back_color = this->fore_color * 0.75f; m_back_ellipse->back_color.a = 1.f; - m_front_circle->back_color = this->fore_color; + m_front_circle->back_color = color4f(1.f); m_front_circle->set_pos(this->x + m_front_circle->w, this->y); } else { + m_back_ellipse->border_color = this->back_color * 0.75f; + m_back_ellipse->border_color.a = 1.f; + m_back_ellipse->border_size = 1; m_back_ellipse->back_color = this->back_color * 0.5f; m_back_ellipse->back_color.a = 1.f; m_front_circle->back_color = this->back_color; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_compile_notification.cpp b/rpcs3/Emu/RSX/Overlays/overlay_compile_notification.cpp index 434226bfa5..709e554edf 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_compile_notification.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_compile_notification.cpp @@ -18,7 +18,7 @@ namespace rsx } queue_message( - localized_string_id::RSX_OVERLAYS_COMPILING_SHADERS, + localized_string_id::RSX_OVERLAYS_SPINNER_NO_TEXT, 5'000'000, {}, message_pin_location::bottom_left, diff --git 
a/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp b/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp index 02b0ff5a68..5208efd747 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp @@ -54,6 +54,43 @@ namespace rsx return result; } + void compiled_resource::sdf_config_t::transform(const areaf& target_viewport, const sizef& virtual_viewport) + { + const f32 scale_x = target_viewport.width() / virtual_viewport.width; + const f32 scale_y = target_viewport.height() / virtual_viewport.height; + + // Ideally the average should match the x and y scaling but arithmetic drift shifts the values around a bit. + // Also we need a way to define perfect circles when the aspect ratio is not respected. + const f32 scale_av = (scale_x + scale_y) / 2; + + hx *= scale_x; + hy *= scale_y; + br *= scale_av; + bw *= scale_av; + + // Border radius clamp + br = std::min({ br, hx, hy }); + + // Compute the function's origin. Account for flipped viewports as well. + if (target_viewport.x2 < target_viewport.x1) + { + cx = target_viewport.width() - (cx * scale_x) + target_viewport.x2; + } + else + { + cx = cx * scale_x + target_viewport.x1; + } + + if (target_viewport.y2 < target_viewport.y1) + { + cy = target_viewport.height() - (cy * scale_y) + target_viewport.y2; + } + else + { + cy = cy * scale_y + target_viewport.y1; + } + } + image_info::image_info(const std::string& filename, bool grayscaled) { fs::file f(filename, fs::read + fs::isfile); @@ -259,6 +296,12 @@ namespace rsx { v += vertex(x_offset, y_offset, 0.f, 0.f); } + + if (draw_commands[n].config.sdf_config.func != sdf_function::none) + { + draw_commands[n].config.sdf_config.cx += x_offset; + draw_commands[n].config.sdf_config.cy += y_offset; + } } } @@ -275,6 +318,12 @@ namespace rsx v += vertex(x_offset, y_offset, 0.f, 0.f); } + if (draw_commands[n].config.sdf_config.func != sdf_function::none) + { + draw_commands[n].config.sdf_config.cx += x_offset; + 
draw_commands[n].config.sdf_config.cy += y_offset; + } + draw_commands[n].config.clip_rect = clip_rect; draw_commands[n].config.clip_region = true; } @@ -382,25 +431,25 @@ namespace rsx m_is_compiled = false; } - void overlay_element::set_text(const std::string& text) + void overlay_element::set_text(std::string_view text) { std::u32string new_text = utf8_to_u32string(text); const bool is_dirty = this->text != new_text; - this->text = std::move(new_text); if (is_dirty) { + this->text = std::move(new_text); m_is_compiled = false; } } - void overlay_element::set_unicode_text(const std::u32string& text) + void overlay_element::set_unicode_text(std::u32string_view text) { const bool is_dirty = this->text != text; - this->text = text; if (is_dirty) { + this->text = text; m_is_compiled = false; } } @@ -585,6 +634,25 @@ namespace rsx return result; } + void overlay_element::configure_sdf(compiled_resource::command_config& config, sdf_function func) + { + const f32 rx = static_cast(x) + padding_left; + const f32 rw = static_cast(w) - (padding_left + padding_right); + const f32 ry = static_cast(y) + padding_top; + const f32 rh = static_cast(h) - (padding_top + padding_bottom); + + config.sdf_config.func = func; + config.sdf_config.cx = rx + (rw / 2.f); + config.sdf_config.cy = ry + (rh / 2.f); + config.sdf_config.hx = rw / 2.f; + config.sdf_config.hy = rh / 2.f; + config.sdf_config.br = 0.f; + config.sdf_config.bw = border_size; + config.sdf_config.border_color = border_color; + + config.disable_vertex_snap = true; + } + compiled_resource& overlay_element::get_compiled() { if (is_compiled()) @@ -609,6 +677,14 @@ namespace rsx config.pulse_sinus_offset = pulse_sinus_offset; config.pulse_speed_modifier = pulse_speed_modifier; + if (border_size != 0 && + border_color.a > 0.f && + w > border_size && + h > border_size) + { + configure_sdf(config, sdf_function::box); + } + auto& verts = compiled_resources_temp.draw_commands.front().verts; verts.resize(4); @@ -1095,82 +1171,33 @@ 
namespace rsx return compiled_resources; } -#ifdef __APPLE__ - if (true) -#else - if (radius == 0 || radius > (w / 2)) -#endif + overlay_element::get_compiled(); + auto& config = compiled_resources.draw_commands.front().config; + configure_sdf(config, sdf_function::rounded_box); + config.sdf_config.br = std::min({ static_cast(border_radius), config.sdf_config.hx, config.sdf_config.hy }); + + m_is_compiled = true; + return compiled_resources; + } + + compiled_resource& ellipse::get_compiled() + { + if (is_compiled()) + { + return compiled_resources; + } + + compiled_resources.clear(); + + if (!is_visible()) { - // Invalid radius - compiled_resources = overlay_element::get_compiled(); m_is_compiled = true; return compiled_resources; } - compiled_resource compiled_resources_temp = {}; - compiled_resources_temp.append({}); // Bg horizontal mid - compiled_resources_temp.append({}); // Bg horizontal top - compiled_resources_temp.append({}); // Bg horizontal bottom - compiled_resources_temp.append({}); // Bg upper-left - compiled_resources_temp.append({}); // Bg lower-left - compiled_resources_temp.append({}); // Bg upper-right - compiled_resources_temp.append({}); // Bg lower-right - - for (auto& draw_cmd : compiled_resources_temp.draw_commands) - { - auto& config = draw_cmd.config; - config.color = back_color; - config.disable_vertex_snap = true; - config.pulse_glow = pulse_effect_enabled; - config.pulse_sinus_offset = pulse_sinus_offset; - config.pulse_speed_modifier = pulse_speed_modifier; - } - - auto& bg0 = compiled_resources_temp.draw_commands[0]; - auto& bg1 = compiled_resources_temp.draw_commands[1]; - auto& bg2 = compiled_resources_temp.draw_commands[2]; - - bg0.verts.emplace_back(f32(x), f32(y + radius), 0.f, 0.f); - bg0.verts.emplace_back(f32(x + w), f32(y + radius), 0.f, 0.f); - bg0.verts.emplace_back(f32(x), f32(y + h) - radius, 0.f, 0.f); - bg0.verts.emplace_back(f32(x + w), f32(y + h) - radius, 0.f, 0.f); - - bg1.verts.emplace_back(f32(x + radius), f32(y), 
0.f, 0.f); - bg1.verts.emplace_back(f32(x + w) - radius, f32(y), 0.f, 0.f); - bg1.verts.emplace_back(f32(x + radius), f32(y + radius), 0.f, 0.f); - bg1.verts.emplace_back(f32(x + w) - radius, f32(y + radius), 0.f, 0.f); - - bg2.verts.emplace_back(f32(x + radius), f32(y + h) - radius, 0.f, 0.f); - bg2.verts.emplace_back(f32(x + w) - radius, f32(y + h) - radius, 0.f, 0.f); - bg2.verts.emplace_back(f32(x + radius), f32(y + h), 0.f, 0.f); - bg2.verts.emplace_back(f32(x + w) - radius, f32(y + h), 0.f, 0.f); - - // Generate the quadrants - const f32 corners[4][2] = - { - { f32(x + radius), f32(y + radius) }, - { f32(x + radius), f32(y + h) - radius }, - { f32(x + w) - radius, f32(y + radius) }, - { f32(x + w) - radius, f32(y + h) - radius } - }; - - const f32 radius_f = static_cast(radius); - const f32 scale[4][2] = - { - { -radius_f, -radius_f }, - { -radius_f, +radius_f }, - { +radius_f, -radius_f }, - { +radius_f, +radius_f } - }; - - for (int i = 0; i < 4; ++i) - { - auto& command = compiled_resources_temp.draw_commands[i + 3]; - command.config.primitives = rsx::overlays::primitive_type::triangle_fan; - command.verts = generate_unit_quadrant(num_control_points, corners[i], scale[i]); - } - - compiled_resources.add(std::move(compiled_resources_temp), margin_left, margin_top); + rounded_rect::get_compiled(); + auto& config = compiled_resources.draw_commands.front().config; + configure_sdf(config, sdf_function::ellipse); m_is_compiled = true; return compiled_resources; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_controls.h b/rpcs3/Emu/RSX/Overlays/overlay_controls.h index dcfe33b199..8fa835f595 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_controls.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_controls.h @@ -31,6 +31,14 @@ namespace rsx triangle_fan = 4 }; + enum class sdf_function : u8 + { + none = 0, + ellipse, + box, + rounded_box, + }; + struct image_info_base { int w = 0, h = 0, channels = 0; @@ -95,6 +103,25 @@ namespace rsx struct compiled_resource { + struct sdf_config_t 
+ { + sdf_function func = sdf_function::none; + + f32 cx; // Center x + f32 cy; // Center y + f32 hx; // Half-size in X + f32 hy; // Half-size in Y + f32 br; // Border radius + f32 bw; // Border width + + color4f border_color; + + // Transform a SDF definition from one reference frame to another + // Target viewport - your actual render area + // Virtual viewport - the internal design viewport + void transform(const areaf& target_viewport, const sizef& virtual_viewport); + }; + struct command_config { primitive_type primitives = primitive_type::quad_list; @@ -105,6 +132,8 @@ namespace rsx f32 pulse_sinus_offset = 0.0f; // The current pulse offset f32 pulse_speed_modifier = 0.005f; + sdf_config_t sdf_config; + areaf clip_rect = {}; bool clip_region = false; @@ -171,6 +200,9 @@ namespace rsx f32 pulse_sinus_offset = 0.0f; // The current pulse offset f32 pulse_speed_modifier = 0.005f; + u8 border_size = 0; + color4f border_color = { 0.f, 0.f, 0.f, 1.f }; + // Analog to command_config::get_sinus_value // Apply modifier for sinus pulse. Resets the pulse. For example: // 0 -> reset to 0.5 rising @@ -210,8 +242,8 @@ namespace rsx // NOTE: Functions as a simple position offset. Top left corner is the anchor. 
virtual void set_margin(u16 left, u16 top); virtual void set_margin(u16 margin); - virtual void set_text(const std::string& text); - virtual void set_unicode_text(const std::u32string& text); + virtual void set_text(std::string_view text); + virtual void set_unicode_text(std::u32string_view text); void set_text(localized_string_id id); void set_text(const localized_string& container); virtual void set_font(const char* font_name, u16 font_size); @@ -237,6 +269,8 @@ namespace rsx protected: bool m_is_compiled = false; // Only use m_is_compiled as a getter in is_compiled() if possible + + void configure_sdf(compiled_resource::command_config& config, sdf_function func); }; struct layout_container : public overlay_element @@ -316,13 +350,18 @@ namespace rsx struct rounded_rect : public overlay_element { - u8 radius = 5; - u8 num_control_points = 8; // Smoothness control + u16 border_radius = 5; using overlay_element::overlay_element; compiled_resource& get_compiled() override; }; + struct ellipse : public rounded_rect + { + using rounded_rect::rounded_rect; + compiled_resource& get_compiled() override; + }; + struct image_view : public overlay_element { protected: diff --git a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp index ba3d138f96..4fad1f65e9 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp @@ -95,12 +95,12 @@ namespace rsx } } - void edit_text::set_text(const std::string& text) + void edit_text::set_text(std::string_view text) { set_unicode_text(utf8_to_u32string(text)); } - void edit_text::set_unicode_text(const std::u32string& text) + void edit_text::set_unicode_text(std::u32string_view text) { value = text; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp index 624580e4f4..7d4f6d93b5 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp @@ -26,8 
+26,8 @@ namespace rsx using label::label; - void set_text(const std::string& text) override; - void set_unicode_text(const std::u32string& text) override; + void set_text(std::string_view text) override; + void set_unicode_text(std::u32string_view text) override; void set_placeholder(const std::u32string& placeholder_text); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp b/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp index 23c45d29bb..943da6271b 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp @@ -17,7 +17,7 @@ namespace rsx scroll_indicator_grip->set_pos(1, 0); scroll_indicator_grip->set_size(5, 5); - scroll_indicator_grip->radius = 2; + scroll_indicator_grip->border_radius = 2; scroll_indicator_track->set_size(7, height); m_scroll_indicator = std::make_unique(); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_loading_icon.hpp b/rpcs3/Emu/RSX/Overlays/overlay_loading_icon.hpp index 28db156e02..926e5457da 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_loading_icon.hpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_loading_icon.hpp @@ -27,8 +27,8 @@ namespace rsx m_frame_width = m_frame_height = 24; m_spacing_x = m_spacing_y = 6; - set_size(24, 30); - set_padding(4, 0, 2, 8); + set_size(24, 24); + set_padding(4, 0, 4, 0); } }; } diff --git a/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp b/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp index 9ffa9b14e3..70f933fbaa 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp @@ -68,7 +68,7 @@ namespace rsx m_list_mutex.unlock_shared(); } - std::shared_ptr display_manager::get(u32 uid) + std::shared_ptr display_manager::get(u32 uid) const { reader_lock lock(m_list_mutex); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_manager.h b/rpcs3/Emu/RSX/Overlays/overlay_manager.h index e42f3721b3..be244b1997 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_manager.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_manager.h @@ -21,7 +21,7 @@ namespace rsx 
std::vector> m_iface_list; std::vector> m_dirty_list; - shared_mutex m_list_mutex; + mutable shared_mutex m_list_mutex; lf_queue m_uids_to_remove; lf_queue m_type_ids_to_remove; atomic_t m_pending_removals_count = 0; @@ -130,11 +130,11 @@ namespace rsx void dispose(const std::vector& uids); // Returns pointer to the object matching the given uid - std::shared_ptr get(u32 uid); + std::shared_ptr get(u32 uid) const; // Returns pointer to the first object matching the given type template - std::shared_ptr get() + std::shared_ptr get() const { reader_lock lock(m_list_mutex); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_message.cpp b/rpcs3/Emu/RSX/Overlays/overlay_message.cpp index f4de82949f..1c49401d02 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_message.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_message.cpp @@ -31,7 +31,7 @@ namespace rsx m_visible_duration = expiration; m_refs = std::move(refs); - m_text.set_font("Arial", 14); + m_text.set_font("Arial", 12); m_text.set_text(msg_id); m_text.set_padding(4, 8, 4, 8); m_text.auto_resize(); @@ -90,7 +90,7 @@ namespace rsx return m_loc_id == id; } - bool message_item::text_matches(const std::u32string& text) const + bool message_item::text_matches(std::u32string_view text) const { return m_text.text == text; } diff --git a/rpcs3/Emu/RSX/Overlays/overlay_message.h b/rpcs3/Emu/RSX/Overlays/overlay_message.h index 219103e843..e8cb1a3285 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_message.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_message.h @@ -31,7 +31,7 @@ namespace rsx compiled_resource& get_compiled() override; bool id_matches(localized_string_id id) const; - bool text_matches(const std::u32string& text) const; + bool text_matches(std::u32string_view text) const; void set_label_text(const std::string& text); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp index 8b68357208..635833c9d8 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp +++ 
b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp @@ -6,6 +6,7 @@ #include "Emu/Cell/PPUThread.h" #include +#include #include #include "util/cpu_stats.hpp" @@ -93,7 +94,6 @@ namespace rsx { // left, top, right, bottom const areau padding { m_padding, m_padding - std::min(4, m_padding), m_padding, m_padding }; - const positionu margin { m_margin_x, m_margin_y }; positionu pos; u16 graph_width = 0; @@ -116,6 +116,26 @@ namespace rsx graph_height += m_padding; } + const u16 overlay_width = std::max(m_body.w, graph_width); + const u16 overlay_height = static_cast(m_body.h + graph_height); + const auto percent_to_margin_px = [](f32 margin_percent, u16 virtual_size, u16 overlay_size) -> u32 + { + if (overlay_size >= virtual_size) + { + return 0; + } + + const u32 max_margin = virtual_size - overlay_size; + const u32 margin_px = static_cast(std::lround((std::clamp(margin_percent, 0.0f, 100.0f) / 100.0f) * max_margin)); + return std::min(margin_px, max_margin); + }; + + const positionu margin + { + percent_to_margin_px(m_margin_x, m_virtual_width, overlay_width), + percent_to_margin_px(m_margin_y, m_virtual_height, overlay_height) + }; + switch (m_quadrant) { case screen_quadrant::top_left: @@ -123,27 +143,27 @@ namespace rsx pos.y = margin.y; break; case screen_quadrant::top_right: - pos.x = virtual_width - std::max(m_body.w, graph_width) - margin.x; + pos.x = m_virtual_width - overlay_width - margin.x; pos.y = margin.y; break; case screen_quadrant::bottom_left: pos.x = margin.x; - pos.y = virtual_height - m_body.h - graph_height - margin.y; + pos.y = m_virtual_height - overlay_height - margin.y; break; case screen_quadrant::bottom_right: - pos.x = virtual_width - std::max(m_body.w, graph_width) - margin.x; - pos.y = virtual_height - m_body.h - graph_height - margin.y; + pos.x = m_virtual_width - overlay_width - margin.x; + pos.y = m_virtual_height - overlay_height - margin.y; break; } if (m_center_x) { - pos.x = (virtual_width - std::max(m_body.w, graph_width)) / 2; + 
pos.x = overlay_width >= m_virtual_width ? 0 : (m_virtual_width - overlay_width) / 2; } if (m_center_y) { - pos.y = (virtual_height - m_body.h - graph_height) / 2; + pos.y = overlay_height >= m_virtual_height ? 0 : (m_virtual_height - overlay_height) / 2; } elm.set_pos(pos.x, pos.y); @@ -381,7 +401,7 @@ namespace rsx m_force_repaint = true; } - void perf_metrics_overlay::set_margins(u32 margin_x, u32 margin_y, bool center_x, bool center_y) + void perf_metrics_overlay::set_margins(f32 margin_x, f32 margin_y, bool center_x, bool center_y) { if (m_margin_x == margin_x && m_margin_y == margin_y && m_center_x == center_x && m_center_y == center_y) return; @@ -431,6 +451,38 @@ namespace rsx m_force_update = true; } + void perf_metrics_overlay::set_render_viewport(u16 width, u16 height) + { + u16 new_virtual_width = virtual_width; + u16 new_virtual_height = virtual_height; + + if (use_window_space && width > 0 && height > 0) + { + const double scale_x = static_cast(width) / virtual_width; + const double scale_y = static_cast(height) / virtual_height; + const double scale = std::min(scale_x, scale_y); + + new_virtual_width = static_cast(std::min( + static_cast(std::lround(width / scale)), + std::numeric_limits::max())); + + new_virtual_height = static_cast(std::min( + static_cast(std::lround(height / scale)), + std::numeric_limits::max())); + } + + if (m_virtual_width == new_virtual_width && m_virtual_height == new_virtual_height) + return; + + m_virtual_width = new_virtual_width; + m_virtual_height = new_virtual_height; + + if (m_is_initialised) + { + reset_transforms(); + } + } + void perf_metrics_overlay::update(u64 /*timestamp_us*/) { const auto elapsed_update = m_update_timer.GetElapsedTimeInMilliSec(); @@ -896,7 +948,8 @@ namespace rsx perf_overlay->set_update_interval(perf_settings.update_interval); perf_overlay->set_font(perf_settings.font); perf_overlay->set_font_size(perf_settings.font_size); - perf_overlay->set_margins(perf_settings.margin_x, 
perf_settings.margin_y, perf_settings.center_x.get(), perf_settings.center_y.get()); + perf_overlay->set_margins(static_cast(perf_settings.margin_x.get()), static_cast(perf_settings.margin_y.get()), perf_settings.center_x.get(), perf_settings.center_y.get()); + perf_overlay->use_window_space = perf_settings.perf_overlay_use_window_space.get(); perf_overlay->set_opacity(perf_settings.opacity / 100.f); perf_overlay->set_body_colors(perf_settings.color_body, perf_settings.background_body); perf_overlay->set_title_colors(perf_settings.color_title, perf_settings.background_title); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h index 4a121b5231..2b676e591b 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h @@ -37,10 +37,12 @@ namespace rsx u32 m_frames{}; std::string m_font{}; u16 m_font_size{}; - u32 m_margin_x{}; // horizontal distance to the screen border relative to the screen_quadrant in px - u32 m_margin_y{}; // vertical distance to the screen border relative to the screen_quadrant in px + f32 m_margin_x{}; // horizontal distance to the screen border relative to the screen_quadrant in percent of the window width + f32 m_margin_y{}; // vertical distance to the screen border relative to the screen_quadrant in percent of the window height u32 m_padding{}; // space between overlay elements f32 m_opacity{}; // 0..1 + u16 m_virtual_width{virtual_width}; + u16 m_virtual_height{virtual_height}; bool m_center_x{}; // center the overlay horizontally bool m_center_y{}; // center the overlay vertically @@ -96,11 +98,14 @@ namespace rsx void set_update_interval(u32 update_interval); void set_font(std::string font); void set_font_size(u16 font_size); - void set_margins(u32 margin_x, u32 margin_y, bool center_x, bool center_y); + void set_margins(f32 margin_x, f32 margin_y, bool center_x, bool center_y); void set_opacity(f32 opacity); void 
set_body_colors(std::string color, std::string background); void set_title_colors(std::string color, std::string background); void force_next_update(); + void set_render_viewport(u16 width, u16 height) override; + u16 get_virtual_width() const override { return m_virtual_width; } + u16 get_virtual_height() const override { return m_virtual_height; } void update(u64 timestamp_us) override; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp index 860f54544a..b9140f2a69 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp @@ -58,7 +58,7 @@ namespace rsx set_pos(x + dx, y + dy); } - void progress_bar::set_text(const std::string& str) + void progress_bar::set_text(std::string_view str) { text_view.set_text(str); text_view.align_text(text_align::center); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp index 6ed6b73c77..d622796dae 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp @@ -24,7 +24,7 @@ namespace rsx void set_pos(s16 _x, s16 _y) override; void set_size(u16 _w, u16 _h) override; void translate(s16 dx, s16 dy) override; - void set_text(const std::string& str) override; + void set_text(std::string_view str) override; compiled_resource& get_compiled() override; }; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_select.cpp b/rpcs3/Emu/RSX/Overlays/overlay_select.cpp index 5ef6eb254c..6e0d4a4c6c 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_select.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_select.cpp @@ -144,7 +144,7 @@ namespace rsx::overlays auto background = std::make_unique(); background->set_size(w, h); - background->radius = std::min(h / 4, 5); + background->border_radius = std::min(h / 4, 5); background->back_color = color4f(0.3f, 0.3f, 0.3f, 1.0f); const u16 arrow_size = std::min(h / 2, max_dropdown_arrow_dimension); diff 
--git a/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp b/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp index bf00563ef5..4a0d607212 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp @@ -8,7 +8,7 @@ namespace rsx::overlays constexpr u16 slider_indicator_radius = 8; constexpr u16 slider_indicator_dia = slider_indicator_radius * 2; constexpr const char* slider_label_font_family = "Arial"; - constexpr int slider_label_font_size = 10; + constexpr int slider_label_font_size = 11; void slider::init() { @@ -29,25 +29,27 @@ namespace rsx::overlays // Base components auto background = std::make_unique(); auto foreground = std::make_unique(); - auto indicator = std::make_unique(); + auto indicator = std::make_unique(); auto value_label = std::make_unique