diff --git a/.ci/build-mac-arm64.sh b/.ci/build-mac-arm64.sh index 65772708b0..4d46cac7ef 100755 --- a/.ci/build-mac-arm64.sh +++ b/.ci/build-mac-arm64.sh @@ -9,9 +9,9 @@ export HOMEBREW_NO_INSTALL_CLEANUP=1 brew install -f --overwrite --quiet ccache pipenv googletest ffmpeg@5 "llvm@$LLVM_COMPILER_VER" glew sdl3 vulkan-headers brew link -f --quiet "llvm@$LLVM_COMPILER_VER" ffmpeg@5 -# moltenvk based on commit for 1.3.0 release +# moltenvk based on commit for 1.4.0 release export HOMEBREW_DEVELOPER=1 # Prevents blocking of local formulae -wget https://raw.githubusercontent.com/Homebrew/homebrew-core/7255441cbcafabaa8950f67c7ec55ff499dbb2d3/Formula/m/molten-vk.rb +wget https://raw.githubusercontent.com/Homebrew/homebrew-core/ea2bec5f1f4384e188d7fc0702ab21a20a2ced08/Formula/m/molten-vk.rb /opt/homebrew/bin/brew install -f --overwrite --formula --quiet ./molten-vk.rb export HOMEBREW_DEVELOPER=0 diff --git a/.ci/build-mac.sh b/.ci/build-mac.sh index f5a74c303f..202e2f3a86 100755 --- a/.ci/build-mac.sh +++ b/.ci/build-mac.sh @@ -15,9 +15,9 @@ arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebr arch -x86_64 /usr/local/bin/brew install -f --overwrite --quiet ffmpeg@5 "llvm@$LLVM_COMPILER_VER" glew sdl3 vulkan-headers arch -x86_64 /usr/local/bin/brew link -f --overwrite --quiet "llvm@$LLVM_COMPILER_VER" ffmpeg@5 -# moltenvk based on commit for 1.3.0 release +# moltenvk based on commit for 1.4.0 release export HOMEBREW_DEVELOPER=1 # Prevents blocking of local formulae -wget https://raw.githubusercontent.com/Homebrew/homebrew-core/7255441cbcafabaa8950f67c7ec55ff499dbb2d3/Formula/m/molten-vk.rb +wget https://raw.githubusercontent.com/Homebrew/homebrew-core/ea2bec5f1f4384e188d7fc0702ab21a20a2ced08/Formula/m/molten-vk.rb arch -x86_64 /usr/local/bin/brew install -f --overwrite --formula --quiet ./molten-vk.rb export HOMEBREW_DEVELOPER=0 export CXX=clang++ diff --git a/.ci/deploy-linux.sh b/.ci/deploy-linux.sh index fe6174fc78..b0599400d2 100755 --- a/.ci/deploy-linux.sh +++ b/.ci/deploy-linux.sh @@ -32,6 +32,25 @@ if [ "$DEPLOY_APPIMAGE" = "true" ]; then # Remove git directory containing local commit history file rm -rf ./AppDir/usr/share/rpcs3/git + # Download translations + mkdir -p "./AppDir/usr/translations" + ZIP_URL=$(curl -fsSL "https://api.github.com/repos/RPCS3/rpcs3_translations/releases/latest" \ + | grep "browser_download_url" \ + | grep "RPCS3-languages.zip" \ + | cut -d '"' -f 4) + if [ -z "$ZIP_URL" ]; then + echo "Failed to find RPCS3-languages.zip in the latest release. Continuing without translations." + else + echo "Downloading translations from: $ZIP_URL" + curl -L -o translations.zip "$ZIP_URL" || { + echo "Failed to download translations.zip. Continuing without translations." + exit 0 + } + unzip -o translations.zip -d "./AppDir/usr/translations" >/dev/null 2>&1 || \ + echo "Failed to extract translations.zip. Continuing without translations." + rm -f translations.zip + fi + curl -fsSLo /uruntime "https://github.com/VHSgunzo/uruntime/releases/download/v0.3.4/uruntime-appimage-dwarfs-$CPU_ARCH" chmod +x /uruntime /uruntime --appimage-mkdwarfs -f --set-owner 0 --set-group 0 --no-history --no-create-timestamp \ diff --git a/.ci/deploy-mac-arm64.sh b/.ci/deploy-mac-arm64.sh index d7876c93bf..6e067bde9f 100755 --- a/.ci/deploy-mac-arm64.sh +++ b/.ci/deploy-mac-arm64.sh @@ -31,6 +31,25 @@ rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ ../../.ci/optimize-mac.sh rpcs3.app +# Download translations +mkdir -p "rpcs3.app/Contents/translations" +ZIP_URL=$(curl -fsSL "https://api.github.com/repos/RPCS3/rpcs3_translations/releases/latest" \ + | grep "browser_download_url" \ + | grep "RPCS3-languages.zip" \ + | cut -d '"' -f 4) +if [ -z "$ZIP_URL" ]; then + echo "Failed to find RPCS3-languages.zip in the latest release. Continuing without translations." +else + echo "Downloading translations from: $ZIP_URL" + curl -L -o translations.zip "$ZIP_URL" || { + echo "Failed to download translations.zip. Continuing without translations." + exit 0 + } + unzip -o translations.zip -d "rpcs3.app/Contents/translations" >/dev/null 2>&1 || \ + echo "Failed to extract translations.zip. Continuing without translations." + rm -f translations.zip +fi + # Hack install_name_tool -delete_rpath /opt/homebrew/lib RPCS3.app/Contents/MacOS/rpcs3 || echo "Hack for deleting rpath /opt/homebrew/lib not needed" install_name_tool -delete_rpath /opt/homebrew/opt/llvm@$LLVM_COMPILER_VER/lib RPCS3.app/Contents/MacOS/rpcs3 || echo "Hack for deleting rpath /opt/homebrew/opt/llvm@$LLVM_COMPILER_VER/lib not needed" diff --git a/.ci/deploy-mac.sh b/.ci/deploy-mac.sh index 15348df05a..4b54267539 100755 --- a/.ci/deploy-mac.sh +++ b/.ci/deploy-mac.sh @@ -32,6 +32,25 @@ rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ ../../.ci/optimize-mac.sh rpcs3.app +# Download translations +mkdir -p "rpcs3.app/Contents/translations" +ZIP_URL=$(curl -fsSL "https://api.github.com/repos/RPCS3/rpcs3_translations/releases/latest" \ + | grep "browser_download_url" \ + | grep "RPCS3-languages.zip" \ + | cut -d '"' -f 4) +if [ -z "$ZIP_URL" ]; then + echo "Failed to find RPCS3-languages.zip in the latest release. Continuing without translations." +else + echo "Downloading translations from: $ZIP_URL" + curl -L -o translations.zip "$ZIP_URL" || { + echo "Failed to download translations.zip. Continuing without translations." + exit 0 + } + unzip -o translations.zip -d "rpcs3.app/Contents/translations" >/dev/null 2>&1 || \ + echo "Failed to extract translations.zip. Continuing without translations." + rm -f translations.zip +fi + # Need to do this rename hack due to case insensitive filesystem mv rpcs3.app RPCS3_.app mv RPCS3_.app RPCS3.app diff --git a/.ci/deploy-windows-clang.sh b/.ci/deploy-windows-clang.sh index d45cb45acf..c95f82e7b8 100644 --- a/.ci/deploy-windows-clang.sh +++ b/.ci/deploy-windows-clang.sh @@ -24,6 +24,25 @@ mkdir ./bin/config/input_configs curl -fsSL 'https://raw.githubusercontent.com/gabomdq/SDL_GameControllerDB/master/gamecontrollerdb.txt' 1> ./bin/config/input_configs/gamecontrollerdb.txt curl -fsSL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -t UTF-8 1> ./bin/GuiConfigs/compat_database.dat +# Download translations +mkdir -p ./bin/share/qt6/translations +ZIP_URL=$(curl -fsSL "https://api.github.com/repos/RPCS3/rpcs3_translations/releases/latest" \ + | grep "browser_download_url" \ + | grep "RPCS3-languages.zip" \ + | cut -d '"' -f 4) +if [ -z "$ZIP_URL" ]; then + echo "Failed to find RPCS3-languages.zip in the latest release. Continuing without translations." +else + echo "Downloading translations from: $ZIP_URL" + curl -L -o translations.zip "$ZIP_URL" || { + echo "Failed to download translations.zip. Continuing without translations." + exit 0 + } + unzip -o translations.zip -d "./bin/share/qt6/translations" >/dev/null 2>&1 || \ + echo "Failed to extract translations.zip. Continuing without translations." + rm -f translations.zip +fi + # Package artifacts 7z a -m0=LZMA2 -mx9 "$BUILD" ./bin/* diff --git a/.ci/deploy-windows.sh b/.ci/deploy-windows.sh index b885831511..069f8fb637 100755 --- a/.ci/deploy-windows.sh +++ b/.ci/deploy-windows.sh @@ -15,6 +15,25 @@ mkdir ./bin/config/input_configs curl -fsSL 'https://raw.githubusercontent.com/gabomdq/SDL_GameControllerDB/master/gamecontrollerdb.txt' 1> ./bin/config/input_configs/gamecontrollerdb.txt curl -fsSL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -t UTF-8 1> ./bin/GuiConfigs/compat_database.dat +# Download translations +mkdir -p ./bin/qt6/translations +ZIP_URL=$(curl -fsSL "https://api.github.com/repos/RPCS3/rpcs3_translations/releases/latest" \ + | grep "browser_download_url" \ + | grep "RPCS3-languages.zip" \ + | cut -d '"' -f 4) +if [ -z "$ZIP_URL" ]; then + echo "Failed to find RPCS3-languages.zip in the latest release. Continuing without translations." +else + echo "Downloading translations from: $ZIP_URL" + curl -L -o translations.zip "$ZIP_URL" || { + echo "Failed to download translations.zip. Continuing without translations." + exit 0 + } + unzip -o translations.zip -d "./bin/qt6/translations" >/dev/null 2>&1 || \ + echo "Failed to extract translations.zip. Continuing without translations." + rm -f translations.zip +fi + # Download SSL certificate (not needed with CURLSSLOPT_NATIVE_CA) #curl -fsSL 'https://curl.haxx.se/ca/cacert.pem' 1> ./bin/cacert.pem diff --git a/3rdparty/FAudio b/3rdparty/FAudio index ba876ce3be..8de3616b5b 160000 --- a/3rdparty/FAudio +++ b/3rdparty/FAudio @@ -1 +1 @@ -Subproject commit ba876ce3be73eabd7094fa276a751ede8328b608 +Subproject commit 8de3616b5b204260fe639e76587731d8a73b8d2c diff --git a/3rdparty/MoltenVK/CMakeLists.txt b/3rdparty/MoltenVK/CMakeLists.txt index ec9c2b802b..a4d8b02a8c 100644 --- a/3rdparty/MoltenVK/CMakeLists.txt +++ b/3rdparty/MoltenVK/CMakeLists.txt @@ -3,7 +3,7 @@ include(ExternalProject) ExternalProject_Add(moltenvk GIT_REPOSITORY https://github.com/KhronosGroup/MoltenVK.git - GIT_TAG 49b97f2 + GIT_TAG 4588705 BUILD_IN_SOURCE 1 SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK CONFIGURE_COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/fetchDependencies" --macos diff --git a/3rdparty/curl/curl b/3rdparty/curl/curl index 11b991232f..400fffa90f 160000 --- a/3rdparty/curl/curl +++ b/3rdparty/curl/curl @@ -1 +1 @@ -Subproject commit 11b991232fbcaa88e2b1faecac224416b0001e35 +Subproject commit 400fffa90f30c7a2dc762fa33009d24851bd2016 diff --git a/3rdparty/curl/libcurl.vcxproj b/3rdparty/curl/libcurl.vcxproj index e39805855d..dae28be346 100644 --- a/3rdparty/curl/libcurl.vcxproj +++ b/3rdparty/curl/libcurl.vcxproj @@ -80,10 +80,12 @@ + + @@ -92,9 +94,9 @@ - + @@ -120,7 +122,6 @@ - @@ -147,7 +148,6 @@ - @@ -159,6 +159,7 @@ + @@ -203,6 +204,7 @@ + @@ -225,7 +227,6 @@ - @@ -274,10 +275,12 @@ + + @@ -287,14 +290,13 @@ - + - @@ -325,7 +327,6 @@ - @@ -357,6 +358,7 @@ + @@ -403,6 +405,7 @@ + diff --git a/3rdparty/curl/libcurl.vcxproj.filters b/3rdparty/curl/libcurl.vcxproj.filters index 4bd52daddf..17f760c54b 100644 --- a/3rdparty/curl/libcurl.vcxproj.filters +++ b/3rdparty/curl/libcurl.vcxproj.filters @@ -42,9 +42,6 @@ Source Files - - Source Files - Source Files @@ -159,9 +156,6 @@ Source Files - - Source Files - Source Files @@ -333,9 +327,6 @@ Source Files - - Source Files - Source Files @@ -396,9 +387,6 @@ Source Files - - Source Files - Source Files @@ -546,6 +534,21 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + @@ -623,9 +626,6 @@ Header Files - - Header Files - Header Files @@ -926,9 +926,6 @@ Header Files - - Header Files - Header Files @@ -953,9 +950,6 @@ Header Files - - Header Files - Header Files @@ -1112,6 +1106,21 @@ Header Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + diff --git a/3rdparty/libsdl-org/SDL b/3rdparty/libsdl-org/SDL index a8589a8422..badbf8da4e 160000 --- a/3rdparty/libsdl-org/SDL +++ b/3rdparty/libsdl-org/SDL @@ -1 +1 @@ -Subproject commit a8589a84226a6202831a3d49ff4edda4acab9acd +Subproject commit badbf8da4ee72b3ef599c721ffc9899e8d7c8d90 diff --git a/3rdparty/libsdl-org/SDL.vcxproj b/3rdparty/libsdl-org/SDL.vcxproj index 81b7d853a5..f0b38ca09f 100644 --- a/3rdparty/libsdl-org/SDL.vcxproj +++ b/3rdparty/libsdl-org/SDL.vcxproj @@ -384,7 +384,6 @@ - diff --git a/3rdparty/libsdl-org/SDL.vcxproj.filters b/3rdparty/libsdl-org/SDL.vcxproj.filters index d6a29f2faf..5839899c0d 100644 --- a/3rdparty/libsdl-org/SDL.vcxproj.filters +++ b/3rdparty/libsdl-org/SDL.vcxproj.filters @@ -1457,9 +1457,6 @@ sensor\windows - - render - render diff --git a/3rdparty/zstd/CMakeLists.txt b/3rdparty/zstd/CMakeLists.txt index c5c8dd66ff..431272966d 100644 --- a/3rdparty/zstd/CMakeLists.txt +++ b/3rdparty/zstd/CMakeLists.txt @@ -9,7 +9,8 @@ else() option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF) option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON) option(ZSTD_BUILD_TESTS "BUILD TESTS" OFF) - add_subdirectory(zstd/build/cmake EXLUDE_FROM_ALL) + + add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL) add_library(3rdparty_zstd INTERFACE) target_link_libraries(3rdparty_zstd INTERFACE libzstd_static) endif() diff --git a/Utilities/deferred_op.hpp b/Utilities/deferred_op.hpp new file mode 100644 index 0000000000..7186cea2d4 --- /dev/null +++ b/Utilities/deferred_op.hpp @@ -0,0 +1,27 @@ +#pragma once + +// Generic deferred routine wrapper +// Use-case is similar to "defer" statement in other languages, just invokes a callback when the object goes out of scope + +#include + +namespace utils +{ + template + requires std::is_invocable_v + class deferred_op + { + public: + deferred_op(F&& callback) + : m_callback(callback) + {} + + ~deferred_op() + { + m_callback(); + } + + private: + F m_callback; + }; +} diff --git a/buildfiles/cmake/FindFFMPEG.cmake b/buildfiles/cmake/FindFFMPEG.cmake index f4f62034a8..55e3fd8d88 100644 --- a/buildfiles/cmake/FindFFMPEG.cmake +++ b/buildfiles/cmake/FindFFMPEG.cmake @@ -66,7 +66,7 @@ else () find_package_handle_standard_args(FFMPEG DEFAULT_MSG - FFMPEG_LIBAVCODEC FFMPEG_LIBAVFORMAT FFMPEG_LIBSWSCALE FFMPEG_LIBSWRESAMPLE + FFMPEG_LIBAVCODEC FFMPEG_LIBAVFORMAT FFMPEG_LIBAVUTIL FFMPEG_LIBSWSCALE FFMPEG_LIBSWRESAMPLE ) if (FFMPEG_FOUND) diff --git a/objdump.cpp b/objdump.cpp index 76caef1c9b..7955462557 100644 --- a/objdump.cpp +++ b/objdump.cpp @@ -25,6 +25,7 @@ #include #include #include +#include std::string to_hex(std::uint64_t value, bool prfx = true) { @@ -85,7 +86,7 @@ int main(int argc, char* argv[]) // Decode address and try to find the object std::uint64_t addr = -1; - std::from_chars(arg.data() + strlen("--start-address=0x"), arg.data() + arg.size(), addr, 16); + std::from_chars(arg.data() + ("--start-address=0x"sv).size(), arg.data() + arg.size(), addr, 16); for (int j = 0; j < 0x100'0000; j++) { diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 72d2f22dfd..cd00267288 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -10,16 +10,6 @@ include(CheckFunctionExists) set(CMAKE_CXX_STANDARD 20) -set(ADDITIONAL_LIBS "") -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - #on some Linux distros shm_unlink and similar functions are in librt only - list(APPEND ADDITIONAL_LIBS "rt") -elseif(NOT WIN32 AND NOT CMAKE_CXX_FLAGS MATCHES "LIBICONV_PLUG") - #it seems like glibc includes the iconv functions we use but other libc - #implementations like the one on OSX don't seem implement them - list(APPEND ADDITIONAL_LIBS "iconv") -endif() - if(UNIX AND NOT APPLE AND NOT ANDROID) add_compile_definitions(DATADIR="${CMAKE_INSTALL_FULL_DATADIR}/rpcs3") # Optionally enable X11 for window management @@ -78,8 +68,16 @@ if (NOT ANDROID) 3rdparty::libcurl 3rdparty::zlib 3rdparty::opencv - 3rdparty::fusion - ${ADDITIONAL_LIBS}) + 3rdparty::fusion) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + #on some Linux distros shm_unlink and similar functions are in librt only + target_link_libraries(rpcs3_lib PRIVATE rt) + elseif(NOT WIN32 AND NOT CMAKE_CXX_FLAGS MATCHES "LIBICONV_PLUG") + #it seems like glibc includes the iconv functions we use but other libc + #implementations like the one on OSX don't seem implement them + target_link_libraries(rpcs3_lib PRIVATE iconv) + endif() # Unix display manager if(X11_FOUND) @@ -106,19 +104,16 @@ if (NOT ANDROID) endif() # Build rpcs3 executable + add_executable(rpcs3 WIN32 MACOSX_BUNDLE) if(WIN32) - add_executable(rpcs3 WIN32) target_sources(rpcs3 PRIVATE rpcs3.rc) target_compile_definitions(rpcs3 PRIVATE UNICODE _UNICODE) elseif(APPLE) - add_executable(rpcs3 MACOSX_BUNDLE) target_sources(rpcs3 PRIVATE rpcs3.icns update_helper.sh) set_source_files_properties(update_helper.sh PROPERTIES MACOSX_PACKAGE_LOCATION Resources) set_target_properties(rpcs3 PROPERTIES MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_SOURCE_DIR}/rpcs3.plist.in") - else() - add_executable(rpcs3) endif() target_sources(rpcs3 @@ -137,17 +132,12 @@ if (NOT ANDROID) # Copy icons to executable directory if(APPLE) - if (CMAKE_BUILD_TYPE MATCHES "Debug" OR CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo") - set(QT_DEPLOY_FLAGS "-no-strip") - else() - set(QT_DEPLOY_FLAGS "") - endif() qt_finalize_target(rpcs3) add_custom_command(TARGET rpcs3 POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/rpcs3.icns $/../Resources/rpcs3.icns COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/Icons $/../Resources/Icons COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/GuiConfigs $/../Resources/GuiConfigs - COMMAND "${MACDEPLOYQT_EXECUTABLE}" "${PROJECT_BINARY_DIR}/bin/rpcs3.app" "${QT_DEPLOY_FLAGS}") + COMMAND "${MACDEPLOYQT_EXECUTABLE}" "${PROJECT_BINARY_DIR}/bin/rpcs3.app" "$<$:-no-strip>") elseif(UNIX) add_custom_command(TARGET rpcs3 POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin/Icons $/Icons diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 6655841278..294241d07e 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -403,6 +403,7 @@ target_sources(rpcs3_emu PRIVATE Io/GunCon3.cpp Io/Infinity.cpp Io/interception.cpp + Io/KamenRider.cpp Io/KeyboardHandler.cpp Io/midi_config_types.cpp Io/mouse_config.cpp diff --git a/rpcs3/Emu/Cell/Modules/cellGem.cpp b/rpcs3/Emu/Cell/Modules/cellGem.cpp index c50c93fc44..f7fbf90fae 100644 --- a/rpcs3/Emu/Cell/Modules/cellGem.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp @@ -2284,6 +2284,8 @@ error_code cellGemClearStatusFlags(u32 gem_num, u64 mask) error_code cellGemConvertVideoFinish(ppu_thread& ppu) { + ppu.state += cpu_flag::wait; + cellGem.warning("cellGemConvertVideoFinish()"); auto& gem = g_fxo->get(); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 0a17de2d0f..3e0959807b 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1115,7 +1115,24 @@ void PPUTranslator::VCFSX(ppu_opcode_t op) void PPUTranslator::VCFUX(ppu_opcode_t op) { const auto b = get_vr(op.vb); - set_vr(op.vd, fpcast(b) * fsplat(std::pow(2, -static_cast(op.vuimm)))); + +#ifdef ARCH_ARM64 + return set_vr(op.vd, fpcast(b) * fsplat(std::pow(2, -static_cast(op.vuimm)))); +#else + if (m_use_avx512) + { + return set_vr(op.vd, fpcast(b) * fsplat(std::pow(2, -static_cast(op.vuimm)))); + } + + constexpr int bit_shift = 9; + const auto shifted = (b >> bit_shift); + const auto cleared = shifted << bit_shift; + const auto low_bits = b - cleared; + const auto high_part = fpcast(noncast(shifted)) * fsplat(static_cast(1u << bit_shift)); + const auto low_part = fpcast(noncast(low_bits)); + const auto temp = high_part + low_part; + set_vr(op.vd, temp * fsplat(std::pow(2, -static_cast(op.vuimm)))); +#endif } void PPUTranslator::VCMPBFP(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index f5b4b9696d..20ccaf9ec8 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -3844,7 +3844,8 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) } // Writeback of unchanged data. Only check memory change - if (cmp_rdata(rdata, vm::_ref(addr)) && res.compare_and_swap_test(rtime, rtime + 128)) + // For the comparison, load twice for atomicity + if (cmp_rdata(rdata, vm::_ref(addr)) && res == rtime && cmp_rdata(rdata, vm::_ref(addr)) && res.compare_and_swap_test(rtime, rtime + 128)) { raddr = 0; // Disable notification return true; diff --git a/rpcs3/Emu/Cell/lv2/sys_memory.cpp b/rpcs3/Emu/Cell/lv2/sys_memory.cpp index 9f726e994d..fb8c5b51ac 100644 --- a/rpcs3/Emu/Cell/lv2/sys_memory.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_memory.cpp @@ -213,6 +213,8 @@ error_code sys_memory_allocate_from_container(cpu_thread& cpu, u64 size, u32 cid if (alloc_addr) { + sys_memory.notice("sys_memory_allocate_from_container(): Allocated 0x%x address (size=0x%x)", addr, size); + vm::lock_sudo(addr, static_cast(size)); cpu.check_state(); *alloc_addr = addr; diff --git a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp index e9f57dbec6..fd5257b03a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp @@ -18,6 +18,7 @@ #include "Emu/Io/Skylander.h" #include "Emu/Io/Infinity.h" #include "Emu/Io/Dimensions.h" +#include "Emu/Io/KamenRider.h" #include "Emu/Io/GHLtar.h" #include "Emu/Io/ghltar_config.h" #include "Emu/Io/guncon3_config.h" @@ -175,7 +176,7 @@ private: {0x1430, 0x0150, 0x0150, "Skylanders Portal", &usb_device_skylander::get_num_emu_devices, &usb_device_skylander::make_instance}, {0x0E6F, 0x0129, 0x0129, "Disney Infinity Base", &usb_device_infinity::get_num_emu_devices, &usb_device_infinity::make_instance}, {0x0E6F, 0x0241, 0x0241, "Lego Dimensions Portal", &usb_device_dimensions::get_num_emu_devices, &usb_device_dimensions::make_instance}, - {0x0E6F, 0x200A, 0x200A, "Kamen Rider Summonride Portal", nullptr, nullptr}, + {0x0E6F, 0x200A, 0x200A, "Kamen Rider Summonride Portal", &usb_device_kamen_rider::get_num_emu_devices, &usb_device_kamen_rider::make_instance}, // Cameras // {0x1415, 0x0020, 0x2000, "Sony Playstation Eye", nullptr, nullptr}, // TODO: verifiy diff --git a/rpcs3/Emu/Io/Dimensions.cpp b/rpcs3/Emu/Io/Dimensions.cpp index 158f310bdb..e80a64c0c9 100644 --- a/rpcs3/Emu/Io/Dimensions.cpp +++ b/rpcs3/Emu/Io/Dimensions.cpp @@ -544,9 +544,7 @@ std::optional> dimensions_toypad::pop_added_removed_response( std::lock_guard lock(m_dimensions_mutex); if (m_figure_added_removed_responses.empty()) - { return std::nullopt; - } std::array response = m_figure_added_removed_responses.front(); m_figure_added_removed_responses.pop(); @@ -597,7 +595,6 @@ void usb_device_dimensions::interrupt_transfer(u32 buf_size, u8* buf, u32 endpoi { // Read Endpoint, if a request has not been sent via the write endpoint, set expected result as // EHCI_CC_HALTED so the game doesn't report the Toypad as being disconnected. - std::lock_guard lock(m_query_mutex); std::optional> response = g_dimensionstoypad.pop_added_removed_response(); if (response) { @@ -696,7 +693,6 @@ void usb_device_dimensions::interrupt_transfer(u32 buf_size, u8* buf, u32 endpoi break; } } - std::lock_guard lock(m_query_mutex); m_queries.push(q_result); break; } diff --git a/rpcs3/Emu/Io/Dimensions.h b/rpcs3/Emu/Io/Dimensions.h index e4886c11f1..e2bfbd1e7f 100644 --- a/rpcs3/Emu/Io/Dimensions.h +++ b/rpcs3/Emu/Io/Dimensions.h @@ -79,6 +79,5 @@ public: void isochronous_transfer(UsbTransfer* transfer) override; protected: - shared_mutex m_query_mutex; std::queue> m_queries; }; diff --git a/rpcs3/Emu/Io/Infinity.cpp b/rpcs3/Emu/Io/Infinity.cpp index ff4ce9e85b..b35879207d 100644 --- a/rpcs3/Emu/Io/Infinity.cpp +++ b/rpcs3/Emu/Io/Infinity.cpp @@ -258,13 +258,13 @@ void infinity_base::get_figure_identifier(u8 fig_num, u8 sequence, std::array> infinity_base::pop_added_removed_response() { - return !m_figure_added_removed_responses.empty(); -} + std::lock_guard lock(infinity_mutex); + + if (m_figure_added_removed_responses.empty()) + return std::nullopt; -std::array infinity_base::pop_added_removed_response() -{ std::array response = m_figure_added_removed_responses.front(); m_figure_added_removed_responses.pop(); return response; @@ -399,9 +399,10 @@ void usb_device_infinity::interrupt_transfer(u32 buf_size, u8* buf, u32 endpoint { // Respond after FF command transfer->expected_time = get_timestamp() + 1000; - if (g_infinitybase.has_figure_been_added_removed()) + std::optional> response = g_infinitybase.pop_added_removed_response(); + if (response) { - memcpy(buf, g_infinitybase.pop_added_removed_response().data(), 0x20); + memcpy(buf, response.value().data(), 0x20); } else if (!m_queries.empty()) { diff --git a/rpcs3/Emu/Io/Infinity.h b/rpcs3/Emu/Io/Infinity.h index 2bfcff245f..c8bb4c6e38 100644 --- a/rpcs3/Emu/Io/Infinity.h +++ b/rpcs3/Emu/Io/Infinity.h @@ -3,6 +3,7 @@ #include "Emu/Io/usb_device.h" #include "Utilities/mutex.h" #include +#include #include struct infinity_figure @@ -24,8 +25,7 @@ public: void query_block(u8 fig_num, u8 block, std::array& reply_buf, u8 sequence); void write_block(u8 fig_num, u8 block, const u8* to_write_buf, std::array& reply_buf, u8 sequence); void get_figure_identifier(u8 fig_num, u8 sequence, std::array& reply_buf); - bool has_figure_been_added_removed() const; - std::array pop_added_removed_response(); + std::optional> pop_added_removed_response(); bool remove_figure(u8 position); u32 load_figure(const std::array& buf, fs::file in_file, u8 position); diff --git a/rpcs3/Emu/Io/KamenRider.cpp b/rpcs3/Emu/Io/KamenRider.cpp new file mode 100644 index 0000000000..aaa4836f08 --- /dev/null +++ b/rpcs3/Emu/Io/KamenRider.cpp @@ -0,0 +1,291 @@ +#include "stdafx.h" +#include "KamenRider.h" + +LOG_CHANNEL(kamen_rider_log, "kamen_rider"); + +rider_gate g_ridergate; + +void kamen_rider_figure::save() +{ + if (!kamen_file) + { + kamen_rider_log.error("Tried to save kamen rider figure to file but no kamen rider figure is active!"); + return; + } + kamen_file.seek(0, fs::seek_set); + kamen_file.write(data.data(), 0x14 * 0x10); +} + +u8 rider_gate::generate_checksum(const std::array& data, u32 num_of_bytes) const +{ + ensure(num_of_bytes <= data.size()); + int checksum = 0; + for (u32 i = 0; i < num_of_bytes; i++) + { + checksum += data[i]; + } + return (checksum & 0xFF); +} + +kamen_rider_figure& rider_gate::get_figure_by_uid(const std::array uid) +{ + for (kamen_rider_figure& figure : figures) + { + if (figure.uid == uid) + { + return figure; + } + } + return figures[7]; +} + +void rider_gate::get_blank_response(u8 command, u8 sequence, std::array& reply_buf) +{ + reply_buf = {0x55, 0x02, command, sequence}; + reply_buf[4] = generate_checksum(reply_buf, 4); +} + +void rider_gate::wake_rider_gate(std::array& reply_buf, u8 command, u8 sequence) +{ + std::lock_guard lock(kamen_mutex); + + m_is_awake = true; + reply_buf = {0x55, 0x1a, command, sequence, 0x00, 0x07, 0x00, 0x03, 0x02, + 0x09, 0x20, 0x03, 0xf5, 0x00, 0x19, 0x42, 0x52, 0xb7, + 0xb9, 0xa1, 0xae, 0x2b, 0x88, 0x42, 0x05, 0xfe, 0xe0, 0x1c, 0xac}; +} + +void rider_gate::get_list_tags(std::array& reply_buf, u8 command, u8 sequence) +{ + std::lock_guard lock(kamen_mutex); + + reply_buf = {0x55, 0x02, command, sequence}; + u8 index = 4; + for (const kamen_rider_figure& figure : figures) + { + if (figure.present) + { + reply_buf[index] = 0x09; + memcpy(&reply_buf[index + 1], figure.data.data(), 7); + index += 8; + reply_buf[1] += 8; + } + } + reply_buf[index] = generate_checksum(reply_buf, index); +} + +void rider_gate::query_block(std::array& reply_buf, u8 command, u8 sequence, const u8* uid, u8 sector, u8 block) +{ + std::lock_guard lock(kamen_mutex); + + reply_buf = {0x55, 0x13, command, sequence, 0x00}; + + const std::array uid_array = {uid[0], uid[1], uid[2], uid[3], uid[4], uid[5], uid[6]}; + + const kamen_rider_figure& figure = get_figure_by_uid(uid_array); + if (figure.present) + { + if (sector < 5 && block < 4) + { + memcpy(&reply_buf[5], &figure.data[(sector * 4 * 16) + (block * 16)], 16); + } + } + reply_buf[21] = generate_checksum(reply_buf, 21); +} + +void rider_gate::write_block(std::array& replyBuf, u8 command, u8 sequence, const u8* uid, u8 sector, u8 block, const u8* to_write_buf) +{ + std::lock_guard lock(kamen_mutex); + + const std::array uid_array = {uid[0], uid[1], uid[2], uid[3], uid[4], uid[5], uid[6]}; + + kamen_rider_figure& figure = get_figure_by_uid(uid_array); + if (figure.present) + { + if (sector < 5 && block < 4) + { + memcpy(&figure.data[(sector * 4 * 16) + (block * 16)], to_write_buf, 16); + } + } + + get_blank_response(command, sequence, replyBuf); +} + +std::optional> rider_gate::pop_added_removed_response() +{ + std::lock_guard lock(kamen_mutex); + + if (m_figure_added_removed_responses.empty()) + { + return std::nullopt; + } + + std::array response = m_figure_added_removed_responses.front(); + m_figure_added_removed_responses.pop(); + return response; +} + +bool rider_gate::remove_figure(u8 index) +{ + std::lock_guard lock(kamen_mutex); + + auto& figure = figures[index]; + + if (figure.present) + { + figure.present = false; + figure.save(); + figure.kamen_file.close(); + if (m_is_awake) + { + std::array figure_removed_response = {0x56, 0x09, 0x09, 0x00}; + memcpy(&figure_removed_response[4], figure.uid.data(), figure.uid.size()); + figure_removed_response[11] = generate_checksum(figure_removed_response, 11); + m_figure_added_removed_responses.push(std::move(figure_removed_response)); + } + figure.uid = {}; + return true; + } + + return false; +} + +u8 rider_gate::load_figure(const std::array& buf, fs::file in_file) +{ + std::lock_guard lock(kamen_mutex); + + u8 found_slot = 0xFF; + + // mimics spot retaining on the portal + for (auto i = 0; i < 7; i++) + { + if (!figures[i].present) + { + if (i < found_slot) + { + found_slot = i; + } + } + } + + if (found_slot != 0xFF) + { + auto& figure = figures[found_slot]; + memcpy(figure.data.data(), buf.data(), buf.size()); + figure.kamen_file = std::move(in_file); + figure.uid = {buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]}; + figure.present = true; + + if (m_is_awake) + { + std::array figure_added_response = {0x56, 0x09, 0x09, 0x01}; + memcpy(&figure_added_response[4], figure.uid.data(), figure.uid.size()); + figure_added_response[11] = generate_checksum(figure_added_response, 11); + m_figure_added_removed_responses.push(std::move(figure_added_response)); + } + } + return found_slot; +} + +usb_device_kamen_rider::usb_device_kamen_rider(const std::array& location) + : usb_device_emulated(location) +{ + device = UsbDescriptorNode(USB_DESCRIPTOR_DEVICE, UsbDeviceDescriptor{0x200, 0x0, 0x0, 0x0, 0x40, 0x0E6F, 0x200A, 0x100, 0x1, 0x2, 0x3, 0x1}); + auto& config0 = device.add_node(UsbDescriptorNode(USB_DESCRIPTOR_CONFIG, UsbDeviceConfiguration{0x29, 0x1, 0x1, 0x0, 0x80, 0xFA})); + config0.add_node(UsbDescriptorNode(USB_DESCRIPTOR_INTERFACE, UsbDeviceInterface{0x0, 0x0, 0x2, 0x3, 0x0, 0x0, 0x0})); + config0.add_node(UsbDescriptorNode(USB_DESCRIPTOR_ENDPOINT, UsbDeviceEndpoint{0x81, 0x3, 0x40, 0x1})); + config0.add_node(UsbDescriptorNode(USB_DESCRIPTOR_ENDPOINT, UsbDeviceEndpoint{0x1, 0x3, 0x40, 0x1})); +} + +usb_device_kamen_rider::~usb_device_kamen_rider() +{ +} + +std::shared_ptr usb_device_kamen_rider::make_instance(u32, const std::array& location) +{ + return std::make_shared(location); +} + +u16 usb_device_kamen_rider::get_num_emu_devices() +{ + return 1; +} + +void usb_device_kamen_rider::control_transfer(u8 bmRequestType, u8 bRequest, u16 wValue, u16 wIndex, u16 wLength, u32 buf_size, u8* buf, UsbTransfer* transfer) +{ + usb_device_emulated::control_transfer(bmRequestType, bRequest, wValue, wIndex, wLength, buf_size, buf, transfer); +} + +void usb_device_kamen_rider::interrupt_transfer(u32 buf_size, u8* buf, u32 endpoint, UsbTransfer* transfer) +{ + ensure(buf_size == 0x40); + + transfer->fake = true; + transfer->expected_count = buf_size; + transfer->expected_result = HC_CC_NOERR; + + if (endpoint == 0x81) + { + // Respond after FF command + transfer->expected_time = get_timestamp() + 1000; + std::optional> response = g_ridergate.pop_added_removed_response(); + if (response) + { + memcpy(buf, response.value().data(), 0x40); + } + else if (!m_queries.empty()) + { + memcpy(buf, m_queries.front().data(), 0x20); + m_queries.pop(); + } + else + { + transfer->expected_count = 0; + transfer->expected_result = EHCI_CC_HALTED; + } + } + else if (endpoint == 0x01) + { + const u8 command = buf[2]; + const u8 sequence = buf[3]; + + std::array q_result{}; + + switch (command) + { + case 0xB0: // Wake + { + g_ridergate.wake_rider_gate(q_result, command, sequence); + break; + } + case 0xC0: + case 0xC3: // Color Commands + { + g_ridergate.get_blank_response(command, sequence, q_result); + break; + } + case 0xD0: // Tag List + { + // Return list of figure UIDs, separated by an 09 + g_ridergate.get_list_tags(q_result, command, sequence); + break; + } + case 0xD2: // Read + { + // Read 16 bytes from figure with UID buf[4] - buf[10] + g_ridergate.query_block(q_result, command, sequence, &buf[4], buf[11], buf[12]); + break; + } + case 0xD3: + { + // Write 16 bytes to figure with UID buf[4] - buf[10] + g_ridergate.write_block(q_result, command, sequence, &buf[4], buf[11], buf[12], &buf[13]); + break; + } + default: + kamen_rider_log.error("Unhandled Query Type: 0x%02X", command); + break; + } + m_queries.push(std::move(q_result)); + } +} diff --git a/rpcs3/Emu/Io/KamenRider.h b/rpcs3/Emu/Io/KamenRider.h new file mode 100644 index 0000000000..0e30024b06 --- /dev/null +++ b/rpcs3/Emu/Io/KamenRider.h @@ -0,0 +1,60 @@ +#pragma once + +#include "Emu/Io/usb_device.h" +#include "Utilities/mutex.h" +#include +#include +#include + +struct kamen_rider_figure +{ + fs::file kamen_file; + std::array data{}; + std::array uid{}; + bool present = false; + void save(); +}; + +class rider_gate +{ +public: + void get_blank_response(u8 command, u8 sequence, std::array& reply_buf); + void wake_rider_gate(std::array& replyBuf, u8 command, u8 sequence); + void get_list_tags(std::array& replyBuf, u8 command, u8 sequence); + void query_block(std::array& replyBuf, u8 command, u8 sequence, const u8* uid, u8 sector, u8 block); + void write_block(std::array& replyBuf, u8 command, u8 sequence, const u8* uid, u8 sector, u8 block, const u8* to_write_buf); + std::optional> pop_added_removed_response(); + + bool remove_figure(u8 position); + u8 load_figure(const std::array& buf, fs::file in_file); + +protected: + shared_mutex kamen_mutex; + std::array figures{}; + +private: + u8 generate_checksum(const std::array& data, u32 num_of_bytes) const; + kamen_rider_figure& get_figure_by_uid(const std::array uid); + + std::queue> m_figure_added_removed_responses; + + bool m_is_awake = false; +}; + +extern rider_gate g_ridergate; + +class usb_device_kamen_rider : public usb_device_emulated +{ +public: + usb_device_kamen_rider(const std::array& location); + ~usb_device_kamen_rider(); + + static std::shared_ptr make_instance(u32 controller_index, const std::array& location); + static u16 get_num_emu_devices(); + + void control_transfer(u8 bmRequestType, u8 bRequest, u16 wValue, u16 wIndex, u16 wLength, u32 buf_size, u8* buf, UsbTransfer* transfer) override; + void interrupt_transfer(u32 buf_size, u8* buf, u32 endpoint, UsbTransfer* transfer) override; + +protected: + std::queue> m_queries; +}; diff --git a/rpcs3/Emu/Io/PadHandler.cpp b/rpcs3/Emu/Io/PadHandler.cpp index bc61efc895..4fc8fa6376 100644 --- a/rpcs3/Emu/Io/PadHandler.cpp +++ b/rpcs3/Emu/Io/PadHandler.cpp @@ -170,7 +170,7 @@ u16 PadHandlerBase::ConvertAxis(f32 value) // The DS3, (and i think xbox controllers) give a 'square-ish' type response, so that the corners will give (almost)max x/y instead of the ~30x30 from a perfect circle // using a simple scale/sensitivity increase would *work* although it eats a chunk of our usable range in exchange -// this might be the best for now, in practice it seems to push the corners to max of 20x20, with a squircle_factor of 8000 +// this might be the best for now, in practice it seems to push the corners to max of 20x20, with a squircle_factor of ~4000 // This function assumes inX and inY is already in 0-255 void PadHandlerBase::ConvertToSquirclePoint(u16& inX, u16& inY, u32 squircle_factor) { diff --git a/rpcs3/Emu/Io/pad_config.h b/rpcs3/Emu/Io/pad_config.h index 5f8ea18a74..fa695c4941 100644 --- a/rpcs3/Emu/Io/pad_config.h +++ b/rpcs3/Emu/Io/pad_config.h @@ -88,8 +88,8 @@ struct cfg_pad final : cfg::node cfg::uint<0, 1000000> rstick_anti_deadzone{ this, "Right Stick Anti-Deadzone", 0 }; cfg::uint<0, 1000000> ltriggerthreshold{ this, "Left Trigger Threshold", 0 }; cfg::uint<0, 1000000> rtriggerthreshold{ this, "Right Trigger Threshold", 0 }; - cfg::uint<0, 1000000> lpadsquircling{ this, "Left Pad Squircling Factor", 8000 }; - cfg::uint<0, 1000000> rpadsquircling{ this, "Right Pad Squircling Factor", 8000 }; + cfg::uint<0, 1000000> lpadsquircling{ this, "Left Pad Squircling Factor", 4000 }; + cfg::uint<0, 1000000> rpadsquircling{ this, "Right Pad Squircling Factor", 4000 }; cfg::uint<0, 255> colorR{ this, "Color Value R", 0 }; cfg::uint<0, 255> colorG{ this, "Color Value G", 0 }; diff --git a/rpcs3/Emu/Io/pad_types.cpp b/rpcs3/Emu/Io/pad_types.cpp index 9c67cc20f5..9005bc7fe5 100644 --- a/rpcs3/Emu/Io/pad_types.cpp +++ b/rpcs3/Emu/Io/pad_types.cpp @@ -217,6 +217,12 @@ bool Pad::get_analog_limiter_button_active(bool is_toggle_mode, u32 player_id) const Button& analog_limiter_button = m_buttons[m_analog_limiter_button_index]; + if (analog_limiter_button.m_key_codes.empty()) + { + // Active by default if no button was assigned + return true; + } + if (is_toggle_mode) { const bool pressed = analog_limiter_button.m_pressed; diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 5441176d6a..b118ac8027 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -950,7 +950,7 @@ namespace vm return true; } - static u32 _page_unmap(u32 addr, u32 max_size, u64 bflags, utils::shm* shm, std::vector>& unmap_events) + static u32 _page_unmap(u32 addr, u32 max_size, u64 bflags, utils::shm* shm, std::vector>& unmap_events, bool is_block_termination = false) { perf_meter<"PAGE_UNm"_u64> perf0; @@ -1021,7 +1021,11 @@ namespace vm ppu_remove_hle_instructions(addr, size); // Actually unmap memory - if (is_noop) + if (is_block_termination && (!shm || is_noop)) + { + // We can skip it if the block is freed + } + else if (is_noop) { std::memset(g_sudo_addr + addr, 0, size); } @@ -1327,7 +1331,7 @@ namespace vm const auto size = it->second.first; std::vector> event_data; - ensure(size == _page_unmap(it->first, size, this->flags, it->second.second.get(), unmapped ? *unmapped : event_data)); + ensure(size == _page_unmap(it->first, size, this->flags, it->second.second.get(), unmapped ? *unmapped : event_data, true)); if (it->second.second && addr < 0xE0000000) { diff --git a/rpcs3/Emu/NP/np_handler.cpp b/rpcs3/Emu/NP/np_handler.cpp index b64fad1767..30eac7f470 100644 --- a/rpcs3/Emu/NP/np_handler.cpp +++ b/rpcs3/Emu/NP/np_handler.cpp @@ -646,7 +646,7 @@ namespace np for (; it != end; ++it) { - strcpy(ifr.ifr_name, it->ifr_name); + strcpy_trunc(ifr.ifr_name, it->ifr_name); if (ioctl(sock, SIOCGIFFLAGS, &ifr) == 0) { if (!(ifr.ifr_flags & IFF_LOOPBACK)) diff --git a/rpcs3/Emu/RSX/Common/buffer_stream.hpp b/rpcs3/Emu/RSX/Common/buffer_stream.hpp index ff68d6008d..5cf4da22fa 100644 --- a/rpcs3/Emu/RSX/Common/buffer_stream.hpp +++ b/rpcs3/Emu/RSX/Common/buffer_stream.hpp @@ -40,10 +40,15 @@ namespace utils /** * Stream a 128 bits vector from src to dst. */ - static inline - void stream_vector_from_memory(void* dst, void* src) + template + void stream_vector_from_memory(void* dst, void* src) { - const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(src)); - _mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector); + auto _src = reinterpret_cast<__m128i*>(src); + auto _dst = reinterpret_cast<__m128i*>(dst); + for (int i = 0; i < Count; ++i, ++_src, ++_dst) + { + const __m128i vector = _mm_loadu_si128(_src); + _mm_stream_si128(_dst, vector); + } } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 606e4716d2..dfe8043bd3 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1869,314 +1869,319 @@ namespace rsx { return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() }; } + + return {}; } - else + + // Fast lookup for cyclic reference + if (m_rtts.address_is_bound(attr.address)) [[unlikely]] { - // Fast lookup for cyclic reference - if (m_rtts.address_is_bound(attr.address)) [[unlikely]] + if (auto texptr = m_rtts.get_surface_at(attr.address); + helpers::check_framebuffer_resource(texptr, attr, extended_dimension)) { - if (auto texptr = m_rtts.get_surface_at(attr.address); - helpers::check_framebuffer_resource(texptr, attr, extended_dimension)) + const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format); + + auto result = helpers::process_framebuffer_resource_fast( + cmd, texptr, attr, scale, extended_dimension, remap, true, force_convert); + + if (!options.skip_texture_barriers && result.is_cyclic_reference) { - const bool force_convert = !render_target_format_is_compatible(texptr, attr.gcm_format); - - auto result = helpers::process_framebuffer_resource_fast( - cmd, texptr, attr, scale, extended_dimension, remap, true, force_convert); - - if (!options.skip_texture_barriers && result.is_cyclic_reference) - { - // A texture barrier is only necessary when the rendertarget is going to be bound as a shader input. - // If a temporary copy is to be made, this should not be invoked - insert_texture_barrier(cmd, texptr); - } - - return result; + // A texture barrier is only necessary when the rendertarget is going to be bound as a shader input. + // If a temporary copy is to be made, this should not be invoked + insert_texture_barrier(cmd, texptr); } + + return result; + } + } + + rsx::simple_array overlapping_fbos; + rsx::simple_array overlapping_locals; + + auto fast_fbo_check = [&]() -> sampled_image_descriptor + { + const auto& last = overlapping_fbos.back(); + if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped) + { + const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format); + + return helpers::process_framebuffer_resource_fast( + cmd, last.surface, attr, scale, extended_dimension, remap, false, force_convert); } - rsx::simple_array overlapping_fbos; - rsx::simple_array overlapping_locals; + return {}; + }; - auto fast_fbo_check = [&]() -> sampled_image_descriptor + // Check surface cache early if the option is enabled + if (options.prefer_surface_cache) + { + const u16 block_h = (attr.depth * attr.slice_h); + overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); + + if (!overlapping_fbos.empty()) { - const auto& last = overlapping_fbos.back(); - if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped) + if (auto result = fast_fbo_check(); result.validate()) { - const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format); - - return helpers::process_framebuffer_resource_fast( - cmd, last.surface, attr, scale, extended_dimension, remap, false, force_convert); - } - - return {}; - }; - - // Check surface cache early if the option is enabled - if (options.prefer_surface_cache) - { - const u16 block_h = (attr.depth * attr.slice_h); - overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); - - if (!overlapping_fbos.empty()) - { - if (auto result = fast_fbo_check(); result.validate()) - { - return result; - } - - if (options.skip_texture_merge) - { - overlapping_fbos.clear(); - } - } - } - - // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache - const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; - overlapping_locals = find_texture_from_range(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask); - - // Search for exact match if possible - for (auto& cached_texture : overlapping_locals) - { - if (cached_texture->matches(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth, 0)) - { -#ifdef TEXTURE_CACHE_DEBUG - if (!memory_range.inside(cached_texture->get_confirmed_range())) - { - // TODO. This is easily possible for blit_dst textures if the blit is incomplete in Y - // The possibility that a texture will be split into parts on the CPU like this is very rare - continue; - } -#endif - if (attr.swizzled != cached_texture->is_swizzled()) - { - // We can have the correct data in cached_texture but it needs decoding before it can be sampled. - // Usually a sign of a game bug where the developer forgot to mark the texture correctly the first time we see it. - // TODO: This section should execute under an exclusive lock, but we're not actually modifying any object references, only flags - rsx_log.warning("A texture was found in cache for address 0x%x, but swizzle flag does not match", attr.address); - cached_texture->unprotect(); - cached_texture->set_dirty(true); - break; - } - - return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() }; - } - } - - if (!overlapping_locals.empty()) - { - // Remove everything that is not a transfer target - overlapping_locals.erase_if([](const auto& e) - { - return e->is_dirty() || (e->get_context() != rsx::texture_upload_context::blit_engine_dst); - }); - } - - if (!options.prefer_surface_cache) - { - // Now check for surface cache hits - const u16 block_h = (attr.depth * attr.slice_h); - overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); - } - - if (!overlapping_fbos.empty() || !overlapping_locals.empty()) - { - int _pool = -1; - if (overlapping_locals.empty()) [[likely]] - { - _pool = 0; - } - else if (overlapping_fbos.empty()) - { - _pool = 1; - } - else - { - _pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 0 : 1; - } - - if (_pool == 0) - { - // Surface cache data is newer, check if this thing fits our search parameters - if (!options.prefer_surface_cache) - { - if (auto result = fast_fbo_check(); result.validate()) - { - return result; - } - } - } - else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d) - { - const auto last = overlapping_locals.back(); - const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / attr.bpp; - - if (last->get_section_base() == attr.address && - normalized_width >= attr.width && last->get_height() >= attr.height) - { - u32 gcm_format = attr.gcm_format; - const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format); - - if (!gcm_format_is_depth && last->is_depth_texture()) - { - // While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible - gcm_format = helpers::get_compatible_depth_format(attr.gcm_format); - } - - auto new_attr = attr; - new_attr.gcm_format = gcm_format; - - if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped) - { - // Clipped view - auto viewed_image = last->get_raw_texture(); - sampled_image_descriptor result = { viewed_image->get_view(remap), last->get_context(), - viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() }; - - helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height())); - return result; - } - - return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {}, - last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap }; - } - } - - auto result = helpers::merge_cache_resources( - cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, remap, _pool); - - const bool is_simple_subresource_copy = - (result.external_subresource_desc.op == deferred_request_command::copy_image_static) || - (result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic) || - (result.external_subresource_desc.op == deferred_request_command::blit_image_static); - - if (attr.edge_clamped && - !g_cfg.video.strict_rendering_mode && - is_simple_subresource_copy && - render_target_format_is_compatible(result.external_subresource_desc.src0(), attr.gcm_format)) - { - if (result.external_subresource_desc.op != deferred_request_command::blit_image_static) [[ likely ]] - { - helpers::convert_image_copy_to_clip_descriptor( - result, - position2i(result.external_subresource_desc.x, result.external_subresource_desc.y), - size2i(result.external_subresource_desc.width, result.external_subresource_desc.height), - size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()), - remap, false); - } - else - { - helpers::convert_image_blit_to_clip_descriptor( - result, - remap, - false); - } - - if (!!result.ref_address && m_rtts.address_is_bound(result.ref_address)) - { - result.is_cyclic_reference = true; - - auto texptr = ensure(m_rtts.get_surface_at(result.ref_address)); - insert_texture_barrier(cmd, texptr); - } - return result; } if (options.skip_texture_merge) { - if (is_simple_subresource_copy) - { - return result; - } + overlapping_fbos.clear(); + } + } + } - return {}; + // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache + const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; + overlapping_locals = find_texture_from_range(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask); + + // Search for exact match if possible + for (auto& cached_texture : overlapping_locals) + { + if (cached_texture->matches(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth, 0)) + { +#ifdef TEXTURE_CACHE_DEBUG + if (!memory_range.inside(cached_texture->get_confirmed_range())) + { + // TODO. This is easily possible for blit_dst textures if the blit is incomplete in Y + // The possibility that a texture will be split into parts on the CPU like this is very rare + continue; + } +#endif + if (attr.swizzled != cached_texture->is_swizzled()) + { + // We can have the correct data in cached_texture but it needs decoding before it can be sampled. + // Usually a sign of a game bug where the developer forgot to mark the texture correctly the first time we see it. + // TODO: This section should execute under an exclusive lock, but we're not actually modifying any object references, only flags + rsx_log.warning("A texture was found in cache for address 0x%x, but swizzle flag does not match", attr.address); + cached_texture->unprotect(); + cached_texture->set_dirty(true); + break; } - if (const auto section_count = result.external_subresource_desc.sections_to_copy.size(); - section_count > 0) + return{ cached_texture->get_view(remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() }; + } + } + + if (!overlapping_locals.empty()) + { + // Remove everything that is not a transfer target + overlapping_locals.erase_if([](const auto& e) + { + return e->is_dirty() || (e->get_context() != rsx::texture_upload_context::blit_engine_dst); + }); + } + + if (!options.prefer_surface_cache) + { + // Now check for surface cache hits + const u16 block_h = (attr.depth * attr.slice_h); + overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read); + } + + if (!overlapping_fbos.empty() || !overlapping_locals.empty()) + { + int _pool = -1; + if (overlapping_locals.empty()) [[likely]] + { + _pool = 0; + } + else if (overlapping_fbos.empty()) + { + _pool = 1; + } + else + { + _pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 0 : 1; + } + + if (_pool == 0) + { + // Surface cache data is newer, check if this thing fits our search parameters + if (!options.prefer_surface_cache) { - bool result_is_valid; - if (_pool == 0 && !g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer) + if (auto result = fast_fbo_check(); result.validate()) { - // HACK: Avoid WCB requirement for some games with wrongly declared sampler dimensions. - // TODO: Some games may render a small region (e.g 1024x256x2) and sample a huge texture (e.g 1024x1024). - // Seen in APF2k8 - this causes missing bits to be reuploaded from CPU which can cause WCB requirement. - // Properly fix this by introducing partial data upload into the surface cache in such cases and making RCB/RDB - // enabled by default. Blit engine already handles this correctly. - result_is_valid = true; - } - else - { - result_is_valid = result.atlas_covers_target_area(section_count == 1 ? 99 : 90); - } - - if (result_is_valid) - { - // Check for possible duplicates - usz max_overdraw_ratio = u32{ umax }; - usz max_safe_sections = u32{ umax }; - - switch (result.external_subresource_desc.op) - { - case deferred_request_command::atlas_gather: - max_overdraw_ratio = 150; - max_safe_sections = 8 + 2 * attr.mipmaps; - break; - case deferred_request_command::cubemap_gather: - max_overdraw_ratio = 150; - max_safe_sections = 6 * 2 * attr.mipmaps; - break; - case deferred_request_command::_3d_gather: - // 3D gather can have very many input sections, try to keep section count low - max_overdraw_ratio = 125; - max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100; - break; - default: - break; - } - - if (overlapping_fbos.size() > max_safe_sections) - { - // Are we really over-budget? - u32 coverage_size = 0; - for (const auto& section : overlapping_fbos) - { - const auto area = section.surface->get_native_pitch() * section.surface->template get_surface_height(); - coverage_size += area; - } - - if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length(); - coverage_ratio > max_overdraw_ratio) - { - rsx_log.warning("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%", - static_cast(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio); - m_rtts.check_for_duplicates(overlapping_fbos); - } - } - - // Optionally disallow caching if resource is being written to as it is being read from - for (const auto& section : overlapping_fbos) - { - if (m_rtts.address_is_bound(section.base_address)) - { - if (result.external_subresource_desc.op == deferred_request_command::copy_image_static) - { - result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic; - } - else - { - result.external_subresource_desc.do_not_cache = true; - } - - break; - } - } - return result; } } } + else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d) + { + const auto last = overlapping_locals.back(); + const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / attr.bpp; + + if (last->get_section_base() == attr.address && + normalized_width >= attr.width && last->get_height() >= attr.height) + { + u32 gcm_format = attr.gcm_format; + const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format); + + if (!gcm_format_is_depth && last->is_depth_texture()) + { + // While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible + gcm_format = helpers::get_compatible_depth_format(attr.gcm_format); + } + + auto new_attr = attr; + new_attr.gcm_format = gcm_format; + + if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped) + { + // Clipped view + auto viewed_image = last->get_raw_texture(); + sampled_image_descriptor result = { viewed_image->get_view(remap), last->get_context(), + viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() }; + + helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height())); + return result; + } + + return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {}, + last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap }; + } + } + + auto result = helpers::merge_cache_resources( + cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, remap, _pool); + + const bool is_simple_subresource_copy = + (result.external_subresource_desc.op == deferred_request_command::copy_image_static) || + (result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic) || + (result.external_subresource_desc.op == deferred_request_command::blit_image_static); + + if (attr.edge_clamped && + !g_cfg.video.strict_rendering_mode && + is_simple_subresource_copy && + render_target_format_is_compatible(result.external_subresource_desc.src0(), attr.gcm_format)) + { + if (result.external_subresource_desc.op != deferred_request_command::blit_image_static) [[ likely ]] + { + helpers::convert_image_copy_to_clip_descriptor( + result, + position2i(result.external_subresource_desc.x, result.external_subresource_desc.y), + size2i(result.external_subresource_desc.width, result.external_subresource_desc.height), + size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()), + remap, false); + } + else + { + helpers::convert_image_blit_to_clip_descriptor( + result, + remap, + false); + } + + if (!!result.ref_address && m_rtts.address_is_bound(result.ref_address)) + { + result.is_cyclic_reference = true; + + auto texptr = ensure(m_rtts.get_surface_at(result.ref_address)); + insert_texture_barrier(cmd, texptr); + } + + return result; + } + + if (options.skip_texture_merge) + { + if (is_simple_subresource_copy) + { + return result; + } + + return {}; + } + + const auto section_count = result.external_subresource_desc.sections_to_copy.size(); + if (section_count == 0) + { + // Fail + return {}; + } + + bool result_is_valid; + if (_pool == 0 && !g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer) + { + // HACK: Avoid WCB requirement for some games with wrongly declared sampler dimensions. + // TODO: Some games may render a small region (e.g 1024x256x2) and sample a huge texture (e.g 1024x1024). + // Seen in APF2k8 - this causes missing bits to be reuploaded from CPU which can cause WCB requirement. + // Properly fix this by introducing partial data upload into the surface cache in such cases and making RCB/RDB + // enabled by default. Blit engine already handles this correctly. + result_is_valid = true; + } + else + { + result_is_valid = result.atlas_covers_target_area(section_count == 1 ? 99 : 90); + } + + if (!result_is_valid) + { + return {}; + } + + // Check for possible duplicates + usz max_overdraw_ratio = u32{ umax }; + usz max_safe_sections = u32{ umax }; + + switch (result.external_subresource_desc.op) + { + case deferred_request_command::atlas_gather: + max_overdraw_ratio = 150; + max_safe_sections = 8 + 2 * attr.mipmaps; + break; + case deferred_request_command::cubemap_gather: + max_overdraw_ratio = 150; + max_safe_sections = 6 * 2 * attr.mipmaps; + break; + case deferred_request_command::_3d_gather: + // 3D gather can have very many input sections, try to keep section count low + max_overdraw_ratio = 125; + max_safe_sections = (attr.depth * attr.mipmaps * 110) / 100; + break; + default: + break; + } + + if (overlapping_fbos.size() > max_safe_sections) + { + // Are we really over-budget? + u32 coverage_size = 0; + for (const auto& section : overlapping_fbos) + { + const auto area = section.surface->get_native_pitch() * section.surface->template get_surface_height(); + coverage_size += area; + } + + if (const auto coverage_ratio = (coverage_size * 100ull) / memory_range.length(); + coverage_ratio > max_overdraw_ratio) + { + rsx_log.warning("[Performance warning] Texture gather routine encountered too many objects! Operation=%d, Mipmaps=%d, Depth=%d, Sections=%zu, Ratio=%llu%", + static_cast(result.external_subresource_desc.op), attr.mipmaps, attr.depth, overlapping_fbos.size(), coverage_ratio); + m_rtts.check_for_duplicates(overlapping_fbos); + } + } + + // Optionally disallow caching if resource is being written to as it is being read from + for (const auto& section : overlapping_fbos) + { + if (m_rtts.address_is_bound(section.base_address)) + { + if (result.external_subresource_desc.op == deferred_request_command::copy_image_static) + { + result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic; + } + else + { + result.external_subresource_desc.do_not_cache = true; + } + + break; + } + } + + return result; } return {}; diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 52f6ff5591..1560f40ad4 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -262,32 +262,67 @@ namespace rsx u64 tag; // Timestamp u32 list; // List source, 0 = fbo, 1 = local u32 index; // Index in list + + utils::address_range32 bounds; }; - std::vector sort_list; + const u32 available_slices = fbos.size() + local.size(); + bool unordered_list = false; + + rsx::simple_array sort_list; + rsx::simple_array sort_ranges; + sort_list.reserve(available_slices); + sort_ranges.reserve(available_slices); + + // Generate sorting tree if both resources are available and overlapping + for (u32 index = 0; index < fbos.size(); ++index) + { + const auto range = fbos[index].surface->get_memory_range(); + sort_ranges.push_back(range); + sort_list.push_back({ + .tag = fbos[index].surface->last_use_tag, + .list = 0, + .index = index, + .bounds = range + }); + } + + for (u32 index = 0; index < local.size(); ++index) + { + if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst) + continue; + + const auto range = local[index]->get_section_range(); + sort_ranges.push_back(range); + sort_list.push_back({ + .tag = local[index]->last_write_tag, + .list = 1, + .index = index, + .bounds = range + }); + } if (!fbos.empty() && !local.empty()) { - // Generate sorting tree if both resources are available and overlapping - sort_list.reserve(fbos.size() + local.size()); - - for (u32 index = 0; index < fbos.size(); ++index) - { - sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index }); - } - - for (u32 index = 0; index < local.size(); ++index) - { - if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst) - continue; - - sort_list.push_back({ local[index]->last_write_tag, 1, index }); - } - - std::sort(sort_list.begin(), sort_list.end(), FN(x.tag < y.tag)); + sort_list.sort(FN(x.tag < y.tag)); } - auto add_rtt_resource = [&](auto& section, u16 slice) + // Check if ordered + for (u32 i = 0; i < sort_list.size(); ++i) + { + if (i == 0) + { + continue; + } + + if (sort_ranges[i].start < sort_ranges[i - 1].end) + { + unordered_list = true; + break; + } + } + + auto add_rtt_resource = [&](auto& section, u16 slice) -> std::pair // [ input fully consumed, output fully covered ] { const u32 slice_begin = (slice * attr.slice_h); const u32 slice_end = (slice_begin + attr.height); @@ -296,7 +331,7 @@ namespace rsx if (section.dst_area.y >= slice_end || section_end <= slice_begin) { // Belongs to a different slice - return; + return { section_end <= slice_begin, false }; } // How much of this slice to read? @@ -346,9 +381,11 @@ namespace rsx .dst_w = dst_width, .dst_h = dst_height }); + + return { section_end <= slice_end, section_end >= slice_end }; }; - auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) + auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling) -> std::pair // [ input fully consumed, output fully covered ] { // Intersect this resource with the original one. // Note that intersection takes place in a normalized coordinate space (bpp = 1) @@ -364,7 +401,7 @@ namespace rsx if (!dimensions.width || !dimensions.height) { // Out of bounds, invalid intersection - return; + return { false, false }; } // The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific @@ -383,7 +420,7 @@ namespace rsx if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin) { // Belongs to a different slice - return; + return { write_section_end <= dst_slice_begin, false }; } const u16 dst_w = static_cast(dst_size.width); @@ -411,25 +448,27 @@ namespace rsx .dst_w = _dst_w, .dst_h = _dst_h }); + + return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end }; } - else - { - out.push_back - ({ - .src = section->get_raw_texture(), - .xform = surface_transform::identity, - .level = 0, - .src_x = static_cast(src_offset.x), // src.x - .src_y = static_cast(src_offset.y), // src.y - .dst_x = static_cast(dst_offset.x), // dst.x - .dst_y = static_cast(dst_y - dst_slice_begin), // dst.y - .dst_z = 0, - .src_w = src_w, - .src_h = height, - .dst_w = dst_w, - .dst_h = height - }); - } + + out.push_back + ({ + .src = section->get_raw_texture(), + .xform = surface_transform::identity, + .level = 0, + .src_x = static_cast(src_offset.x), // src.x + .src_y = static_cast(src_offset.y), // src.y + .dst_x = static_cast(dst_offset.x), // dst.x + .dst_y = static_cast(dst_y - dst_slice_begin), // dst.y + .dst_z = 0, + .src_w = src_w, + .src_h = height, + .dst_w = dst_w, + .dst_h = height + }); + + return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end }; }; u32 current_address = attr.address; @@ -442,34 +481,36 @@ namespace rsx for (u16 slice = 0; slice < count; ++slice) { - auto num_surface = out.size(); + const auto num_surface = out.size(); + const auto slice_range = utils::address_range32::start_length(current_address, slice_size); - if (local.empty()) [[likely]] + for (auto& e : sort_list) { - for (auto& section : fbos) + if (e.index == umax || !slice_range.overlaps(e.bounds)) { - add_rtt_resource(section, slice); + continue; } - } - else if (fbos.empty()) - { - for (auto& section : local) + + bool remove = false, slice_complete = false; + if (e.list == 0) { - add_local_resource(section, current_address, slice, false); + std::tie(remove, slice_complete) = add_rtt_resource(fbos[e.index], slice); } - } - else - { - for (const auto& e : sort_list) + else { - if (e.list == 0) - { - add_rtt_resource(fbos[e.index], slice); - } - else - { - add_local_resource(local[e.index], current_address, slice); - } + std::tie(remove, slice_complete) = add_local_resource(local[e.index], current_address, slice, !fbos.empty()); + } + + if (remove) + { + // If we got here, the section has been fully ingested by the current slice and will never match again. + e.index = umax; + } + + if (slice_complete && !unordered_list) + { + // Reached the end of the current slice and we are guaranteed to not match any other section since they lie after this one in memory. + break; } } diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 46edb42175..449900cef1 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -664,7 +664,21 @@ namespace rsx void draw_command_processor::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) const { +#pragma pack(push, 1) + struct fragment_context_t + { + f32 fog_param0; + f32 fog_param1; + u32 rop_control; + f32 alpha_ref; + u32 fog_mode; + f32 wpos_scale; + f32 wpos_bias[2]; + }; +#pragma pack(pop) + ROP_control_t rop_control{}; + alignas(16) fragment_context_t payload{}; if (REGS(m_ctx)->alpha_test_enabled()) { @@ -720,10 +734,6 @@ namespace rsx } } - const f32 fog0 = REGS(m_ctx)->fog_params_0(); - const f32 fog1 = REGS(m_ctx)->fog_params_1(); - const u32 fog_mode = static_cast(REGS(m_ctx)->fog_equation()); - // Check if framebuffer is actually an XRGB format and not a WZYX format switch (REGS(m_ctx)->surface_color()) { @@ -745,21 +755,37 @@ namespace rsx } // Generate wpos coefficients - // wpos equation is now as follows: + // wpos equation is now as follows (ignoring pixel center offset): // wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0) // wpos.x = (frag_coord / resolution_scale) // wpos.zw = frag_coord.zw + payload.fog_param0 = REGS(m_ctx)->fog_params_0(); + payload.fog_param1 = REGS(m_ctx)->fog_params_1(); + payload.fog_mode = static_cast(REGS(m_ctx)->fog_equation()); + payload.rop_control = rop_control.value; + payload.alpha_ref = REGS(m_ctx)->alpha_ref(); + + const auto window_origin = REGS(m_ctx)->shader_window_origin(); const u32 window_height = REGS(m_ctx)->shader_window_height(); + const auto pixel_center = REGS(m_ctx)->pixel_center(); const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale(); - const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); - const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height; - const f32 alpha_ref = REGS(m_ctx)->alpha_ref(); - u32* dst = static_cast(buffer); - utils::stream_vector(dst, std::bit_cast(fog0), std::bit_cast(fog1), rop_control.value, std::bit_cast(alpha_ref)); - utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); + payload.wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); + payload.wpos_bias[0] = 0.f; + payload.wpos_bias[1] = (window_origin == rsx::window_origin::top) ? 0.f : window_height; + + if (pixel_center == window_pixel_center::integer) + { + // We could technically fix this shader side, but... + // 1. We have full control over gl_FragCoord consumption, so fix it using our own pipeline as it is an emulated input. + // 2. Vulkan does not support pixel_center_integer decoration. SPIR-V modules only permit pixel center at half offset. + payload.wpos_bias[0] -= 0.5f; + payload.wpos_bias[1] -= 0.5f; + } + + utils::stream_vector_from_memory<2>(buffer, &payload); } void draw_command_processor::fill_constants_instancing_buffer(rsx::io_buffer& indirection_table_buf, rsx::io_buffer& constants_data_array_buffer, const VertexProgramBase* prog) const diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 213275b9c4..51bf257aef 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -7,36 +7,14 @@ namespace gl { - GLenum comparison_op(rsx::comparison_function op) + inline GLenum comparison_op(rsx::comparison_function op) { - switch (op) - { - case rsx::comparison_function::never: return GL_NEVER; - case rsx::comparison_function::less: return GL_LESS; - case rsx::comparison_function::equal: return GL_EQUAL; - case rsx::comparison_function::less_or_equal: return GL_LEQUAL; - case rsx::comparison_function::greater: return GL_GREATER; - case rsx::comparison_function::not_equal: return GL_NOTEQUAL; - case rsx::comparison_function::greater_or_equal: return GL_GEQUAL; - case rsx::comparison_function::always: return GL_ALWAYS; - } - fmt::throw_exception("Unsupported comparison op 0x%X", static_cast(op)); + return static_cast(op); } - GLenum stencil_op(rsx::stencil_op op) + inline GLenum stencil_op(rsx::stencil_op op) { - switch (op) - { - case rsx::stencil_op::invert: return GL_INVERT; - case rsx::stencil_op::keep: return GL_KEEP; - case rsx::stencil_op::zero: return GL_ZERO; - case rsx::stencil_op::replace: return GL_REPLACE; - case rsx::stencil_op::incr: return GL_INCR; - case rsx::stencil_op::decr: return GL_DECR; - case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP; - case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP; - } - fmt::throw_exception("Unsupported stencil op 0x%X", static_cast(op)); + return static_cast(op); } GLenum blend_equation(rsx::blend_equation op) @@ -62,76 +40,33 @@ namespace gl } } - GLenum blend_factor(rsx::blend_factor op) + inline GLenum blend_factor(rsx::blend_factor op) { - switch (op) - { - case rsx::blend_factor::zero: return GL_ZERO; - case rsx::blend_factor::one: return GL_ONE; - case rsx::blend_factor::src_color: return GL_SRC_COLOR; - case rsx::blend_factor::one_minus_src_color: return GL_ONE_MINUS_SRC_COLOR; - case rsx::blend_factor::dst_color: return GL_DST_COLOR; - case rsx::blend_factor::one_minus_dst_color: return GL_ONE_MINUS_DST_COLOR; - case rsx::blend_factor::src_alpha: return GL_SRC_ALPHA; - case rsx::blend_factor::one_minus_src_alpha: return GL_ONE_MINUS_SRC_ALPHA; - case rsx::blend_factor::dst_alpha: return GL_DST_ALPHA; - case rsx::blend_factor::one_minus_dst_alpha: return GL_ONE_MINUS_DST_ALPHA; - case rsx::blend_factor::src_alpha_saturate: return GL_SRC_ALPHA_SATURATE; - case rsx::blend_factor::constant_color: return GL_CONSTANT_COLOR; - case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR; - case rsx::blend_factor::constant_alpha: return GL_CONSTANT_ALPHA; - case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA; - } - fmt::throw_exception("Unsupported blend factor 0x%X", static_cast(op)); + return static_cast(op); } - GLenum logic_op(rsx::logic_op op) + inline GLenum logic_op(rsx::logic_op op) { - switch (op) - { - case rsx::logic_op::logic_clear: return GL_CLEAR; - case rsx::logic_op::logic_and: return GL_AND; - case rsx::logic_op::logic_and_reverse: return GL_AND_REVERSE; - case rsx::logic_op::logic_copy: return GL_COPY; - case rsx::logic_op::logic_and_inverted: return GL_AND_INVERTED; - case rsx::logic_op::logic_noop: return GL_NOOP; - case rsx::logic_op::logic_xor: return GL_XOR; - case rsx::logic_op::logic_or: return GL_OR; - case rsx::logic_op::logic_nor: return GL_NOR; - case rsx::logic_op::logic_equiv: return GL_EQUIV; - case rsx::logic_op::logic_invert: return GL_INVERT; - case rsx::logic_op::logic_or_reverse: return GL_OR_REVERSE; - case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED; - case rsx::logic_op::logic_or_inverted: return GL_OR_INVERTED; - case rsx::logic_op::logic_nand: return GL_NAND; - case rsx::logic_op::logic_set: return GL_SET; - } - fmt::throw_exception("Unsupported logic op 0x%X", static_cast(op)); + return static_cast(op); } - GLenum front_face(rsx::front_face op) + inline GLenum front_face(rsx::front_face op) { - //NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left - //shader_window_origin register does not affect this - //verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom) - //correctness of face winding checked using stencil test (GOW collection shadows) - switch (op) - { - case rsx::front_face::cw: return GL_CCW; - case rsx::front_face::ccw: return GL_CW; - } - fmt::throw_exception("Unsupported front face 0x%X", static_cast(op)); + // NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left + // shader_window_origin register does not affect this + // verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom) + // correctness of face winding checked using stencil test (GOW collection shadows) + return static_cast(op) ^ 1u; } - GLenum cull_face(rsx::cull_face op) + inline GLenum cull_face(rsx::cull_face op) { - switch (op) - { - case rsx::cull_face::front: return GL_FRONT; - case rsx::cull_face::back: return GL_BACK; - case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK; - } - fmt::throw_exception("Unsupported cull face 0x%X", static_cast(op)); + return static_cast(op); + } + + inline GLenum polygon_mode(rsx::polygon_mode mode) + { + return static_cast(mode); } } @@ -335,6 +270,10 @@ void GLGSRender::update_draw_state() // Clip planes gl_state.clip_planes((current_vertex_program.output_mask >> CELL_GCM_ATTRIB_OUTPUT_UC0) & 0x3F); + // Polygon mode. We can only have one polygon mode active at one time, so we need to determine if any face is currently culled. + const bool show_back = REGS(m_ctx)->cull_face_enabled() && REGS(m_ctx)->cull_face_mode() == rsx::cull_face::front; + gl_state.polygon_mode(gl::polygon_mode(show_back ? REGS(m_ctx)->polygon_mode_back() : REGS(m_ctx)->polygon_mode_front())); + //TODO //NV4097_SET_ANISO_SPREAD //NV4097_SET_SPECULAR_ENABLE @@ -342,9 +281,6 @@ void GLGSRender::update_draw_state() //NV4097_SET_FLAT_SHADE_OP //NV4097_SET_EDGE_FLAG //NV4097_SET_COLOR_KEY_COLOR - //NV4097_SET_SHADER_CONTROL - //NV4097_SET_ZMIN_MAX_CONTROL - //NV4097_SET_ANTI_ALIASING_CONTROL //NV4097_SET_CLIP_ID_TEST_ENABLE // For OGL Z range is updated every draw as it is separate from viewport config diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index e59d27a2aa..4cbd92eecd 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -175,10 +175,9 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) " float fog_param1;\n" " uint rop_control;\n" " float alpha_ref;\n" - " uint reserved;\n" " uint fog_mode;\n" " float wpos_scale;\n" - " float wpos_bias;\n" + " vec2 wpos_bias;\n" "};\n\n" "layout(std140, binding = " << GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT << ") uniform TextureParametersBuffer\n" diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3441fd438e..acf258cc56 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1411,6 +1411,12 @@ void GLGSRender::on_guest_texture_read() enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id); } +void GLGSRender::write_barrier(u32 address, u32 range) +{ + ensure(is_current_thread()); + m_rtts.invalidate_range(utils::address_range32::start_length(address, range)); +} + void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) { query->result = 0; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 465fa51584..5627216055 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -192,8 +192,12 @@ public: gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); + // NV3089 bool scaled_image_from_memory(const rsx::blit_src_info& src_info, const rsx::blit_dst_info& dst_info, bool interpolate) override; + // Sync + void write_barrier(u32 address, u32 range) override; + // ZCULL void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 571adc9b97..7d36e5598b 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -176,6 +176,8 @@ namespace gl cmd->disablei(GL_BLEND, 0); } + cmd->polygon_mode(GL_FILL); + // Render cmd->use_program(program_handle.id()); on_load(); diff --git a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp index ffc4fe38f1..9835a5891f 100644 --- a/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp +++ b/rpcs3/Emu/RSX/GL/glutils/state_tracker.hpp @@ -17,6 +17,7 @@ namespace gl const u32 STENCIL_FRONT_OP = 0xFFFF0007; const u32 STENCIL_BACK_OP = 0xFFFF0008; const u32 STENCIL_BACK_MASK = 0xFFFF0009; + const u32 POLYGON_MODE = 0xFFFF000A; std::unordered_map properties = {}; std::unordered_map> indexed_properties = {}; @@ -353,6 +354,15 @@ namespace gl } } + void polygon_mode(GLenum mode) + { + if (!test_and_set_property(POLYGON_MODE, mode)) + { + // Note: GL4+ does not support separate polygon mode per-face-type + glPolygonMode(GL_FRONT_AND_BACK, mode); + } + } + void use_program(GLuint program) { if (current_program == program) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp index a41fae11dc..fcd30c45f8 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp @@ -5,12 +5,44 @@ #include "Emu/RSX/Core/RSXReservationLock.hpp" #include "Emu/RSX/Host/MM.h" +#include "Utilities/deferred_op.hpp" + #include "context_accessors.define.h" namespace rsx { namespace nv0039 { + // Transfer with stride + inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch, u8 src_stride, u8 dst_stride) + { + for (u32 row = 0; row < height; ++row) + { + auto dst_ptr = dst; + auto src_ptr = src; + while (src_ptr < src + width) + { + *dst_ptr = *src_ptr; + + src_ptr += src_stride; + dst_ptr += dst_stride; + } + + dst += dst_pitch; + src += src_pitch; + } + } + + inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch) + { + for (u32 i = 0; i < height; ++i) + { + std::memcpy(dst, src, width); + dst += dst_pitch; + src += src_pitch; + } + } + void buffer_notify(context* ctx, u32, u32 arg) { s32 in_pitch = REGS(ctx)->nv0039_input_pitch(); @@ -56,6 +88,13 @@ namespace rsx } } + // Deferred write_barrier on RSX side + utils::deferred_op deferred([&]() + { + RSX(ctx)->write_barrier(write_address, write_length); + // res->release(0); + }); + auto res = ::rsx::reservation_lock(write_address, write_length, read_address, read_length); u8* dst = vm::_ptr(write_address); @@ -81,68 +120,34 @@ namespace rsx // The formats are just input channel strides. You can use this to do cool tricks like gathering channels // Very rare, only seen in use by Destiny // TODO: Hw accel - for (u32 row = 0; row < line_count; ++row) - { - auto dst_ptr = dst; - auto src_ptr = src; - while (src_ptr < src + line_length) - { - *dst_ptr = *src_ptr; - - src_ptr += in_format; - dst_ptr += out_format; - } - - dst += out_pitch; - src += in_pitch; - } + block2d_copy_with_stride(dst, src, line_length, line_count, in_pitch, out_pitch, in_format, out_format); + return; } - else if (is_overlapping) [[ unlikely ]] - { - if (is_block_transfer) - { - std::memmove(dst, src, read_length); - } - else - { - std::vector temp(line_length * line_count); - u8* buf = temp.data(); - for (u32 y = 0; y < line_count; ++y) - { - std::memcpy(buf, src, line_length); - buf += line_length; - src += in_pitch; - } - - buf = temp.data(); - - for (u32 y = 0; y < line_count; ++y) - { - std::memcpy(dst, buf, line_length); - buf += line_length; - dst += out_pitch; - } - } - } - else + if (!is_overlapping) { if (is_block_transfer) { std::memcpy(dst, src, read_length); + return; } - else - { - for (u32 i = 0; i < line_count; ++i) - { - std::memcpy(dst, src, line_length); - dst += out_pitch; - src += in_pitch; - } - } + + block2d_copy(dst, src, line_length, line_count, in_pitch, out_pitch); + return; } - //res->release(0); + if (is_block_transfer) + { + std::memmove(dst, src, read_length); + return; + } + + // Handle overlapping 2D range using double-copy to temp. + std::vector temp(line_length * line_count); + u8* buf = temp.data(); + + block2d_copy(buf, src, line_length, line_count, in_pitch, line_length); + block2d_copy(dst, buf, line_length, line_count, line_length, out_pitch); } } } diff --git a/rpcs3/Emu/RSX/NV47/HW/nv3089.cpp b/rpcs3/Emu/RSX/NV47/HW/nv3089.cpp index cbc2e54296..666b4ef53a 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv3089.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv3089.cpp @@ -333,68 +333,68 @@ namespace rsx return src_range.overlaps(dst_range); }(); - if (is_overlapping) [[ unlikely ]] + if (is_overlapping) [[ unlikely ]] + { + if (need_clip) { - if (need_clip) - { - temp2.resize(dst.pitch * dst.clip_height); - clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data()); - return; - } - - if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) - { - const u32 buffer_pitch = dst.bpp * out_w; - temp2.resize(buffer_pitch * out_h); - std::add_pointer_t buf = temp2.data(), pixels = src.pixels; - - // Read the whole buffer from source - for (u32 y = 0; y < out_h; ++y) - { - std::memcpy(buf, pixels, buffer_pitch); - pixels += src.pitch; - buf += buffer_pitch; - } - - buf = temp2.data(), pixels = dst.pixels; - - // Write to destination - for (u32 y = 0; y < out_h; ++y) - { - std::memcpy(pixels, buf, buffer_pitch); - pixels += dst.pitch; - buf += buffer_pitch; - } - - return; - } - - std::memmove(dst.pixels, src.pixels, dst.pitch * out_h); + temp2.resize(dst.pitch * dst.clip_height); + clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data()); return; } - if (need_clip) [[ unlikely ]] + if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) { - clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); - return; - } - - if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]] - { - u8* dst_pixels = dst.pixels, * src_pixels = src.pixels; + const u32 buffer_pitch = dst.bpp * out_w; + temp2.resize(buffer_pitch * out_h); + std::add_pointer_t buf = temp2.data(), pixels = src.pixels; + // Read the whole buffer from source for (u32 y = 0; y < out_h; ++y) { - std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp); - dst_pixels += dst.pitch; - src_pixels += src.pitch; + std::memcpy(buf, pixels, buffer_pitch); + pixels += src.pitch; + buf += buffer_pitch; + } + + buf = temp2.data(), pixels = dst.pixels; + + // Write to destination + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(pixels, buf, buffer_pitch); + pixels += dst.pitch; + buf += buffer_pitch; } return; } - std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h); + std::memmove(dst.pixels, src.pixels, dst.pitch * out_h); return; + } + + if (need_clip) [[ unlikely ]] + { + clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch); + return; + } + + if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]] + { + u8* dst_pixels = dst.pixels, * src_pixels = src.pixels; + + for (u32 y = 0; y < out_h; ++y) + { + std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp); + dst_pixels += dst.pitch; + src_pixels += src.pitch; + } + + return; + } + + std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h); + return; } if (need_clip) [[ unlikely ]] diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp index c97373d8a0..de70c889c9 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp @@ -35,18 +35,31 @@ namespace rsx return page_navigation::exit; }); - if (!suspend_mode && boot_current_game_savestate(true, 1)) { - std::unique_ptr reload_state = std::make_unique( - get_localized_string(localized_string_id::HOME_MENU_RELOAD_SAVESTATE)); - - add_item(reload_state, [](pad_button btn) -> page_navigation + for (u32 save_index = 1; !suspend_mode && save_index <= 4; save_index++) + { + if (boot_current_game_savestate(true, save_index)) { - if (btn != pad_button::cross) return page_navigation::stay; - rsx_log.notice("User selected reload savestate in home menu"); - Emu.CallFromMainThread([]() { boot_current_game_savestate(true, 1); }); - return page_navigation::exit; - }); + const localized_string_id str_id = static_cast(static_cast(localized_string_id::HOME_MENU_RELOAD_SAVESTATE) + (save_index - 1)); + std::unique_ptr reload_state = std::make_unique(get_localized_string(str_id)); + + add_item(reload_state, [save_index](pad_button btn) -> page_navigation + { + if (btn != pad_button::cross) + { + return page_navigation::stay; + } + + rsx_log.notice("User selected reload savestate(%u) in home menu", save_index); + Emu.CallFromMainThread([save_index]() { boot_current_game_savestate(false, save_index); }); + return page_navigation::exit; + }); + } + else + { + break; + } } + apply_layout(); } } diff --git a/rpcs3/Emu/RSX/Program/GLSLInterpreter/FragmentInterpreter.glsl b/rpcs3/Emu/RSX/Program/GLSLInterpreter/FragmentInterpreter.glsl index 34a03343b3..928fc00b6b 100644 --- a/rpcs3/Emu/RSX/Program/GLSLInterpreter/FragmentInterpreter.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLInterpreter/FragmentInterpreter.glsl @@ -529,7 +529,7 @@ void initialize() // WPOS vr0 = vec4(abs(wpos_scale), wpos_scale, 1., 1.); - vr1 = vec4(0., wpos_bias, 0., 0.); + vr1 = vec4(wpos_bias, 0., 0.); wpos = gl_FragCoord * vr0 + vr1; // Other diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl index 62ec4a06c0..490b57850b 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXDefines2.glsl @@ -43,10 +43,9 @@ struct fragment_context_t float fog_param1; uint rop_control; float alpha_ref; - uint reserved; uint fog_mode; float wpos_scale; - float wpos_bias; + vec2 wpos_bias; }; )" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl index de133b3a22..e40373f64c 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl @@ -38,7 +38,7 @@ bool _fragment_discard = false; vec4 get_wpos() { float abs_scale = abs(wpos_scale); - return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.); + return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(wpos_bias, 0., 0.); } #endif diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 73aacf360a..9e09c17d78 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -374,6 +374,7 @@ namespace rsx // sync void sync(); flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); + virtual void write_barrier(u32 /*memory_address*/, u32 /*memory_range*/) {} virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload); virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8a3b63eed1..3bc240b768 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1613,6 +1613,12 @@ void VKGSRender::on_guest_texture_read(const vk::command_buffer& cmd) vkCmdUpdateBuffer(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id); } +void VKGSRender::write_barrier(u32 address, u32 range) +{ + ensure(is_current_thread()); + m_rtts.invalidate_range(utils::address_range32::start_length(address, range)); +} + void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) { rsx::thread::sync_hint(hint, payload); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6f52c9c1fc..b0edaa48f6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -249,6 +249,8 @@ public: void set_scissor(bool clip_viewport); void bind_viewport(); + // Sync + void write_barrier(u32 address, u32 range) override; void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override; bool release_GCM_label(u32 address, u32 data) override; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 96f45ae7a2..02d0f0f407 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -501,8 +501,11 @@ namespace vk if (auto ptr = std::get_if(&slot)) { + // We need to convert the VkDescriptorImageInfoEx entries back to the native vulkan variants since we're going to be flushing an array with no stride check + auto vk_data = ptr->map(FN(static_cast(x))); + writer.descriptorCount = ptr->size(); - m_descriptor_set.push(ptr->data(), ptr->size(), type, idx); + m_descriptor_set.push(vk_data.data(), vk_data.size(), type, idx); return; } @@ -552,8 +555,9 @@ namespace vk if (auto ptr = std::get_if(&slot)) { + auto vk_data = ptr->map(FN(static_cast(x))); // This can be optimized to update only changed ids but this is an interpreter-only feature for now ensure(m_descriptor_template[idx].descriptorCount == ptr->size()); - m_descriptor_template[idx].pImageInfo = m_descriptor_set.store(*ptr); + m_descriptor_template[idx].pImageInfo = m_descriptor_set.store(vk_data); return; } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp index b6743f1291..454038c962 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp @@ -403,6 +403,8 @@ namespace vk const auto dst_aspect = dst->aspect(); const auto dst_bpp = vk::get_format_texel_width(dst->format()); + std::unordered_set processed_input_images; + for (const auto& section : sections_to_transfer) { if (!section.src) @@ -436,11 +438,15 @@ namespace vk const bool typeless = section.src->aspect() != dst_aspect || !formats_are_bitcast_compatible(dst, section.src); - // Avoid inserting unnecessary barrier GENERAL->TRANSFER_SRC->GENERAL in active render targets - const auto preferred_layout = (section.src->current_layout != VK_IMAGE_LAYOUT_GENERAL) ? - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; + if (!processed_input_images.contains(section.src)) + { + // Avoid inserting unnecessary barrier GENERAL->TRANSFER_SRC->GENERAL in active render targets + const auto preferred_layout = (section.src->current_layout != VK_IMAGE_LAYOUT_GENERAL) ? + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; - section.src->push_layout(cmd, preferred_layout); + section.src->push_layout(cmd, preferred_layout); + processed_input_images.insert(section.src); + } auto src_image = section.src; auto src_x = section.src_x; @@ -479,8 +485,6 @@ namespace vk const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }}; const areai dst_rect = coordi{{ section.dst_x, section.dst_y }, { section.dst_w, section.dst_h }}; vk::copy_image_typeless(cmd, section.src, dst, src_rect, dst_rect, 1); - - section.src->pop_layout(cmd); continue; } @@ -541,8 +545,12 @@ namespace vk vkCmdCopyImage(cmd, _dst->value, _dst->current_layout, dst->value, dst->current_layout, 1, ©_rgn); } } + } - section.src->pop_layout(cmd); + // Pop unique image layouts here + for (auto& image : processed_input_images) + { + image->pop_layout(cmd); } } diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index 10b58d59c4..bdb99f199f 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -3132,7 +3132,7 @@ struct registers_decoder return to_window_origin(window_shader_origin_raw()); } - auto window_shader_pixel_center() const + auto pixel_center() const { return to_window_pixel_center(window_shader_pixel_center_raw()); } @@ -3146,7 +3146,7 @@ struct registers_decoder static void dump(std::string& out, const decoded_type& decoded) { fmt::append(out, "Viewport: height: %u origin: %s pixel center: %s", decoded.window_shader_height() - , decoded.window_shader_origin(), decoded.window_shader_pixel_center()); + , decoded.window_shader_origin(), decoded.pixel_center()); } }; diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 60be7ec14e..8e1dd698fe 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -226,9 +226,9 @@ namespace rsx return decode().window_shader_origin(); } - window_pixel_center shader_window_pixel() const + window_pixel_center pixel_center() const { - return decode().window_shader_pixel_center(); + return decode().pixel_center(); } u16 shader_window_height() const @@ -1321,6 +1321,16 @@ namespace rsx { return decode().enabled(); } + + polygon_mode polygon_mode_front() const + { + return decode().front_polygon_mode(); + } + + polygon_mode polygon_mode_back() const + { + return decode().back_polygon_mode(); + } }; extern rsx_state method_registers; diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 26b5bfff03..b4e0699fbc 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -2083,30 +2083,6 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, } } - // Check game updates - if (const std::string hdd0_boot = hdd0_game + m_title_id + "/USRDIR/EBOOT.BIN"; !m_ar - && recursion_count == 0 && disc.empty() && !bdvd_dir.empty() && !m_title_id.empty() - && resolved_path == GetCallbacks().resolve_path(vfs::get("/dev_bdvd/PS3_GAME/USRDIR/EBOOT.BIN")) - && resolved_path != GetCallbacks().resolve_path(hdd0_boot) && fs::is_file(hdd0_boot)) - { - if (const psf::registry update_sfo = psf::load(hdd0_game + m_title_id + "/PARAM.SFO").sfo; - psf::get_string(update_sfo, "TITLE_ID") == m_title_id && psf::get_string(update_sfo, "CATEGORY") == "GD") - { - // Booting game update - sys_log.success("Updates found at /dev_hdd0/game/%s/", m_title_id); - m_path = hdd0_boot; - - const game_boot_result boot_result = Load(m_title_id, true, recursion_count + 1); - if (boot_result == game_boot_result::no_errors) - { - return game_boot_result::no_errors; - } - - sys_log.error("Failed to boot update at \"%s\", game update may be corrupted! Consider uninstalling or reinstalling it. (reason: %s)", m_path, boot_result); - return boot_result; - } - } - // Check firmware version if (const std::string_view game_fw_version = psf::get_string(_psf, "PS3_SYSTEM_VER", ""); !game_fw_version.empty()) { @@ -2144,12 +2120,6 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, std::replace(m_title.begin(), m_title.end(), '\n', ' '); std::replace(m_localized_title.begin(), m_localized_title.end(), '\n', ' '); - // Mount /host_root/ if necessary (special value) - if (g_cfg.vfs.host_root) - { - vfs::mount("/host_root", "/"); - } - // Open SELF or ELF std::string elf_path = m_path; @@ -2214,9 +2184,40 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, return game_boot_result::decryption_error; } + // Check EBOOT.BIN (before updates - disc games) + ppu_exec_object ppu_exec; + ppu_exec.open(elf_file); + + // Check game updates + if (const std::string hdd0_boot = hdd0_game + m_title_id + "/USRDIR/EBOOT.BIN"; !m_ar + && recursion_count == 0 && disc.empty() && !bdvd_dir.empty() && !m_title_id.empty() + && resolved_path == GetCallbacks().resolve_path(vfs::get("/dev_bdvd/PS3_GAME/USRDIR/EBOOT.BIN")) + && resolved_path != GetCallbacks().resolve_path(hdd0_boot) && fs::is_file(hdd0_boot) + && ppu_exec == elf_error::ok) + { + if (const psf::registry update_sfo = psf::load(hdd0_game + m_title_id + "/PARAM.SFO").sfo; + psf::get_string(update_sfo, "TITLE_ID") == m_title_id && psf::get_string(update_sfo, "CATEGORY") == "GD") + { + ppu_exec = {}; + elf_file.close(); + + // Booting game update + sys_log.success("Updates found at /dev_hdd0/game/%s/", m_title_id); + m_path = hdd0_boot; + + const game_boot_result boot_result = Load(m_title_id, true, recursion_count + 1); + if (boot_result == game_boot_result::no_errors) + { + return game_boot_result::no_errors; + } + + sys_log.error("Failed to boot update at \"%s\", game update may be corrupted! Consider uninstalling or reinstalling it. (reason: %s)", m_path, boot_result); + return boot_result; + } + } + m_state = system_state::ready; - ppu_exec_object ppu_exec; ppu_prx_object ppu_prx; ppu_rel_object ppu_rel; spu_exec_object spu_exec; @@ -2224,19 +2225,25 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, vm::init(); - if (m_ar) + if (ppu_exec == elf_error::ok) { - vm::load(*m_ar); - } + if (m_ar) + { + vm::load(*m_ar); + } - if (!hdd1.empty()) - { - vfs::mount("/dev_hdd1", hdd1); - sys_log.notice("Hdd1: %s", vfs::get("/dev_hdd1")); - } + // Mount /host_root/ if necessary (special value) + if (g_cfg.vfs.host_root) + { + vfs::mount("/host_root", "/"); + } + + if (!hdd1.empty()) + { + vfs::mount("/dev_hdd1", hdd1); + sys_log.notice("Hdd1: %s", vfs::get("/dev_hdd1")); + } - if (ppu_exec.open(elf_file) == elf_error::ok) - { // PS3 executable GetCallbacks().on_ready(); @@ -2384,6 +2391,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, else if (ppu_prx.open(elf_file) == elf_error::ok) { // PPU PRX + m_ar.reset(); GetCallbacks().on_ready(); g_fxo->init(false); ppu_load_prx(ppu_prx, false, m_path); @@ -2392,6 +2400,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, else if (spu_exec.open(elf_file) == elf_error::ok) { // SPU executable + m_ar.reset(); GetCallbacks().on_ready(); g_fxo->init(false); spu_load_exec(spu_exec); @@ -2400,6 +2409,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, else if (spu_rel.open(elf_file) == elf_error::ok) { // SPU linker file + m_ar.reset(); GetCallbacks().on_ready(); g_fxo->init(false); spu_load_rel_exec(spu_rel); @@ -2408,6 +2418,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, else if (ppu_rel.open(elf_file) == elf_error::ok) { // PPU linker file + m_ar.reset(); GetCallbacks().on_ready(); g_fxo->init(false); ppu_load_rel_exec(ppu_rel); diff --git a/rpcs3/Emu/localized_string_id.h b/rpcs3/Emu/localized_string_id.h index 5ab9803591..399b45c437 100644 --- a/rpcs3/Emu/localized_string_id.h +++ b/rpcs3/Emu/localized_string_id.h @@ -274,6 +274,9 @@ enum class localized_string_id HOME_MENU_SAVESTATE_SAVE, HOME_MENU_SAVESTATE_AND_EXIT, HOME_MENU_RELOAD_SAVESTATE, + HOME_MENU_RELOAD_SECOND_SAVESTATE, + HOME_MENU_RELOAD_THIRD_SAVESTATE, + HOME_MENU_RELOAD_FOURTH_SAVESTATE, HOME_MENU_RECORDING, HOME_MENU_TROPHIES, HOME_MENU_TROPHY_LIST_TITLE, diff --git a/rpcs3/Input/ds4_pad_handler.cpp b/rpcs3/Input/ds4_pad_handler.cpp index 0eebc1183a..77c59fc48f 100644 --- a/rpcs3/Input/ds4_pad_handler.cpp +++ b/rpcs3/Input/ds4_pad_handler.cpp @@ -189,8 +189,6 @@ void ds4_pad_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 40; // between 0 and 255 cfg->ltriggerthreshold.def = 0; // between 0 and 255 cfg->rtriggerthreshold.def = 0; // between 0 and 255 - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // Set default color value cfg->colorR.def = 0; diff --git a/rpcs3/Input/dualsense_pad_handler.cpp b/rpcs3/Input/dualsense_pad_handler.cpp index 70ddecdb79..5585c0ef12 100644 --- a/rpcs3/Input/dualsense_pad_handler.cpp +++ b/rpcs3/Input/dualsense_pad_handler.cpp @@ -262,8 +262,6 @@ void dualsense_pad_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 40; // between 0 and 255 cfg->ltriggerthreshold.def = 0; // between 0 and 255 cfg->rtriggerthreshold.def = 0; // between 0 and 255 - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // Set default color value cfg->colorR.def = 0; diff --git a/rpcs3/Input/evdev_joystick_handler.cpp b/rpcs3/Input/evdev_joystick_handler.cpp index a9011af5cc..6ddb0ff989 100644 --- a/rpcs3/Input/evdev_joystick_handler.cpp +++ b/rpcs3/Input/evdev_joystick_handler.cpp @@ -120,8 +120,6 @@ void evdev_joystick_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 30; // between 0 and 255 cfg->ltriggerthreshold.def = 0; // between 0 and 255 cfg->rtriggerthreshold.def = 0; // between 0 and 255 - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // apply defaults cfg->from_default(); diff --git a/rpcs3/Input/gui_pad_thread.cpp b/rpcs3/Input/gui_pad_thread.cpp index e3401c94a9..666bf329ac 100644 --- a/rpcs3/Input/gui_pad_thread.cpp +++ b/rpcs3/Input/gui_pad_thread.cpp @@ -169,7 +169,7 @@ bool gui_pad_thread::init() usetup.id.bustype = BUS_USB; usetup.id.vendor = 0x1234; usetup.id.product = 0x1234; - std::strcpy(usetup.name, "RPCS3 GUI Input Device"); + strcpy_trunc(usetup.name, "RPCS3 GUI Input Device"sv); // The ioctls below will enable the device that is about to be created to pass events. CHECK_IOCTRL_RET(ioctl(m_uinput_fd, UI_SET_EVBIT, EV_KEY)); diff --git a/rpcs3/Input/keyboard_pad_handler.cpp b/rpcs3/Input/keyboard_pad_handler.cpp index d470d4a4e5..c9da8dcb29 100644 --- a/rpcs3/Input/keyboard_pad_handler.cpp +++ b/rpcs3/Input/keyboard_pad_handler.cpp @@ -68,8 +68,6 @@ void keyboard_pad_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 0; cfg->ltriggerthreshold.def = 0; cfg->rtriggerthreshold.def = 0; - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // apply defaults cfg->from_default(); diff --git a/rpcs3/Input/mm_joystick_handler.cpp b/rpcs3/Input/mm_joystick_handler.cpp index 299788e254..cbe9b60223 100644 --- a/rpcs3/Input/mm_joystick_handler.cpp +++ b/rpcs3/Input/mm_joystick_handler.cpp @@ -62,8 +62,6 @@ void mm_joystick_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 0; // between 0 and 255 cfg->ltriggerthreshold.def = 0; // between 0 and 255 cfg->rtriggerthreshold.def = 0; // between 0 and 255 - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // apply defaults cfg->from_default(); diff --git a/rpcs3/Input/sdl_pad_handler.cpp b/rpcs3/Input/sdl_pad_handler.cpp index 76af1df859..1b6ddbc40c 100644 --- a/rpcs3/Input/sdl_pad_handler.cpp +++ b/rpcs3/Input/sdl_pad_handler.cpp @@ -158,8 +158,6 @@ void sdl_pad_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = 8000; // between 0 and SDL_JOYSTICK_AXIS_MAX cfg->ltriggerthreshold.def = 0; // between 0 and SDL_JOYSTICK_AXIS_MAX cfg->rtriggerthreshold.def = 0; // between 0 and SDL_JOYSTICK_AXIS_MAX - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // Set default color value cfg->colorR.def = 0; diff --git a/rpcs3/Input/xinput_pad_handler.cpp b/rpcs3/Input/xinput_pad_handler.cpp index 101e9dba67..4feec000e3 100644 --- a/rpcs3/Input/xinput_pad_handler.cpp +++ b/rpcs3/Input/xinput_pad_handler.cpp @@ -128,8 +128,6 @@ void xinput_pad_handler::init_config(cfg_pad* cfg) cfg->rstickdeadzone.def = XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE; // between 0 and 32767 cfg->ltriggerthreshold.def = XINPUT_GAMEPAD_TRIGGER_THRESHOLD; // between 0 and 255 cfg->rtriggerthreshold.def = XINPUT_GAMEPAD_TRIGGER_THRESHOLD; // between 0 and 255 - cfg->lpadsquircling.def = 8000; - cfg->rpadsquircling.def = 8000; // apply defaults cfg->from_default(); diff --git a/rpcs3/Loader/PSF.cpp b/rpcs3/Loader/PSF.cpp index ee88424608..28c08d7b8c 100644 --- a/rpcs3/Loader/PSF.cpp +++ b/rpcs3/Loader/PSF.cpp @@ -253,7 +253,10 @@ namespace psf if (indices[i].param_fmt == format::string) { // Find null terminator - value.resize(std::strlen(value.c_str())); + if (usz nts = value.find_first_of('\0'); nts != umax) + { + value.resize(nts); + } } result.sfo.emplace(std::piecewise_construct, diff --git a/rpcs3/Loader/TAR.cpp b/rpcs3/Loader/TAR.cpp index bfd937b3d3..d454eca76b 100644 --- a/rpcs3/Loader/TAR.cpp +++ b/rpcs3/Loader/TAR.cpp @@ -2,6 +2,7 @@ #include "Emu/VFS.h" #include "Emu/System.h" +#include "Emu/Cell/timers.hpp" #include "Crypto/unself.h" @@ -13,6 +14,7 @@ #include #include +#include LOG_CHANNEL(tar_log, "TAR"); @@ -200,9 +202,8 @@ std::unique_ptr tar_object::get_file(const std::string& path, std bool tar_object::extract(const std::string& prefix_path, bool is_vfs) { - std::vector filedata_buffer(0x80'0000); - std::span filedata_span{filedata_buffer.data(), filedata_buffer.size()}; - + std::vector> filedata_buffers; + auto iter = m_map.begin(); auto get_next = [&](bool is_first) @@ -294,6 +295,13 @@ bool tar_object::extract(const std::string& prefix_path, bool is_vfs) fs::file file; + const u64 current_time = get_system_time(); + + const usz filesize = file_data->get_size() - file_data->pos; + + constexpr usz chunk_size = 0x8 * 0x100000; + constexpr usz chunk_count = 16; + if (should_ignore) { file = fs::make_stream>(); @@ -301,34 +309,89 @@ bool tar_object::extract(const std::string& prefix_path, bool is_vfs) else { file.open(result, fs::rewrite); + + filedata_buffers.clear(); + + for (usz i = 0; i < std::min(utils::aligned_div(filesize, chunk_size), chunk_count); i++) + { + if (filedata_buffers.size() <= i) + { + filedata_buffers.resize(i + 1); + } + + filedata_buffers[i].resize(std::min(filesize - i * chunk_size, chunk_size)); + } } if (file && file_data) { - while (true) + std::unique_ptr>> async_reader; + + atomic_t filedata_read_pos = 0, filedata_write_pos = 0; + + while (!should_ignore && filesize) { - const usz unread_size = file_data->try_read(filedata_span); - - if (unread_size == 0) + auto get_span_at = [&](usz pos) { - file.write(filedata_span.data(), should_ignore ? 0 : filedata_span.size()); - continue; + auto& span = filedata_buffers[pos % filedata_buffers.size()]; + return std::span(span.data(), std::min(filesize - pos * chunk_size, chunk_size)); + }; + + // Feed itself if smaller than one chunk + if (filedata_buffers.size() == 1) + { + file_data->try_read(get_span_at(filedata_read_pos)); + filedata_read_pos++; + } + else if (!async_reader) + { + async_reader = std::make_unique>>("TAR Extract File Thread", [&]() + { + while (true) + { + while (filedata_read_pos - filedata_write_pos == filedata_buffers.size()) + { + thread_ctrl::wait_for(1000); + } + + const usz unread_size = file_data->try_read(get_span_at(filedata_read_pos)); + + if (unread_size) + { + ensure(unread_size == filedata_buffers[filedata_read_pos.load() % filedata_buffers.size()].size()); + break; + } + + filedata_read_pos++; + } + }); } - // Tail data - - if (usz read_size = filedata_span.size() - unread_size) + while (filedata_read_pos == filedata_write_pos) { - ensure(file_data->try_read(filedata_span.first(read_size)) == 0); - file.write(filedata_span.data(), should_ignore ? 0 : read_size); + std::this_thread::yield(); } - break; + const auto data_span = get_span_at(filedata_write_pos); + + file.write(data_span.data(), data_span.size()); + filedata_write_pos++; + + if (filedata_write_pos == utils::aligned_div(filesize, filedata_buffers[0].size())) + { + if (async_reader) + { + // Join thread + (*async_reader)(); + async_reader.reset(); + } + + break; + } } file.close(); - file_data->seek_pos(m_ar_tar_start + largest_offset, true); if (!m_file) @@ -349,7 +412,7 @@ bool tar_object::extract(const std::string& prefix_path, bool is_vfs) return false; } - tar_log.notice("TAR Loader: written file %s", name); + (m_ar && filesize > 1024 ? tar_log.success : tar_log.notice)("TAR Loader: written file %s (took: %f seconds)", name, (get_system_time() - current_time) / 1'000'000.); break; } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 2cc3d31e39..c5f6673723 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -450,6 +450,7 @@ + @@ -545,6 +546,7 @@ + @@ -809,6 +811,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index c91fbb2a50..9c09dd791a 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -879,6 +879,9 @@ Emu\Io + + Emu\Io + Emu\Cell\lv2 @@ -2079,6 +2082,9 @@ Emu\Io + + Emu\Io + Crypto @@ -2749,6 +2755,9 @@ Emu\GPU\RSX\Common + + Utilities + diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index bca9d45f6a..fe26cc1968 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -328,6 +328,9 @@ true + + true + true @@ -616,6 +619,9 @@ true + + true + true @@ -819,6 +825,7 @@ + @@ -1665,6 +1672,16 @@ .\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp "$(QTDIR)\bin\moc.exe" "%(FullPath)" -o ".\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp" -D_WINDOWS -DUNICODE -DWIN32 -DWIN64 -DWITH_DISCORD_RPC -DQT_NO_DEBUG -DQT_WIDGETS_LIB -DQT_GUI_LIB -DQT_CORE_LIB -DNDEBUG -DQT_CONCURRENT_LIB -D%(PreprocessorDefinitions) "-I.\..\3rdparty\wolfssl\wolfssl" "-I.\..\3rdparty\curl\curl\include" "-I.\..\3rdparty\libusb\libusb\libusb" "-I$(VULKAN_SDK)\Include" "-I$(QTDIR)\include" "-I$(QTDIR)\include\QtWidgets" "-I$(QTDIR)\include\QtGui" "-I$(QTDIR)\include\QtCore" "-I.\release" "-I.\QTGeneratedFiles\$(ConfigurationName)" "-I.\QTGeneratedFiles" "-I$(QTDIR)\include\QtConcurrent" + + $(QTDIR)\bin\moc.exe;%(FullPath) + Moc%27ing %(Identity)... + .\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp + "$(QTDIR)\bin\moc.exe" "%(FullPath)" -o ".\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp" -D_WINDOWS -DUNICODE -DWIN32 -DWIN64 -DQT_WIDGETS_LIB -DQT_GUI_LIB -DQT_CORE_LIB -DQT_CONCURRENT_LIB -D%(PreprocessorDefinitions) "-I.\..\3rdparty\wolfssl\wolfssl" "-I.\..\3rdparty\curl\curl\include" "-I.\..\3rdparty\libusb\libusb\libusb" "-I$(VULKAN_SDK)\Include" "-I$(QTDIR)\include" "-I$(QTDIR)\include\QtWidgets" "-I$(QTDIR)\include\QtGui" "-I$(QTDIR)\include\QtCore" "-I.\debug" "-I.\QTGeneratedFiles\$(ConfigurationName)" "-I.\QTGeneratedFiles" "-I$(QTDIR)\include\QtConcurrent" + $(QTDIR)\bin\moc.exe;%(FullPath) + Moc%27ing %(Identity)... + .\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp + "$(QTDIR)\bin\moc.exe" "%(FullPath)" -o ".\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp" -D_WINDOWS -DUNICODE -DWIN32 -DWIN64 -DWITH_DISCORD_RPC -DQT_NO_DEBUG -DQT_WIDGETS_LIB -DQT_GUI_LIB -DQT_CORE_LIB -DNDEBUG -DQT_CONCURRENT_LIB -D%(PreprocessorDefinitions) "-I.\..\3rdparty\wolfssl\wolfssl" "-I.\..\3rdparty\curl\curl\include" "-I.\..\3rdparty\libusb\libusb\libusb" "-I$(VULKAN_SDK)\Include" "-I$(QTDIR)\include" "-I$(QTDIR)\include\QtWidgets" "-I$(QTDIR)\include\QtGui" "-I$(QTDIR)\include\QtCore" "-I.\release" "-I.\QTGeneratedFiles\$(ConfigurationName)" "-I.\QTGeneratedFiles" "-I$(QTDIR)\include\QtConcurrent" + Moc%27ing %(Identity)... .\QTGeneratedFiles\$(ConfigurationName)\moc_%(Filename).cpp diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index 2ffaca09b5..7c98b7c735 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -107,6 +107,9 @@ {f5fcca0d-918b-46ba-bb91-2f2f9d9ddbba} + + {8b4d2dff-2b4e-4794-9859-4379ef0e75c0} + {c25f8f80-cc74-4760-8488-a291b3026b1d} @@ -639,6 +642,9 @@ Gui\infinity + + Gui\infinity + Gui\skylanders @@ -753,6 +759,12 @@ Generated Files\Release + + Generated Files\Debug + + + Generated Files\Release + Generated Files\Debug @@ -1621,6 +1633,9 @@ Gui\infinity + + Gui\kamen_rider + Gui\skylanders diff --git a/rpcs3/rpcs3qt/CMakeLists.txt b/rpcs3/rpcs3qt/CMakeLists.txt index 7f7605cb74..f194b7550b 100644 --- a/rpcs3/rpcs3qt/CMakeLists.txt +++ b/rpcs3/rpcs3qt/CMakeLists.txt @@ -45,6 +45,7 @@ add_library(rpcs3_ui STATIC input_dialog.cpp instruction_editor_dialog.cpp ipc_settings_dialog.cpp + kamen_rider_dialog.cpp kernel_explorer.cpp localized.cpp localized_emu.cpp diff --git a/rpcs3/rpcs3qt/kamen_rider_dialog.cpp b/rpcs3/rpcs3qt/kamen_rider_dialog.cpp new file mode 100644 index 0000000000..71a8bd3f88 --- /dev/null +++ b/rpcs3/rpcs3qt/kamen_rider_dialog.cpp @@ -0,0 +1,422 @@ +#include "stdafx.h" +#include "Utilities/File.h" +#include "kamen_rider_dialog.h" +#include "Emu/Io/KamenRider.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kamen_rider_dialog* kamen_rider_dialog::inst = nullptr; +std::array>, UI_FIG_NUM> kamen_rider_dialog::figure_slots = {}; +QString last_kamen_rider_path; + +static const std::map, const std::string> list_kamen_riders = { + {{0x10, 0x10}, "Kamen Rider Drive Wind"}, + {{0x10, 0x20}, "Kamen Rider Drive Water"}, + {{0x10, 0x30}, "Kamen Rider Drive Fire"}, + {{0x10, 0x40}, "Kamen Rider Drive Light"}, + {{0x10, 0x50}, "Kamen Rider Drive Dark"}, + {{0x11, 0x10}, "Kamen Rider Gaim Wind"}, + {{0x11, 0x20}, "Kamen Rider Gaim Water"}, + {{0x12, 0x20}, "Kamen Rider Wizard Water"}, + {{0x12, 0x30}, "Kamen Rider Wizard Fire"}, + {{0x13, 0x40}, "Kamen Rider Fourze Light"}, + {{0x14, 0x20}, "Kamen Rider 000 Water"}, + {{0x15, 0x10}, "Kamen Rider Double Wind"}, + {{0x16, 0x50}, "Kamen Rider Decade Dark"}, + {{0x17, 0x50}, "Kamen Rider Kiva Dark"}, + {{0x18, 0x40}, "Kamen Rider Den-O Light"}, + {{0x19, 0x30}, "Kamen Rider Kabuto Fire"}, + {{0x1A, 0x30}, "Kamen Rider Hibiki Fire"}, + {{0x1B, 0x50}, "Kamen Rider Blade Dark"}, + {{0x1C, 0x50}, "Kamen Rider Faiz Dark"}, + {{0x1D, 0x10}, "Kamen Rider Ryuki Wind"}, + {{0x1E, 0x20}, "Kamen Rider Agito Water"}, + {{0x1F, 0x40}, "Kamen Rider Kuuga Light"}, + {{0x20, 0x00}, "Type Wild"}, + {{0x21, 0x00}, "Kamen Rider Zangetsu"}, + {{0x22, 0x00}, "All Dragon"}, + {{0x31, 0x00}, "Kachidoki Arms"}, +}; + +static u32 kamen_rider_crc32(const std::array& buffer) +{ + static constexpr std::array CRC32_TABLE{ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, + 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, + 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, + 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, + 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, + 0xb6662d3d, 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, + 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, + 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 0x4db26158, 0x3ab551ce, + 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, + 0xce61e49f, 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, + 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 0x8708a3d2, 0x1e01f268, + 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, + 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, + 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, + 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, + 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, + 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 0x88085ae6, + 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, + 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, + 0x47b2cf7f, 0x30b5ffe9, 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; + + // Kamen Rider figures calculate their CRC32 based on 12 bytes in the block of 16 + u32 ret = 0; + for (u32 i = 0; i < 12; ++i) + { + const u8 index = u8(ret & 0xFF) ^ buffer[i]; + ret = ((ret >> 8) ^ CRC32_TABLE[index]); + } + + return ret; +} + +kamen_rider_creator_dialog::kamen_rider_creator_dialog(QWidget* parent) + : QDialog(parent) +{ + setWindowTitle(tr("Kamen Rider Creator")); + setObjectName("kamen_rider_creator"); + setMinimumSize(QSize(500, 150)); + + QVBoxLayout* vbox_panel = new QVBoxLayout(); + + QComboBox* combo_figlist = new QComboBox(); + QStringList filterlist; + for (const auto& [entry, figure_name] : list_kamen_riders) + { + const uint qvar = (entry.first << 8) | entry.second; + QString name = QString::fromStdString(figure_name); + combo_figlist->addItem(name, QVariant(qvar)); + filterlist << std::move(name); + } + combo_figlist->addItem(tr("--Unknown--"), QVariant(0xFFFF)); + combo_figlist->setEditable(true); + combo_figlist->setInsertPolicy(QComboBox::NoInsert); + combo_figlist->model()->sort(0, Qt::AscendingOrder); + + QCompleter* co_compl = new QCompleter(filterlist, this); + co_compl->setCaseSensitivity(Qt::CaseInsensitive); + co_compl->setCompletionMode(QCompleter::PopupCompletion); + co_compl->setFilterMode(Qt::MatchContains); + combo_figlist->setCompleter(co_compl); + + vbox_panel->addWidget(combo_figlist); + + QFrame* line = new QFrame(); + line->setFrameShape(QFrame::HLine); + line->setFrameShadow(QFrame::Sunken); + vbox_panel->addWidget(line); + + QHBoxLayout* hbox_idvar = new QHBoxLayout(); + QLabel* label_id = new QLabel(tr("ID:")); + QLabel* label_type = new QLabel(tr("Type:")); + QLineEdit* edit_id = new QLineEdit("0"); + QLineEdit* edit_type = new QLineEdit("0"); + QRegularExpressionValidator* rxv = new QRegularExpressionValidator(QRegularExpression("\\d*"), this); + edit_id->setValidator(rxv); + edit_type->setValidator(rxv); + hbox_idvar->addWidget(label_id); + hbox_idvar->addWidget(edit_id); + hbox_idvar->addWidget(label_type); + hbox_idvar->addWidget(edit_type); + vbox_panel->addLayout(hbox_idvar); + + QHBoxLayout* hbox_buttons = new QHBoxLayout(); + QPushButton* btn_create = new QPushButton(tr("Create"), this); + QPushButton* btn_cancel = new QPushButton(tr("Cancel"), this); + hbox_buttons->addStretch(); + hbox_buttons->addWidget(btn_create); + hbox_buttons->addWidget(btn_cancel); + vbox_panel->addLayout(hbox_buttons); + + setLayout(vbox_panel); + + connect(combo_figlist, QOverload::of(&QComboBox::currentIndexChanged), [=](int index) + { + const u16 fig_info = combo_figlist->itemData(index).toUInt(); + if (fig_info != 0xFFFF) + { + const u8 fig_id = fig_info >> 8; + const u8 fig_type = fig_info & 0xFF; + + edit_id->setText(QString::number(fig_id)); + edit_type->setText(QString::number(fig_type)); + } + }); + + connect(btn_create, &QAbstractButton::clicked, this, [=, this]() + { + bool ok_id = false, ok_var = false; + const u8 fig_id = edit_id->text().toUShort(&ok_id); + if (!ok_id) + { + QMessageBox::warning(this, tr("Error converting value"), tr("ID entered is invalid!"), QMessageBox::Ok); + return; + } + const u8 fig_type = edit_type->text().toUShort(&ok_var); + if (!ok_var) + { + QMessageBox::warning(this, tr("Error converting value"), tr("Variant entered is invalid!"), QMessageBox::Ok); + return; + } + + QString predef_name = last_kamen_rider_path; + const auto found_fig = list_kamen_riders.find(std::make_pair(fig_id, fig_type)); + if (found_fig != list_kamen_riders.cend()) + { + predef_name += QString::fromStdString(found_fig->second + ".bin"); + } + else + { + predef_name += QString("Unknown(%1 %2).bin").arg(fig_id).arg(fig_type); + } + + file_path = QFileDialog::getSaveFileName(this, tr("Create Kamen Rider File"), predef_name, tr("Kamen Rider Object (*.bin);;All Files (*)")); + if (file_path.isEmpty()) + { + return; + } + + fs::file fig_file(file_path.toStdString(), fs::read + fs::write + fs::create); + if (!fig_file) + { + QMessageBox::warning(this, tr("Failed to create kamen rider file!"), tr("Failed to create kamen rider file:\n%1").arg(file_path), QMessageBox::Ok); + return; + } + + std::array buf{}; + + buf[0] = 0x04; + buf[6] = 0x80; + + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, 255); + + buf[1] = dist(mt); + buf[2] = dist(mt); + buf[3] = dist(mt); + buf[4] = dist(mt); + buf[5] = dist(mt); + + buf[7] = 0x89; + buf[8] = 0x44; + buf[10] = 0xc2; + std::array figure_data = {u8(dist(mt)), 0x03, 0x00, 0x00, 0x01, 0x0e, 0x0a, 0x0a, 0x10, fig_type, 0x01, fig_id}; + write_to_ptr>(figure_data.data(), 0xC, kamen_rider_crc32(figure_data)); + memcpy(&buf[16], figure_data.data(), figure_data.size()); + fig_file.write(buf.data(), buf.size()); + fig_file.close(); + + last_kamen_rider_path = QFileInfo(file_path).absolutePath() + "/"; + accept(); + }); + + connect(btn_cancel, &QAbstractButton::clicked, this, &QDialog::reject); + + connect(co_compl, QOverload::of(&QCompleter::activated), [=](const QString& text) + { + combo_figlist->setCurrentText(text); + combo_figlist->setCurrentIndex(combo_figlist->findText(text)); + }); +} + +QString kamen_rider_creator_dialog::get_file_path() const +{ + return file_path; +} + +kamen_rider_dialog::kamen_rider_dialog(QWidget* parent) + : QDialog(parent) +{ + setWindowTitle(tr("Kamen Rider Manager")); + setObjectName("kamen_riders_manager"); + setAttribute(Qt::WA_DeleteOnClose); + setMinimumSize(QSize(700, 200)); + + QVBoxLayout* vbox_panel = new QVBoxLayout(); + + auto add_line = [](QVBoxLayout* vbox) + { + QFrame* line = new QFrame(); + line->setFrameShape(QFrame::HLine); + line->setFrameShadow(QFrame::Sunken); + vbox->addWidget(line); + }; + + QGroupBox* group_kamen_riders = new QGroupBox(tr("Active Kamen Riders:")); + QVBoxLayout* vbox_group = new QVBoxLayout(); + + for (auto i = 0; i < UI_FIG_NUM; i++) + { + if (i != 0) + { + add_line(vbox_group); + } + + QHBoxLayout* hbox_kamen_rider = new QHBoxLayout(); + QLabel* label_figname = new QLabel(QString(tr("Kamen Rider %1")).arg(i + 1)); + edit_kamen_riders[i] = new QLineEdit(); + edit_kamen_riders[i]->setEnabled(false); + + QPushButton* clear_btn = new QPushButton(tr("Clear")); + QPushButton* create_btn = new QPushButton(tr("Create")); + QPushButton* load_btn = new QPushButton(tr("Load")); + + connect(clear_btn, &QAbstractButton::clicked, this, [this, i]() + { + clear_kamen_rider(i); + }); + connect(create_btn, &QAbstractButton::clicked, this, [this, i]() + { + create_kamen_rider(i); + }); + connect(load_btn, &QAbstractButton::clicked, this, [this, i]() + { + load_kamen_rider(i); + }); + + hbox_kamen_rider->addWidget(label_figname); + hbox_kamen_rider->addWidget(edit_kamen_riders[i]); + hbox_kamen_rider->addWidget(clear_btn); + hbox_kamen_rider->addWidget(create_btn); + hbox_kamen_rider->addWidget(load_btn); + + vbox_group->addLayout(hbox_kamen_rider); + } + + group_kamen_riders->setLayout(vbox_group); + vbox_panel->addWidget(group_kamen_riders); + setLayout(vbox_panel); + + update_edits(); +} + +kamen_rider_dialog::~kamen_rider_dialog() +{ + inst = nullptr; +} + +kamen_rider_dialog* kamen_rider_dialog::get_dlg(QWidget* parent) +{ + if (inst == nullptr) + inst = new kamen_rider_dialog(parent); + + return inst; +} + +void kamen_rider_dialog::clear_kamen_rider(u8 slot) +{ + if (const auto& slot_infos = ::at32(figure_slots, slot)) + { + const auto& [cur_slot, id, var] = slot_infos.value(); + g_ridergate.remove_figure(cur_slot); + figure_slots[slot] = {}; + update_edits(); + } +} + +void kamen_rider_dialog::create_kamen_rider(u8 slot) +{ + kamen_rider_creator_dialog create_dlg(this); + if (create_dlg.exec() == Accepted) + { + load_kamen_rider_path(slot, create_dlg.get_file_path()); + } +} + +void kamen_rider_dialog::load_kamen_rider(u8 slot) +{ + const QString file_path = QFileDialog::getOpenFileName(this, tr("Select Kamen Rider File"), last_kamen_rider_path, tr("Kamen Rider (*.bin);;All Files (*)")); + if (file_path.isEmpty()) + { + return; + } + + last_kamen_rider_path = QFileInfo(file_path).absolutePath() + "/"; + + load_kamen_rider_path(slot, file_path); +} + +void kamen_rider_dialog::load_kamen_rider_path(u8 slot, const QString& path) +{ + fs::file fig_file(path.toStdString(), fs::read + fs::write + fs::lock); + if (!fig_file) + { + QMessageBox::warning(this, tr("Failed to open the kamen rider file!"), tr("Failed to open the kamen rider file(%1)!\nFile may already be in use on the portal.").arg(path), QMessageBox::Ok); + return; + } + + std::array data; + if (fig_file.read(data.data(), data.size()) != data.size()) + { + QMessageBox::warning(this, tr("Failed to read the kamen rider file!"), tr("Failed to read the kamen rider file(%1)!\nFile was too small.").arg(path), QMessageBox::Ok); + return; + } + + clear_kamen_rider(slot); + + u8 fig_id = data[0x1B]; + u8 fig_type = data[0x19]; + + u8 portal_slot = g_ridergate.load_figure(data, std::move(fig_file)); + figure_slots[slot] = std::tuple(portal_slot, fig_id, fig_type); + + update_edits(); +} + +void kamen_rider_dialog::update_edits() +{ + for (auto i = 0; i < UI_FIG_NUM; i++) + { + QString display_string; + if (const auto& sd = figure_slots[i]) + { + const auto& [portal_slot, fig_id, fig_type] = sd.value(); + const auto found_fig = list_kamen_riders.find(std::make_pair(fig_id, fig_type)); + if (found_fig != list_kamen_riders.cend()) + { + display_string = QString::fromStdString(found_fig->second); + } + else + { + display_string = QString(tr("Unknown (Id:%1 Var:%2)")).arg(fig_id).arg(fig_type); + } + } + else + { + display_string = tr("None"); + } + + edit_kamen_riders[i]->setText(display_string); + } +} diff --git a/rpcs3/rpcs3qt/kamen_rider_dialog.h b/rpcs3/rpcs3qt/kamen_rider_dialog.h new file mode 100644 index 0000000000..3eb040d2b6 --- /dev/null +++ b/rpcs3/rpcs3qt/kamen_rider_dialog.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include "util/types.hpp" + +#include +#include + +constexpr auto UI_FIG_NUM = 8; + +class kamen_rider_creator_dialog : public QDialog +{ + Q_OBJECT + +public: + explicit kamen_rider_creator_dialog(QWidget* parent); + QString get_file_path() const; + +protected: + QString file_path; +}; + +class kamen_rider_dialog : public QDialog +{ + Q_OBJECT + +public: + explicit kamen_rider_dialog(QWidget* parent); + ~kamen_rider_dialog(); + static kamen_rider_dialog* get_dlg(QWidget* parent); + + kamen_rider_dialog(kamen_rider_dialog const&) = delete; + void operator=(kamen_rider_dialog const&) = delete; + +protected: + void clear_kamen_rider(u8 slot); + void create_kamen_rider(u8 slot); + void load_kamen_rider(u8 slot); + void load_kamen_rider_path(u8 slot, const QString& path); + + void update_edits(); + +protected: + std::array edit_kamen_riders{}; + static std::array>, UI_FIG_NUM> figure_slots; + +private: + static kamen_rider_dialog* inst; +}; diff --git a/rpcs3/rpcs3qt/localized_emu.h b/rpcs3/rpcs3qt/localized_emu.h index bdc46c9f6b..7d4453a1bd 100644 --- a/rpcs3/rpcs3qt/localized_emu.h +++ b/rpcs3/rpcs3qt/localized_emu.h @@ -295,6 +295,9 @@ private: case localized_string_id::HOME_MENU_SAVESTATE_SAVE: return tr("Save Emulation State"); case localized_string_id::HOME_MENU_SAVESTATE_AND_EXIT: return tr("Save Emulation State And Exit"); case localized_string_id::HOME_MENU_RELOAD_SAVESTATE: return tr("Reload Last Emulation State"); + case localized_string_id::HOME_MENU_RELOAD_SECOND_SAVESTATE: return tr("Reload Second-To-Last Emulation State"); + case localized_string_id::HOME_MENU_RELOAD_THIRD_SAVESTATE: return tr("Reload Third-To-Last Emulation State"); + case localized_string_id::HOME_MENU_RELOAD_FOURTH_SAVESTATE: return tr("Reload Fourth-To-Last Emulation State"); case localized_string_id::HOME_MENU_RECORDING: return tr("Start/Stop Recording"); case localized_string_id::HOME_MENU_TROPHIES: return tr("Trophies"); case localized_string_id::HOME_MENU_TROPHY_LIST_TITLE: return tr("Trophy Progress: %0").arg(std::forward(args)...); diff --git a/rpcs3/rpcs3qt/main_window.cpp b/rpcs3/rpcs3qt/main_window.cpp index 2e7533cf83..38767ceabb 100644 --- a/rpcs3/rpcs3qt/main_window.cpp +++ b/rpcs3/rpcs3qt/main_window.cpp @@ -23,6 +23,7 @@ #include "skylander_dialog.h" #include "infinity_dialog.h" #include "dimensions_dialog.h" +#include "kamen_rider_dialog.h" #include "cheat_manager.h" #include "patch_manager_dialog.h" #include "patch_creator_dialog.h" @@ -2441,7 +2442,7 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri } } } - + if (!game_data_shortcuts.empty() && !locations.empty()) { m_game_list_frame->CreateShortcuts(game_data_shortcuts, locations); @@ -2999,6 +3000,12 @@ void main_window::CreateConnects() dim_dlg->show(); }); + connect(ui->actionManage_KamenRider_RideGate, &QAction::triggered, this, [this] + { + kamen_rider_dialog* kam_dlg = kamen_rider_dialog::get_dlg(this); + kam_dlg->show(); + }); + connect(ui->actionManage_Cheats, &QAction::triggered, this, [this] { cheat_manager_dialog* cheat_manager = cheat_manager_dialog::get_dlg(this); diff --git a/rpcs3/rpcs3qt/main_window.ui b/rpcs3/rpcs3qt/main_window.ui index 91654cbb67..e10abf4668 100644 --- a/rpcs3/rpcs3qt/main_window.ui +++ b/rpcs3/rpcs3qt/main_window.ui @@ -302,6 +302,7 @@ + @@ -1173,6 +1174,11 @@ Dimensions Toypad + + + Kamen Rider Ride Gate + + Cheats diff --git a/rpcs3/rpcs3qt/savestate_manager_dialog.cpp b/rpcs3/rpcs3qt/savestate_manager_dialog.cpp index 381debab01..93c87c01d8 100644 --- a/rpcs3/rpcs3qt/savestate_manager_dialog.cpp +++ b/rpcs3/rpcs3qt/savestate_manager_dialog.cpp @@ -552,7 +552,7 @@ void savestate_manager_dialog::StartSavestateLoadThreads() std::vector> game_data; qRegisterMetaType>("QVector"); - QList indices; + QList indices; for (int i = 0; i < count; ++i) { auto game_data_ptr = std::make_unique(); @@ -595,7 +595,7 @@ void savestate_manager_dialog::StartSavestateLoadThreads() }); atomic_t error_count{}; - future_watcher.setFuture(QtConcurrent::map(indices, [this, &error_count, &game_data](const int& i) + future_watcher.setFuture(QtConcurrent::map(indices, [this, &error_count, &game_data](u64 i) { gui_log.trace("Loading savestate dir: %s", game_data[i]->title_id); diff --git a/rpcs3/rpcs3qt/skylander_dialog.cpp b/rpcs3/rpcs3qt/skylander_dialog.cpp index 94d9025a17..86dfb61464 100644 --- a/rpcs3/rpcs3qt/skylander_dialog.cpp +++ b/rpcs3/rpcs3qt/skylander_dialog.cpp @@ -164,9 +164,9 @@ static const std::map, const std::string> {{205, 0x0000}, "Sky Iron Shield"}, {{206, 0x0000}, "Winged Boots"}, {{207, 0x0000}, "Sparx the Dragonfly"}, - {{208, 0x0000}, "Dragonfire Cannon"}, + {{208, 0x1206}, "Dragonfire Cannon"}, {{208, 0x1602}, "Golden Dragonfire Cannon"}, - {{209, 0x0000}, "Scorpion Striker"}, + {{209, 0x1206}, "Scorpion Striker"}, {{210, 0x3002}, "Biter's Bane"}, {{210, 0x3008}, "Sorcerous Skull"}, {{210, 0x300B}, "Axe of Illusion"}, @@ -301,15 +301,15 @@ static const std::map, const std::string> {{509, 0x0000}, "Small Fry"}, {{510, 0x0000}, "Drobit"}, {{519, 0x0000}, "Trigger Snappy"}, - {{526, 0x0000}, "Whisper Elf"}, - {{540, 0x0000}, "Barkley"}, + {{526, 0x3000}, "Whisper Elf"}, + {{540, 0x3000}, "Barkley"}, {{540, 0x3402}, "Gnarly Barkley"}, - {{541, 0x0000}, "Thumpling"}, + {{541, 0x3000}, "Thumpling"}, {{514, 0x0000}, "Gill Runt"}, - {{542, 0x0000}, "Mini-Jini"}, + {{542, 0x3000}, "Mini-Jini"}, {{503, 0x0000}, "Spry"}, {{504, 0x0000}, "Hijinx"}, - {{543, 0x0000}, "Eye Small"}, + {{543, 0x1000}, "Eye Small"}, {{601, 0x0000}, "King Pen"}, {{602, 0x0000}, "Tri-Tip"}, {{603, 0x0000}, "Chopscotch"}, @@ -419,15 +419,15 @@ static const std::map, const std::string> {{3013, 0x2206}, "LightCore Grim Creeper"}, {{3014, 0x0000}, "Rip Tide"}, {{3015, 0x0000}, "Punk Shock"}, - {{3200, 0x0000}, "Battle Hammer"}, - {{3201, 0x0000}, "Sky Diamond"}, - {{3202, 0x0000}, "Platinum Sheep"}, - {{3203, 0x0000}, "Groove Machine"}, + {{3200, 0x2000}, "Battle Hammer"}, + {{3201, 0x2000}, "Sky Diamond"}, + {{3202, 0x2000}, "Platinum Sheep"}, + {{3203, 0x2000}, "Groove Machine"}, {{3204, 0x0000}, "UFO Hat"}, - {{3300, 0x0000}, "Sheep Wreck Island"}, - {{3301, 0x0000}, "Tower of Time"}, - {{3302, 0x0000}, "Fiery Forge"}, - {{3303, 0x0000}, "Arkeyan Crossbow"}, + {{3300, 0x2000}, "Sheep Wreck Island"}, + {{3301, 0x2000}, "Tower of Time"}, + {{3302, 0x2206}, "Fiery Forge"}, + {{3303, 0x2206}, "Arkeyan Crossbow"}, {{3220, 0x0000}, "Jet Stream"}, {{3221, 0x0000}, "Tomb Buggy"}, {{3222, 0x0000}, "Reef Ripper"}, diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index 31af8dced0..ccd3e1cdf8 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -298,7 +298,7 @@ public: const QString sdl = tr("The SDL handler supports a variety of controllers across different platforms."); const QString orientation_reset = tr("Resets the sensor orientation when pressed.
Toggle the checkbox to enable or disable the orientation feature.
Currently only used for PS Move interactions."); - const QString analog_limiter = tr("Applies the stick multipliers while this special button is pressed.
Enable \"Toggle\" if you want to toggle the analog limiter on button press instead."); + const QString analog_limiter = tr("Applies the stick multipliers while this special button is pressed.
Enable \"Toggle\" if you want to toggle the analog limiter on button press instead.
If no button has been assigned, the stick multipliers are always applied."); const QString pressure_intensity = tr("Controls the intensity of pressure sensitive buttons while this special button is pressed.
Enable \"Toggle\" if you want to toggle the intensity on button press instead.
Use the percentage to change how hard you want to press a button."); const QString pressure_deadzone = tr("Controls the deadzone of pressure sensitive buttons. It determines how far the button has to be pressed until it is recognized by the game. The resulting range will be projected onto the full button sensitivity range."); const QString squircle_factor = tr("The actual DualShock 3's stick range is not circular but formed like a rounded square (or squircle) which represents the maximum range of the emulated sticks. You can use the squircle values to modify the stick input if your sticks can't reach the corners of that range. A value of 0 does not apply any so called squircling. A value of 8000 is usually recommended."); diff --git a/rpcs3/util/serialization_ext.cpp b/rpcs3/util/serialization_ext.cpp index 508cd79168..5e861d97d0 100644 --- a/rpcs3/util/serialization_ext.cpp +++ b/rpcs3/util/serialization_ext.cpp @@ -814,7 +814,7 @@ void compressed_zstd_serialization_file_handler::initialize(utils::serial& ar) // Make sure at least one thread is free // Limit thread count in order to make sure memory limits are under control (TODO: scale with RAM size) - const usz thread_count = std::min(std::max(utils::get_thread_count(), 2) - 1, 16); + const usz thread_count = std::min(std::max(utils::get_thread_count(), 2) - 1, 32); for (usz i = 0; i < thread_count; i++) { @@ -1132,50 +1132,37 @@ void compressed_zstd_serialization_file_handler::finalize(utils::serial& ar) const stx::shared_ptr> empty_data = stx::make_single>(); const stx::shared_ptr> null_ptr = stx::null_ptr; - for (auto& context : m_compression_threads) + for (bool has_pending_threads = true; has_pending_threads; thread_ctrl::wait_for(500)) { - // Try to notify all on the first iteration - if (context.m_input.compare_and_swap_test(null_ptr, empty_data)) + has_pending_threads = false; + + // Try to notify all in bulk + for (auto& context : m_compression_threads) { - context.notified = true; - context.m_input.notify_one(); + if (!context.notified && !context.m_input && context.m_input.compare_and_swap_test(null_ptr, empty_data)) + { + context.notify_pending = true; + } } - } - for (auto& context : m_compression_threads) - { - // Notify to abort - while (!context.notified) + for (auto& context : m_compression_threads) { - const auto data = context.m_input.compare_and_swap(null_ptr, empty_data); - - if (!data) + if (context.notify_pending) { context.notified = true; - context.m_input.notify_one(); - break; + context.notify_pending = false; + context.m_input.notify_all(); } - - // Wait until valid input is processed - thread_ctrl::wait_for(1000); } - } - for (auto& context : m_compression_threads) - { - // Wait for notification to be consumed - while (context.m_input) + for (auto& context : m_compression_threads) { - thread_ctrl::wait_for(1000); - } - } - - for (auto& context : m_compression_threads) - { - // Wait for data to be writen to be read by the thread - while (context.m_output) - { - thread_ctrl::wait_for(1000); + // Wait for notification to be sent and received + // And wait for data to be written to be read by the thread + if (!context.notified || context.m_input || context.m_output) + { + has_pending_threads = true; + } } } diff --git a/rpcs3/util/serialization_ext.hpp b/rpcs3/util/serialization_ext.hpp index 51071e7188..7f575c088b 100644 --- a/rpcs3/util/serialization_ext.hpp +++ b/rpcs3/util/serialization_ext.hpp @@ -175,6 +175,7 @@ private: { atomic_ptr> m_input; atomic_ptr> m_output; + bool notify_pending = false; bool notified = false; std::unique_ptr>> m_thread; }; diff --git a/rpcs3/util/simd.hpp b/rpcs3/util/simd.hpp index ff4e0eed78..a60f8732a3 100644 --- a/rpcs3/util/simd.hpp +++ b/rpcs3/util/simd.hpp @@ -2213,8 +2213,11 @@ inline v128 gv_cvtu32_tofs(const v128& src) #if defined(__AVX512VL__) return _mm_cvtepu32_ps(src); #elif defined(ARCH_X64) - const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(src, 31)), _mm_set1_ps(0x80000000)); - return _mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(src, _mm_set1_epi32(0x7fffffff))), fix); + constexpr u64 bit_shift = 9; + const auto shifted = _mm_srli_epi32(src, bit_shift); + const auto cleared = _mm_slli_epi32(shifted, bit_shift); + const auto low_bits = _mm_sub_epi32(src, cleared); + return _mm_add_ps(_mm_cvtepi32_ps(low_bits), _mm_mul_ps(_mm_cvtepi32_ps(shifted), _mm_set_ps1(1u << bit_shift))); #elif defined(ARCH_ARM64) return vcvtq_f32_u32(src); #endif