diff --git a/.ci/build-mac.sh b/.ci/build-mac.sh index 2f43e05e74..9e16fce4ec 100755 --- a/.ci/build-mac.sh +++ b/.ci/build-mac.sh @@ -13,15 +13,16 @@ export HOMEBREW_NO_AUTO_UPDATE=1 export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 export HOMEBREW_NO_ENV_HINTS=1 export HOMEBREW_NO_INSTALL_CLEANUP=1 +brew update brew install -f --overwrite --quiet ccache "llvm@$LLVM_COMPILER_VER" brew link -f --overwrite --quiet "llvm@$LLVM_COMPILER_VER" if [ "$AARCH64" -eq 1 ]; then - brew install -f --overwrite --quiet googletest opencv@4 sdl3 vulkan-headers vulkan-loader molten-vk - brew unlink --quiet ffmpeg fmt qtbase qtsvg qtdeclarative protobuf + brew install -f --overwrite --quiet googletest opencv@4 sdl3 vulkan-headers vulkan-loader molten-vk + brew unlink --quiet ffmpeg fmt qtbase qtsvg qtdeclarative protobuf || true else arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /usr/local/bin/brew install -f --overwrite --quiet python@3.14 opencv@4 "llvm@$LLVM_COMPILER_VER" sdl3 vulkan-headers vulkan-loader molten-vk - arch -x86_64 /usr/local/bin/brew unlink --quiet ffmpeg qtbase qtsvg qtdeclarative + arch -x86_64 /usr/local/bin/brew unlink --quiet ffmpeg qtbase qtsvg qtdeclarative protobuf || true fi export CXX=clang++ diff --git a/.ci/deploy-mac.sh b/.ci/deploy-mac.sh index 70db614b59..930a87eee2 100755 --- a/.ci/deploy-mac.sh +++ b/.ci/deploy-mac.sh @@ -4,11 +4,15 @@ cd build || exit 1 cd bin +git clone --revision=32dceb35e2c95b46cec501033cbc3a1ddf32d6e8 https://github.com/KhronosGroup/MoltenVK.git +cd MoltenVK +./fetchDependencies --macos +make macos MVK_USE_METAL_PRIVATE_API=1 +cd ../ + mkdir -p "rpcs3.app/Contents/Resources/vulkan/icd.d" || true -wget https://github.com/KhronosGroup/MoltenVK/releases/download/v1.4.1/MoltenVK-macos-privateapi.tar -tar -xvf MoltenVK-macos-privateapi.tar -cp "MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib" "rpcs3.app/Contents/Frameworks/libMoltenVK.dylib" -cp 
"MoltenVK/MoltenVK/dynamic/dylib/macOS/MoltenVK_icd.json" "rpcs3.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json" +cp "MoltenVK/Package/Latest/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib" "rpcs3.app/Contents/Frameworks/libMoltenVK.dylib" +cp "MoltenVK/Package/Latest/MoltenVK/dynamic/dylib/macOS/MoltenVK_icd.json" "rpcs3.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json" sed -i '' "s/.\//..\/..\/..\/Frameworks\//g" "rpcs3.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json" cp "$(realpath $BREW_PATH/opt/llvm@$LLVM_COMPILER_VER/lib/c++/libc++abi.1.0.dylib)" "rpcs3.app/Contents/Frameworks/libc++abi.1.dylib" @@ -21,7 +25,7 @@ rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ "rpcs3.app/Contents/Frameworks/QtVirtualKeyboard.framework" \ "rpcs3.app/Contents/Plugins/platforminputcontexts" \ "rpcs3.app/Contents/Plugins/virtualkeyboard" \ -"rpcs3.app/Contents/Resources/git" +"rpcs3.app/Contents/Resources/git" || true ../../.ci/optimize-mac.sh rpcs3.app @@ -49,7 +53,7 @@ QT_TRANS="$WORKDIR/qt-downloader/$QT_VER/clang_64/translations" cp $QT_TRANS/qt_*.qm rpcs3.app/Contents/translations cp $QT_TRANS/qtbase_*.qm rpcs3.app/Contents/translations cp $QT_TRANS/qtmultimedia_*.qm rpcs3.app/Contents/translations -rm -f rpcs3.app/Contents/translations/qt_help_*.qm +rm -f rpcs3.app/Contents/translations/qt_help_*.qm || true # Need to do this rename hack due to case insensitive filesystem mv rpcs3.app RPCS3_.app diff --git a/.ci/deploy-windows-clang.sh b/.ci/deploy-windows-clang.sh index 07b4866fc4..04ba1bb20a 100644 --- a/.ci/deploy-windows-clang.sh +++ b/.ci/deploy-windows-clang.sh @@ -24,6 +24,7 @@ mkdir ./bin/config mkdir ./bin/config/input_configs curl -fsSL 'https://raw.githubusercontent.com/gabomdq/SDL_GameControllerDB/master/gamecontrollerdb.txt' 1> ./bin/config/input_configs/gamecontrollerdb.txt curl -fsSL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -f ISO-8859-1 -t UTF-8 1> ./bin/GuiConfigs/compat_database.dat +curl -fsSL 
'https://api.rpcs3.net/config/?api=v1' | iconv -f ISO-8859-1 -t UTF-8 1> ./bin/GuiConfigs/config_database.dat # Download translations mkdir -p ./bin/share/qt6/translations diff --git a/.ci/deploy-windows.sh b/.ci/deploy-windows.sh index 069f8fb637..3c59391a66 100755 --- a/.ci/deploy-windows.sh +++ b/.ci/deploy-windows.sh @@ -14,6 +14,7 @@ mkdir ./bin/config mkdir ./bin/config/input_configs curl -fsSL 'https://raw.githubusercontent.com/gabomdq/SDL_GameControllerDB/master/gamecontrollerdb.txt' 1> ./bin/config/input_configs/gamecontrollerdb.txt curl -fsSL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -t UTF-8 1> ./bin/GuiConfigs/compat_database.dat +curl -fsSL 'https://api.rpcs3.net/config/?api=v1' | iconv -t UTF-8 1> ./bin/GuiConfigs/config_database.dat # Download translations mkdir -p ./bin/qt6/translations diff --git a/.ci/setup-llvm.sh b/.ci/setup-llvm.sh index d296d2a3e4..5d06222f04 100644 --- a/.ci/setup-llvm.sh +++ b/.ci/setup-llvm.sh @@ -1,7 +1,7 @@ #!/bin/sh -ex # Resource/dependency URLs -CCACHE_URL="https://github.com/ccache/ccache/releases/download/v4.11.2/ccache-4.11.2-windows-x86_64.zip" +CCACHE_URL="https://github.com/ccache/ccache/releases/download/v4.12.3/ccache-4.12.3-windows-x86_64.zip" DEP_URLS=" \ $CCACHE_URL" diff --git a/.ci/setup-windows.sh b/.ci/setup-windows.sh index aade55fc95..d8016d8c13 100755 --- a/.ci/setup-windows.sh +++ b/.ci/setup-windows.sh @@ -6,7 +6,7 @@ QT_HOST="http://qt.mirror.constant.com/" QT_URL_VER=$(echo "$QT_VER" | sed "s/\.//g") QT_VER_MSVC_UP=$(echo "${QT_VER_MSVC}" | tr '[:lower:]' '[:upper:]') -QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." +QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}_${QT_VER_MSVC}_64/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." 
QT_PREFIX_2="win64_${QT_VER_MSVC}_64/${QT_VER}-0-${QT_DATE}" QT_SUFFIX="-Windows-Windows_11_24H2-${QT_VER_MSVC_UP}-Windows-Windows_11_24H2-X86_64.7z" QT_BASE_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtbase${QT_SUFFIX}" @@ -17,7 +17,7 @@ QT_SVG_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtsvg${QT_SUFFIX}" QT_TRANSLATIONS_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qttranslations${QT_SUFFIX}" LLVMLIBS_URL="https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-${LLVM_VER}/llvmlibs_mt.7z" VULKAN_SDK_URL="https://www.dropbox.com/scl/fi/sjjh0fc4ld281pjbl2xzu/VulkanSDK-${VULKAN_VER}-Installer.exe?rlkey=f6wzc0lvms5vwkt2z3qabfv9d&dl=1" -CCACHE_URL="https://github.com/ccache/ccache/releases/download/v4.11.2/ccache-4.11.2-windows-x86_64.zip" +CCACHE_URL="https://github.com/ccache/ccache/releases/download/v4.12.3/ccache-4.12.3-windows-x86_64.zip" DEP_URLS=" \ $QT_BASE_URL \ diff --git a/.github/workflows/llvm.yml b/.github/workflows/llvm.yml index e3e3e76c50..3ed584437a 100644 --- a/.github/workflows/llvm.yml +++ b/.github/workflows/llvm.yml @@ -20,7 +20,7 @@ jobs: runs-on: windows-2025 env: COMPILER: msvc - CCACHE_SHA: '1f39f3ad5aae3fe915e99ad1302633bc8f6718e58fa7c0de2b0ba7e080f0f08c' + CCACHE_SHA: '859141059ac950e1e8cd042c66f842f26b9e3a62a1669a69fe6ba180cb58bbdf' CCACHE_BIN_DIR: 'C:\ccache_bin' CCACHE_DIR: 'C:\ccache' CCACHE_INODECACHE: 'true' diff --git a/.github/workflows/rpcs3.yml b/.github/workflows/rpcs3.yml index 90ee8555ad..eb42b7aae1 100644 --- a/.github/workflows/rpcs3.yml +++ b/.github/workflows/rpcs3.yml @@ -30,23 +30,23 @@ jobs: matrix: include: - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.7" + docker_img: "rpcs3/rpcs3-ci-jammy:1.11" build_sh: "/rpcs3/.ci/build-linux.sh" compiler: clang UPLOAD_COMMIT_HASH: d812f1254a1157c80fd402f94446310560f54e5f UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux" - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.7" + docker_img: "rpcs3/rpcs3-ci-jammy:1.11" build_sh: "/rpcs3/.ci/build-linux.sh" 
compiler: gcc - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.7" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.11" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: clang UPLOAD_COMMIT_HASH: a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1 UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux-arm64" - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.7" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.11" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: gcc name: RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} @@ -134,7 +134,7 @@ jobs: runs-on: macos-14 env: CCACHE_DIR: /tmp/ccache_dir - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MAIN: '6' LLVM_COMPILER_VER: '21' RELEASE_MESSAGE: ../GitHubReleaseMessage.txt @@ -213,13 +213,13 @@ jobs: env: COMPILER: msvc QT_VER_MAIN: '6' - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MSVC: 'msvc2022' - QT_DATE: '202601261212' + QT_DATE: '202603180535' LLVM_VER: '19.1.7' VULKAN_VER: '1.3.268.0' VULKAN_SDK_SHA: '8459ef49bd06b697115ddd3d97c9aec729e849cd775f5be70897718a9b3b9db5' - CCACHE_SHA: '1f39f3ad5aae3fe915e99ad1302633bc8f6718e58fa7c0de2b0ba7e080f0f08c' + CCACHE_SHA: '859141059ac950e1e8cd042c66f842f26b9e3a62a1669a69fe6ba180cb58bbdf' CCACHE_BIN_DIR: 'C:\ccache_bin' CCACHE_DIR: 'C:\ccache' CCACHE_INODECACHE: 'true' diff --git a/.gitignore b/.gitignore index 4688d5fa52..a3911be3a0 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,9 @@ CMakeSettings.json *PVS-Studio* PVS/* +# Zed Editor files +.zed/* + # Ignore other system generated files x64/* rpcs3/x64/* diff --git a/3rdparty/7zip/7zip b/3rdparty/7zip/7zip index 5e96a82794..839151eaaa 160000 --- a/3rdparty/7zip/7zip +++ b/3rdparty/7zip/7zip @@ -1 +1 @@ -Subproject commit 5e96a8279489832924056b1fa82f29d5837c9469 +Subproject commit 839151eaaad24771892afaae6bac690e31e58384 diff --git a/3rdparty/FAudio b/3rdparty/FAudio index 633bdb772a..0372329dbb 160000 --- a/3rdparty/FAudio +++ b/3rdparty/FAudio @@ -1 +1 @@ -Subproject commit 
633bdb772a593104414b4b103ec752567d57c3c1 +Subproject commit 0372329dbb56e7814d0dea7b6eafa7a613bd8042 diff --git a/3rdparty/GL/glext.h b/3rdparty/GL/glext.h index 276a962a96..16c26be10f 100644 --- a/3rdparty/GL/glext.h +++ b/3rdparty/GL/glext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright 2013-2020 The Khronos Group Inc. +** Copyright 2013-2026 The Khronos Group Inc. ** SPDX-License-Identifier: MIT ** ** This header is generated from the Khronos OpenGL / OpenGL ES XML @@ -32,7 +32,7 @@ extern "C" { #define GLAPI extern #endif -#define GL_GLEXT_VERSION 20250203 +#define GL_GLEXT_VERSION 20260126 #include @@ -7358,6 +7358,47 @@ GLAPI void APIENTRY glFogCoordPointerEXT (GLenum type, GLsizei stride, const voi #endif #endif /* GL_EXT_fog_coord */ +#ifndef GL_EXT_fragment_shading_rate +#define GL_EXT_fragment_shading_rate 1 +#define GL_SHADING_RATE_1X1_PIXELS_EXT 0x96A6 +#define GL_SHADING_RATE_1X2_PIXELS_EXT 0x96A7 +#define GL_SHADING_RATE_2X1_PIXELS_EXT 0x96A8 +#define GL_SHADING_RATE_2X2_PIXELS_EXT 0x96A9 +#define GL_SHADING_RATE_1X4_PIXELS_EXT 0x96AA +#define GL_SHADING_RATE_4X1_PIXELS_EXT 0x96AB +#define GL_SHADING_RATE_4X2_PIXELS_EXT 0x96AC +#define GL_SHADING_RATE_2X4_PIXELS_EXT 0x96AD +#define GL_SHADING_RATE_4X4_PIXELS_EXT 0x96AE +#define GL_SHADING_RATE_EXT 0x96D0 +#define GL_SHADING_RATE_ATTACHMENT_EXT 0x96D1 +#define GL_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_EXT 0x96D2 +#define GL_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_EXT 0x96D3 +#define GL_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_EXT 0x96D4 +#define GL_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_EXT 0x96D5 +#define GL_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_EXT 0x96D6 +#define GL_MIN_FRAGMENT_SHADING_RATE_ATTACHMENT_TEXEL_WIDTH_EXT 0x96D7 +#define GL_MAX_FRAGMENT_SHADING_RATE_ATTACHMENT_TEXEL_WIDTH_EXT 0x96D8 +#define GL_MIN_FRAGMENT_SHADING_RATE_ATTACHMENT_TEXEL_HEIGHT_EXT 0x96D9 +#define GL_MAX_FRAGMENT_SHADING_RATE_ATTACHMENT_TEXEL_HEIGHT_EXT 0x96DA +#define 
GL_MAX_FRAGMENT_SHADING_RATE_ATTACHMENT_TEXEL_ASPECT_RATIO_EXT 0x96DB +#define GL_MAX_FRAGMENT_SHADING_RATE_ATTACHMENT_LAYERS_EXT 0x96DC +#define GL_FRAGMENT_SHADING_RATE_WITH_SHADER_DEPTH_STENCIL_WRITES_SUPPORTED_EXT 0x96DD +#define GL_FRAGMENT_SHADING_RATE_WITH_SAMPLE_MASK_SUPPORTED_EXT 0x96DE +#define GL_FRAGMENT_SHADING_RATE_ATTACHMENT_WITH_DEFAULT_FRAMEBUFFER_SUPPORTED_EXT 0x96DF +#define GL_FRAGMENT_SHADING_RATE_NON_TRIVIAL_COMBINERS_SUPPORTED_EXT 0x8F6F +#define GL_FRAGMENT_SHADING_RATE_PRIMITIVE_RATE_WITH_MULTI_VIEWPORT_SUPPORTED_EXT 0x9780 +typedef void (APIENTRYP PFNGLGETFRAGMENTSHADINGRATESEXTPROC) (GLsizei samples, GLsizei maxCount, GLsizei *count, GLenum *shadingRates); +typedef void (APIENTRYP PFNGLSHADINGRATEEXTPROC) (GLenum rate); +typedef void (APIENTRYP PFNGLSHADINGRATECOMBINEROPSEXTPROC) (GLenum combinerOp0, GLenum combinerOp1); +typedef void (APIENTRYP PFNGLFRAMEBUFFERSHADINGRATEEXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint baseLayer, GLsizei numLayers, GLsizei texelWidth, GLsizei texelHeight); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glGetFragmentShadingRatesEXT (GLsizei samples, GLsizei maxCount, GLsizei *count, GLenum *shadingRates); +GLAPI void APIENTRY glShadingRateEXT (GLenum rate); +GLAPI void APIENTRY glShadingRateCombinerOpsEXT (GLenum combinerOp0, GLenum combinerOp1); +GLAPI void APIENTRY glFramebufferShadingRateEXT (GLenum target, GLenum attachment, GLuint texture, GLint baseLayer, GLsizei numLayers, GLsizei texelWidth, GLsizei texelHeight); +#endif +#endif /* GL_EXT_fragment_shading_rate */ + #ifndef GL_EXT_framebuffer_blit #define GL_EXT_framebuffer_blit 1 #define GL_READ_FRAMEBUFFER_EXT 0x8CA8 @@ -7816,6 +7857,86 @@ GLAPI void APIENTRY glImportMemoryWin32NameEXT (GLuint memory, GLuint64 size, GL #endif #endif /* GL_EXT_memory_object_win32 */ +#ifndef GL_EXT_mesh_shader +#define GL_EXT_mesh_shader 1 +#define GL_MESH_SHADER_EXT 0x9559 +#define GL_TASK_SHADER_EXT 0x955A +#define 
GL_MAX_MESH_UNIFORM_BLOCKS_EXT 0x8E60 +#define GL_MAX_MESH_TEXTURE_IMAGE_UNITS_EXT 0x8E61 +#define GL_MAX_MESH_IMAGE_UNIFORMS_EXT 0x8E62 +#define GL_MAX_MESH_UNIFORM_COMPONENTS_EXT 0x8E63 +#define GL_MAX_MESH_ATOMIC_COUNTER_BUFFERS_EXT 0x8E64 +#define GL_MAX_MESH_ATOMIC_COUNTERS_EXT 0x8E65 +#define GL_MAX_MESH_SHADER_STORAGE_BLOCKS_EXT 0x8E66 +#define GL_MAX_COMBINED_MESH_UNIFORM_COMPONENTS_EXT 0x8E67 +#define GL_MAX_TASK_UNIFORM_BLOCKS_EXT 0x8E68 +#define GL_MAX_TASK_TEXTURE_IMAGE_UNITS_EXT 0x8E69 +#define GL_MAX_TASK_IMAGE_UNIFORMS_EXT 0x8E6A +#define GL_MAX_TASK_UNIFORM_COMPONENTS_EXT 0x8E6B +#define GL_MAX_TASK_ATOMIC_COUNTER_BUFFERS_EXT 0x8E6C +#define GL_MAX_TASK_ATOMIC_COUNTERS_EXT 0x8E6D +#define GL_MAX_TASK_SHADER_STORAGE_BLOCKS_EXT 0x8E6E +#define GL_MAX_COMBINED_TASK_UNIFORM_COMPONENTS_EXT 0x8E6F +#define GL_MAX_TASK_WORK_GROUP_TOTAL_COUNT_EXT 0x9740 +#define GL_MAX_MESH_WORK_GROUP_TOTAL_COUNT_EXT 0x9741 +#define GL_MAX_MESH_WORK_GROUP_INVOCATIONS_EXT 0x9757 +#define GL_MAX_TASK_WORK_GROUP_INVOCATIONS_EXT 0x9759 +#define GL_MAX_TASK_PAYLOAD_SIZE_EXT 0x9742 +#define GL_MAX_TASK_SHARED_MEMORY_SIZE_EXT 0x9743 +#define GL_MAX_MESH_SHARED_MEMORY_SIZE_EXT 0x9744 +#define GL_MAX_TASK_PAYLOAD_AND_SHARED_MEMORY_SIZE_EXT 0x9745 +#define GL_MAX_MESH_PAYLOAD_AND_SHARED_MEMORY_SIZE_EXT 0x9746 +#define GL_MAX_MESH_OUTPUT_MEMORY_SIZE_EXT 0x9747 +#define GL_MAX_MESH_PAYLOAD_AND_OUTPUT_MEMORY_SIZE_EXT 0x9748 +#define GL_MAX_MESH_OUTPUT_VERTICES_EXT 0x9538 +#define GL_MAX_MESH_OUTPUT_PRIMITIVES_EXT 0x9756 +#define GL_MAX_MESH_OUTPUT_COMPONENTS_EXT 0x9749 +#define GL_MAX_MESH_OUTPUT_LAYERS_EXT 0x974A +#define GL_MAX_MESH_MULTIVIEW_VIEW_COUNT_EXT 0x9557 +#define GL_MESH_OUTPUT_PER_VERTEX_GRANULARITY_EXT 0x92DF +#define GL_MESH_OUTPUT_PER_PRIMITIVE_GRANULARITY_EXT 0x9543 +#define GL_MAX_PREFERRED_TASK_WORK_GROUP_INVOCATIONS_EXT 0x974B +#define GL_MAX_PREFERRED_MESH_WORK_GROUP_INVOCATIONS_EXT 0x974C +#define GL_MESH_PREFERS_LOCAL_INVOCATION_VERTEX_OUTPUT_EXT 0x974D +#define 
GL_MESH_PREFERS_LOCAL_INVOCATION_PRIMITIVE_OUTPUT_EXT 0x974E +#define GL_MESH_PREFERS_COMPACT_VERTEX_OUTPUT_EXT 0x974F +#define GL_MESH_PREFERS_COMPACT_PRIMITIVE_OUTPUT_EXT 0x9750 +#define GL_MAX_TASK_WORK_GROUP_COUNT_EXT 0x9751 +#define GL_MAX_MESH_WORK_GROUP_COUNT_EXT 0x9752 +#define GL_MAX_MESH_WORK_GROUP_SIZE_EXT 0x9758 +#define GL_MAX_TASK_WORK_GROUP_SIZE_EXT 0x975A +#define GL_MESH_WORK_GROUP_SIZE_EXT 0x953E +#define GL_TASK_WORK_GROUP_SIZE_EXT 0x953F +#define GL_MESH_VERTICES_OUT_EXT 0x9579 +#define GL_MESH_PRIMITIVES_OUT_EXT 0x957A +#define GL_MESH_OUTPUT_TYPE_EXT 0x957B +#define GL_UNIFORM_BLOCK_REFERENCED_BY_MESH_SHADER_EXT 0x959C +#define GL_UNIFORM_BLOCK_REFERENCED_BY_TASK_SHADER_EXT 0x959D +#define GL_REFERENCED_BY_MESH_SHADER_EXT 0x95A0 +#define GL_REFERENCED_BY_TASK_SHADER_EXT 0x95A1 +#define GL_TASK_SHADER_INVOCATIONS_EXT 0x9753 +#define GL_MESH_SHADER_INVOCATIONS_EXT 0x9754 +#define GL_MESH_PRIMITIVES_GENERATED_EXT 0x9755 +#define GL_MESH_SHADER_BIT_EXT 0x00000040 +#define GL_TASK_SHADER_BIT_EXT 0x00000080 +#define GL_MESH_SUBROUTINE_EXT 0x957C +#define GL_TASK_SUBROUTINE_EXT 0x957D +#define GL_MESH_SUBROUTINE_UNIFORM_EXT 0x957E +#define GL_TASK_SUBROUTINE_UNIFORM_EXT 0x957F +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_MESH_SHADER_EXT 0x959E +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TASK_SHADER_EXT 0x959F +typedef void (APIENTRYP PFNGLDRAWMESHTASKSEXTPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +typedef void (APIENTRYP PFNGLDRAWMESHTASKSINDIRECTEXTPROC) (GLintptr indirect); +typedef void (APIENTRYP PFNGLMULTIDRAWMESHTASKSINDIRECTEXTPROC) (GLintptr indirect, GLsizei drawcount, GLsizei stride); +typedef void (APIENTRYP PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTEXTPROC) (GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawMeshTasksEXT (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +GLAPI void APIENTRY 
glDrawMeshTasksIndirectEXT (GLintptr indirect); +GLAPI void APIENTRY glMultiDrawMeshTasksIndirectEXT (GLintptr indirect, GLsizei drawcount, GLsizei stride); +GLAPI void APIENTRY glMultiDrawMeshTasksIndirectCountEXT (GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +#endif +#endif /* GL_EXT_mesh_shader */ + #ifndef GL_EXT_misc_attribute #define GL_EXT_misc_attribute 1 #endif /* GL_EXT_misc_attribute */ diff --git a/3rdparty/OpenAL/openal-soft b/3rdparty/OpenAL/openal-soft index 75c0059630..c41d64c6a3 160000 --- a/3rdparty/OpenAL/openal-soft +++ b/3rdparty/OpenAL/openal-soft @@ -1 +1 @@ -Subproject commit 75c00596307bf05ba7bbc8c7022836bf52f17477 +Subproject commit c41d64c6a35f6174bf4a27010aeac52a8d3bb2c6 diff --git a/3rdparty/SoundTouch/soundtouch b/3rdparty/SoundTouch/soundtouch index 3982730833..a0fba77b6f 160000 --- a/3rdparty/SoundTouch/soundtouch +++ b/3rdparty/SoundTouch/soundtouch @@ -1 +1 @@ -Subproject commit 3982730833b6daefe77dcfb32b5c282851640c17 +Subproject commit a0fba77b6f9cfbdb71f8bbec58b6ac4e5e3b1097 diff --git a/3rdparty/cubeb/cubeb b/3rdparty/cubeb/cubeb index e495bee4cd..484857522c 160000 --- a/3rdparty/cubeb/cubeb +++ b/3rdparty/cubeb/cubeb @@ -1 +1 @@ -Subproject commit e495bee4cd630c9f99907a764e16edba37a4b564 +Subproject commit 484857522c73318c06f18ba0a3e17525fa98c608 diff --git a/3rdparty/curl/CMakeLists.txt b/3rdparty/curl/CMakeLists.txt index b20763af65..2b725169f9 100644 --- a/3rdparty/curl/CMakeLists.txt +++ b/3rdparty/curl/CMakeLists.txt @@ -18,9 +18,6 @@ else() set(USE_LIBIDN2 OFF CACHE BOOL "Use libidn2 for IDN support") # Disabled because MacOS CI doesn't work otherwise set(CURL_CA_PATH "none" CACHE STRING "Location of default CA path. Set 'none' to disable or 'auto' for auto-detection. 
Defaults to 'auto'.") option(CURL_DISABLE_INSTALL "Disable installation targets" ON) - if(USE_MSVC_STATIC_CRT) - set(CURL_STATIC_CRT ON CACHE BOOL "Use static crt to build curl") - endif() if(WIN32) set(ENABLE_UNICODE ON CACHE BOOL "enable Unicode") endif() diff --git a/3rdparty/curl/curl b/3rdparty/curl/curl index 400fffa90f..8c908d2d0a 160000 --- a/3rdparty/curl/curl +++ b/3rdparty/curl/curl @@ -1 +1 @@ -Subproject commit 400fffa90f30c7a2dc762fa33009d24851bd2016 +Subproject commit 8c908d2d0a6d32abdedda2c52e90bd56ec76c24d diff --git a/3rdparty/curl/libcurl.vcxproj b/3rdparty/curl/libcurl.vcxproj index dae28be346..4db28782a3 100644 --- a/3rdparty/curl/libcurl.vcxproj +++ b/3rdparty/curl/libcurl.vcxproj @@ -79,12 +79,16 @@ + + + + @@ -106,6 +110,7 @@ + @@ -169,14 +174,13 @@ - + - @@ -184,10 +188,8 @@ - - @@ -204,6 +206,7 @@ + @@ -224,13 +227,11 @@ - - @@ -272,6 +273,7 @@ + @@ -280,6 +282,9 @@ + + + @@ -300,9 +305,7 @@ - - @@ -312,6 +315,7 @@ + @@ -352,7 +356,6 @@ - @@ -367,7 +370,7 @@ - + @@ -376,7 +379,6 @@ - @@ -384,7 +386,6 @@ - @@ -405,6 +406,7 @@ + @@ -418,12 +420,10 @@ - - diff --git a/3rdparty/curl/libcurl.vcxproj.filters b/3rdparty/curl/libcurl.vcxproj.filters index 17f760c54b..d38316e767 100644 --- a/3rdparty/curl/libcurl.vcxproj.filters +++ b/3rdparty/curl/libcurl.vcxproj.filters @@ -204,9 +204,6 @@ Source Files - - Source Files - Source Files @@ -222,9 +219,6 @@ Source Files - - Source Files - Source Files @@ -246,18 +240,12 @@ Source Files - - Source Files - Source Files Source Files - - Source Files - Source Files @@ -318,9 +306,6 @@ Source Files - - Source Files - Source Files @@ -333,9 +318,6 @@ Source Files - - Source Files - Source Files @@ -549,6 +531,27 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + @@ -653,9 +656,6 @@ Header Files - - Header Files - Header Files @@ -758,9 +758,6 @@ Header Files - - Header Files - Header Files @@ -791,9 +788,6 @@ 
Header Files - - Header Files - Header Files @@ -812,9 +806,6 @@ Header Files - - Header Files - Header Files @@ -836,9 +827,6 @@ Header Files - - Header Files - Header Files @@ -887,9 +875,6 @@ Header Files - - Header Files - Header Files @@ -899,9 +884,6 @@ Header Files - - Header Files - Header Files @@ -1103,9 +1085,6 @@ Header Files - - Header Files - Header Files @@ -1121,6 +1100,27 @@ Header Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + diff --git a/3rdparty/libpng/libpng b/3rdparty/libpng/libpng index 02f2b4f469..95ab3fdca8 160000 --- a/3rdparty/libpng/libpng +++ b/3rdparty/libpng/libpng @@ -1 +1 @@ -Subproject commit 02f2b4f4699f0ef9111a6534f093b53732df4452 +Subproject commit 95ab3fdca83ea294efd3b092e9a53c5a39886444 diff --git a/3rdparty/libsdl-org/SDL b/3rdparty/libsdl-org/SDL index a962f40bbb..5848e584a1 160000 --- a/3rdparty/libsdl-org/SDL +++ b/3rdparty/libsdl-org/SDL @@ -1 +1 @@ -Subproject commit a962f40bbba175e9716557a25d5d7965f134a3d3 +Subproject commit 5848e584a1b606de26e3dbd1c7e4ecbc34f807a6 diff --git a/3rdparty/protobuf/CMakeLists.txt b/3rdparty/protobuf/CMakeLists.txt index 274e6110af..e1f82f146f 100644 --- a/3rdparty/protobuf/CMakeLists.txt +++ b/3rdparty/protobuf/CMakeLists.txt @@ -2,8 +2,8 @@ add_library(3rdparty_protobuf INTERFACE) if (USE_SYSTEM_PROTOBUF) pkg_check_modules(PROTOBUF REQUIRED IMPORTED_TARGET protobuf>=33.0.0) target_link_libraries(3rdparty_protobuf INTERFACE PkgConfig::PROTOBUF) - set(PROTOBUF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../rpcs3/Emu/NP/generated/") - execute_process(COMMAND protoc --cpp_out="${PROTOBUF_DIR}" --proto_path="${PROTOBUF_DIR}" np2_structs.proto RESULT_VARIABLE PROTOBUF_CMD_ERROR) + set(PROTOBUF_DIR "${CMAKE_SOURCE_DIR}/rpcs3/Emu/NP/generated") + execute_process(COMMAND protoc --cpp_out=${PROTOBUF_DIR} --proto_path=${PROTOBUF_DIR} np2_structs.proto RESULT_VARIABLE PROTOBUF_CMD_ERROR) if(PROTOBUF_CMD_ERROR AND NOT 
PROTOBUF_CMD_ERROR EQUAL 0) message(FATAL_ERROR "protoc failed to regenerate protobuf files.") endif() @@ -20,6 +20,8 @@ else() option(protobuf_DISABLE_RTTI "Remove runtime type information in the binaries" OFF) option(protobuf_FORCE_FETCH_DEPENDENCIES "Force all dependencies to be downloaded from GitHub. Local installations will be ignored." OFF) option(protobuf_LOCAL_DEPENDENCIES_ONLY "Prevent downloading any dependencies from GitHub. If this option is set, the dependency must be available locally as an installed package." OFF) + option(protobuf_BUILD_SHARED_LIBS "Build Shared Libraries" OFF) + option(protobuf_MSVC_STATIC_RUNTIME "Link static runtime libraries" OFF) add_subdirectory(protobuf EXCLUDE_FROM_ALL) target_include_directories(3rdparty_protobuf SYSTEM INTERFACE protobuf/src) diff --git a/3rdparty/qt6.cmake b/3rdparty/qt6.cmake index e15e0abdcb..969967fa40 100644 --- a/3rdparty/qt6.cmake +++ b/3rdparty/qt6.cmake @@ -6,15 +6,22 @@ find_package(Qt6 ${QT_MIN_VER} CONFIG COMPONENTS Widgets Concurrent Multimedia M if(WIN32) target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets) else() - set(QT_NO_PRIVATE_MODULE_WARNING ON) - find_package(Qt6 ${QT_MIN_VER} COMPONENTS DBus Gui GuiPrivate) + find_package(Qt6 ${QT_MIN_VER} COMPONENTS DBus Gui) + if(Qt6_VERSION VERSION_GREATER_EQUAL "6.10.0") + set(QT_NO_PRIVATE_MODULE_WARNING ON) + find_package(Qt6 ${QT_MIN_VER} COMPONENTS GuiPrivate) + endif() if(Qt6DBus_FOUND) target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::DBus Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets) target_compile_definitions(3rdparty_qt6 INTERFACE -DHAVE_QTDBUS) else() target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets) endif() - target_link_libraries(3rdparty_qt6 INTERFACE Qt6::GuiPrivate) + if(Qt6_VERSION VERSION_GREATER_EQUAL 
"6.10.0") + target_link_libraries(3rdparty_qt6 INTERFACE Qt6::GuiPrivate) + else() + target_include_directories(3rdparty_qt6 INTERFACE ${Qt6Gui_PRIVATE_INCLUDE_DIRS}) + endif() endif() if(Qt6Widgets_FOUND) diff --git a/3rdparty/wolfssl/wolfssl b/3rdparty/wolfssl/wolfssl index b077c81eb6..1d363f3adc 160000 --- a/3rdparty/wolfssl/wolfssl +++ b/3rdparty/wolfssl/wolfssl @@ -1 +1 @@ -Subproject commit b077c81eb635392e694ccedbab8b644297ec0285 +Subproject commit 1d363f3adceba9d1478230ede476a37b0dcdef24 diff --git a/3rdparty/yaml-cpp/yaml-cpp b/3rdparty/yaml-cpp/yaml-cpp index 456c68f452..51a5d623e3 160000 --- a/3rdparty/yaml-cpp/yaml-cpp +++ b/3rdparty/yaml-cpp/yaml-cpp @@ -1 +1 @@ -Subproject commit 456c68f452da09d8ca84b375faa2b1397713eaba +Subproject commit 51a5d623e3fde1f58829a56ba910f1cb33596222 diff --git a/3rdparty/yaml-cpp/yaml-cpp.vcxproj b/3rdparty/yaml-cpp/yaml-cpp.vcxproj index b1b732727c..4d10c90d7e 100644 --- a/3rdparty/yaml-cpp/yaml-cpp.vcxproj +++ b/3rdparty/yaml-cpp/yaml-cpp.vcxproj @@ -76,6 +76,7 @@ + diff --git a/3rdparty/yaml-cpp/yaml-cpp.vcxproj.filters b/3rdparty/yaml-cpp/yaml-cpp.vcxproj.filters index 60c75fa23e..f4b553ad72 100644 --- a/3rdparty/yaml-cpp/yaml-cpp.vcxproj.filters +++ b/3rdparty/yaml-cpp/yaml-cpp.vcxproj.filters @@ -94,5 +94,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 55d7353acf..47645d290c 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -6,7 +6,8 @@ if (USE_SYSTEM_ZLIB) target_link_libraries(3rdparty_zlib INTERFACE ZLIB::ZLIB) target_compile_definitions(3rdparty_zlib INTERFACE -DZLIB_CONST=1) else() - option(ZLIB_BUILD_EXAMPLES "Enable Zlib Examples" OFF) + option(ZLIB_BUILD_TESTING "Enable Zlib Examples as tests" OFF) + option(ZLIB_BUILD_SHARED "Enable building zlib shared library" OFF) message(STATUS "RPCS3: Using builtin ZLIB") set(SKIP_INSTALL_ALL ON) add_subdirectory(zlib EXCLUDE_FROM_ALL) diff --git 
a/3rdparty/zlib/zlib b/3rdparty/zlib/zlib index 51b7f2abda..da607da739 160000 --- a/3rdparty/zlib/zlib +++ b/3rdparty/zlib/zlib @@ -1 +1 @@ -Subproject commit 51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf +Subproject commit da607da739fa6047df13e66a2af6b8bec7c2a498 diff --git a/BUILDING.md b/BUILDING.md index b31b3fee4e..c1774908fd 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -20,26 +20,26 @@ The following tools are required to build RPCS3 on Windows 10 or later: with standalone **CMake** tool. - [Python 3.6+](https://www.python.org/downloads/) (add to PATH) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (see "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. The `sln` solution available only on **Visual Studio** is the preferred building solution. It easily allows to build the **RPCS3** application in `Release` and `Debug` mode. In order to build **RPCS3** with the `sln` solution (with **Visual Studio**), **Qt** libs need to be detected. To detect the libs: -- add and set the `QTDIR` environment variable, e.g. `\6.10.2\msvc2022_64\` +- add and set the `QTDIR` environment variable, e.g. 
`\6.11.0\msvc2022_64\` - or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2022) **NOTE:** If you have issues with the **Visual Studio Qt Plugin**, you may want to uninstall it and install the [Legacy Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.LEGACYQtVisualStudioTools2022) instead. In order to build **RPCS3** with the `CMake` solution (with both **Visual Studio** and standalone **CMake** tool): -- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. `\6.10.2\msvc2022_64\` +- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. `\6.11.0\msvc2022_64\` ### Linux These are the essentials tools to build RPCS3 on Linux. Some of them can be installed through your favorite package manager: - Clang 17+ or GCC 13+ - [CMake 3.28.0+](https://www.cmake.org/download/) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. 
- [SDL3](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend) @@ -95,7 +95,7 @@ sudo apt-get install cmake #### Fedora - sudo dnf install alsa-lib-devel cmake ninja-build glew glew-devel libatomic libevdev-devel libudev-devel openal-devel qt6-qtbase-devel qt6-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt6-qtmultimedia-devel qt6-qtsvg-devel llvm-devel + sudo dnf install alsa-lib-devel cmake ninja-build glew glew-devel libatomic libevdev-devel libudev-devel openal-soft-devel qt6-qtbase-devel qt6-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt6-qtmultimedia-devel qt6-qtsvg-devel llvm-devel libcurl-devel #### OpenSUSE @@ -123,7 +123,7 @@ Start **Visual Studio**, click on `Open a project or solution` and select the `r ##### Configuring the Qt Plugin (if used) 1) go to `Extensions->Qt VS Tools->Qt Versions` -2) add the path to your Qt installation with compiler e.g. `\6.10.2\msvc2022_64`, version will fill in automatically +2) add the path to your Qt installation with compiler e.g. `\6.11.0\msvc2022_64`, version will fill in automatically 3) go to `Extensions->Qt VS Tools->Options->Legacy Project Format`. (Only available in the **Legacy Qt Plugin**) 4) set `Build: Run pre-build setup` to `true`. 
(Only available in the **Legacy Qt Plugin**) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65e415bdb7..217c40f341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,12 +13,12 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11) - message(FATAL_ERROR "RPCS3 requires at least gcc-11.") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13) + message(FATAL_ERROR "RPCS3 requires at least gcc-13.") endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.0) - message(FATAL_ERROR "RPCS3 requires at least clang-12.0.") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0) + message(FATAL_ERROR "RPCS3 requires at least clang-19.0.") endif() endif() @@ -37,7 +37,7 @@ option(USE_LIBEVDEV "libevdev-based joystick support" ON) option(USE_DISCORD_RPC "Discord rich presence integration" OFF) option(USE_VULKAN "Vulkan render backend" ON) option(USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF) -option(USE_SDL "Enables SDL input handler" OFF) +option(USE_SDL "Enables SDL input handler" ON) option(USE_SYSTEM_CUBEB "Prefer system cubeb instead of the builtin one" OFF) option(USE_SYSTEM_CURL "Prefer system Curl instead of the prebuild one" ON) option(USE_SYSTEM_FAUDIO "Prefer system FAudio instead of the builtin one" OFF) @@ -86,50 +86,7 @@ if(CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT MSVC) endif() if(MSVC) - option(USE_MSVC_STATIC_CRT "Use static MSVC C runtime" OFF) - - # TODO(cjj19970505@live.cn) - # DiscordRPC binary in 3rdparty is compiled /MT - # So theoretically we should enable DiscordRPC in Release and static CRT build - # since we might encounter some rumtime issues when more than one CRT version are presented. 
- # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # Add other DiscordRPC binaries(compiled with /MTd, /MD, /MDd) or compile it from source may address this issue. - if(NOT IS_MULTI_CONFIG) - if(NOT(CMAKE_BUILD_TYPE MATCHES "Release" AND USE_MSVC_STATIC_CRT)) - set(USE_DISCORD_RPC OFF CACHE BOOL "Discord RPC is only available in Release and static CRT build." FORCE) - endif() - endif() - - if(USE_MSVC_STATIC_CRT) - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") - else() - # though doc ( https://cmake.org/cmake/help/latest/variable/CMAKE_MSVC_RUNTIME_LIBRARY.html ) - # says if that property is not set then CMake uses the default value MultiThreaded$<$:Debug>DLL - # to select a MSVC runtime library. - # But yaml-cpp set /MT(d) if CMAKE_MSVC_RUNTIME_LIBRARY is undefined - # So we have to define it explicitly - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") - endif() - - # TODO(cjj19970505@live.cn) - # offical QT uses dynamic CRT. - # When building our lib with static CRT and debug build type - # and linking with Qt with dynamic CRT and debug build, - # error is encountered in runtime (which is expected). - # But building our lib with static CRT and release build type, - # and linking with Qt with dynamic CRT and release build seems to be working, - # which is the same config with VS solution. - # (though technically it might still have some hidden errors). - # So we allow static CRT in both relase and debug build, but prompt warning in debug build. 
- # For more info: - # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # https://wiki.qt.io/Technical_FAQ#Why_does_a_statically_built_Qt_use_the_dynamic_Visual_Studio_runtime_libraries_.3F_Do_I_need_to_deploy_those_with_my_application_.3F - if(USE_MSVC_STATIC_CRT) - if(IS_MULTI_CONFIG OR CMAKE_BUILD_TYPE MATCHES "Debug") - message(AUTHOR_WARNING "Debug build currently can not work with static CRT.") - endif() - endif() - add_compile_options(/MP) + add_compile_options("$<$:/MP>") endif() if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -142,7 +99,7 @@ if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") endif() if(MSVC) - add_compile_options(/wd4530 /utf-8) # C++ exception handler used, but unwind semantics are not enabled + add_compile_options("$<$:/wd4530;/utf-8>") # C++ exception handler used, but unwind semantics are not enabled endif() add_subdirectory(3rdparty) @@ -161,10 +118,6 @@ if (NOT FOUND_LTO EQUAL -1) message(FATAL_ERROR "RPCS3 doesn't support building with LTO, use -DDISABLE_LTO=TRUE to force-disable it") endif() -if(NOT WIN32) - add_compile_options(-pthread) -endif() - ## Look for Gamemode if its installed on Linux if(LINUX) ## User chooses whether to Enable GameMode features or not diff --git a/Utilities/Config.cpp b/Utilities/Config.cpp index cee928def7..f242bd6172 100644 --- a/Utilities/Config.cpp +++ b/Utilities/Config.cpp @@ -40,7 +40,7 @@ namespace cfg owner->m_nodes.emplace_back(this); } - bool _base::from_string(std::string_view, bool) + bool _base::from_string(std::string_view /*value*/, bool /*dynamic*/) { cfg_log.fatal("cfg::_base::from_string() purecall"); return false; @@ -68,7 +68,7 @@ namespace cfg // Incrementally load config entries from YAML::Node. // The config value is preserved if the corresponding YAML node doesn't exist. 
- static void decode(const YAML::Node& data, class _base& rhs, bool dynamic = false); + [[nodiscard]] static bool decode(const YAML::Node& data, class _base& rhs, bool dynamic, bool strict); } std::vector cfg::make_int_range(s64 min, s64 max) @@ -76,11 +76,11 @@ std::vector cfg::make_int_range(s64 min, s64 max) return {std::to_string(min), std::to_string(max)}; } -bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max) +bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max, std::string_view name) { if (value.empty()) { - if (out) cfg_log.error("cfg::try_to_int64(): called with an empty string"); + if (out) cfg_log.error("cfg::try_to_int64('%s'): called with an empty string", name); return false; } @@ -107,7 +107,7 @@ bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max) if (ret.ec != std::errc() || ret.ptr != end || (start[0] == '-' && sign < 0)) { - if (out) cfg_log.error("cfg::try_to_int64('%s'): invalid integer", value); + if (out) cfg_log.error("cfg::try_to_int64('%s', '%s'): invalid integer", value, name); return false; } @@ -115,7 +115,7 @@ bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max) if (result < min || result > max) { - if (out) cfg_log.error("cfg::try_to_int64('%s'): out of bounds (val=%d, min=%d, max=%d)", value, result, min, max); + if (out) cfg_log.error("cfg::try_to_int64('%s', '%s'): out of bounds (val=%d, min=%d, max=%d)", value, name, result, min, max); return false; } @@ -128,11 +128,11 @@ std::vector cfg::make_uint_range(u64 min, u64 max) return {std::to_string(min), std::to_string(max)}; } -bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max) +bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max, std::string_view name) { if (value.empty()) { - if (out) cfg_log.error("cfg::try_to_uint64(): called with an empty string"); + if (out) cfg_log.error("cfg::try_to_uint64('%s'): called with an empty string", name); return false; } @@ -152,13 +152,13 @@ 
bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max) if (ret.ec != std::errc() || ret.ptr != end) { - if (out) cfg_log.error("cfg::try_to_uint64('%s'): invalid integer", value); + if (out) cfg_log.error("cfg::try_to_uint64('%s', '%s'): invalid integer", value, name); return false; } if (result < min || result > max) { - if (out) cfg_log.error("cfg::try_to_uint64('%s'): out of bounds (val=%u, min=%u, max=%u)", value, result, min, max); + if (out) cfg_log.error("cfg::try_to_uint64('%s', '%s'): out of bounds (val=%u, min=%u, max=%u)", value, name, result, min, max); return false; } @@ -166,11 +166,11 @@ bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max) return true; } -bool try_to_uint128(u128* out, std::string_view value) +bool try_to_uint128(u128* out, std::string_view value, std::string_view name) { if (value.empty()) { - if (out) cfg_log.error("cfg::try_to_uint128(): called with an empty string"); + if (out) cfg_log.error("cfg::try_to_uint128('%s'): called with an empty string", name); return false; } @@ -193,7 +193,7 @@ bool try_to_uint128(u128* out, std::string_view value) if (ret.ec != std::errc() || ret.ptr != end) { - if (out) cfg_log.error("cfg::try_to_uint128('%s'): invalid integer", value); + if (out) cfg_log.error("cfg::try_to_uint128('%s', '%s'): invalid integer", value, name); return false; } @@ -207,7 +207,7 @@ bool try_to_uint128(u128* out, std::string_view value) if (ret.ec != std::errc() || ret.ptr != start_low64) { - if (out) cfg_log.error("cfg::try_to_uint128('%s'): invalid integer", value); + if (out) cfg_log.error("cfg::try_to_uint128('%s', '%s'): invalid integer", value, name); return false; } @@ -220,11 +220,11 @@ std::vector cfg::make_float_range(f64 min, f64 max) return {std::to_string(min), std::to_string(max)}; } -bool try_to_float(f64* out, std::string_view value, f64 min, f64 max) +bool try_to_float(f64* out, std::string_view value, f64 min, f64 max, std::string_view name) { if (value.empty()) { - if 
(out) cfg_log.error("cfg::try_to_float(): called with an empty string"); + if (out) cfg_log.error("cfg::try_to_float('%s'): called with an empty string", name); return false; } @@ -237,13 +237,13 @@ bool try_to_float(f64* out, std::string_view value, f64 min, f64 max) if (end_check != str.data() + str.size()) { - if (out) cfg_log.error("cfg::try_to_float('%s'): invalid float", value); + if (out) cfg_log.error("cfg::try_to_float('%s', '%s'): invalid float", value, name); return false; } if (result < min || result > max) { - if (out) cfg_log.error("cfg::try_to_float('%s'): out of bounds (val=%f, min=%f, max=%f)", value, result, min, max); + if (out) cfg_log.error("cfg::try_to_float('%s', '%s'): out of bounds (val=%f, min=%f, max=%f)", value, name, result, min, max); return false; } @@ -251,7 +251,7 @@ bool try_to_float(f64* out, std::string_view value, f64 min, f64 max) return true; } -bool try_to_string(std::string* out, const f64& value) +bool try_to_string(std::string* out, f64 value, std::string_view name) { #ifdef __APPLE__ if (out) *out = std::to_string(value); @@ -266,13 +266,13 @@ bool try_to_string(std::string* out, const f64& value) } else { - if (out) cfg_log.error("cfg::try_to_string(): could not convert value '%f' to string. error='%s'", value, std::make_error_code(ec).message()); + if (out) cfg_log.error("cfg::try_to_string('%s'): could not convert value '%f' to string. 
error='%s'", name, value, std::make_error_code(ec).message()); return false; } #endif } -bool cfg::try_to_enum_value(u64* out, decltype(&fmt_class_string::format) func, std::string_view value) +bool cfg::try_to_enum_value(u64* out, decltype(&fmt_class_string::format) func, std::string_view value, std::string_view name) { u64 max = umax; @@ -313,13 +313,13 @@ bool cfg::try_to_enum_value(u64* out, decltype(&fmt_class_string::format) f if (ret.ec != std::errc() || ret.ptr != end) { - if (out) cfg_log.error("cfg::try_to_enum_value('%s'): invalid enum or integer", value); + if (out) cfg_log.error("cfg::try_to_enum_value('%s', '%s'): invalid enum or integer", value, name); return false; } if (result > max) { - if (out) cfg_log.error("cfg::try_to_enum_value('%s'): out of bounds(val=%u, min=0, max=%u)", value, result, max); + if (out) cfg_log.error("cfg::try_to_enum_value('%s', '%s'): out of bounds(val=%u, min=0, max=%u)", value, name, result, max); return false; } @@ -362,6 +362,27 @@ std::vector cfg::try_to_enum_list(decltype(&fmt_class_string:: return result; } +size_t cfg::try_to_enum_size(decltype(&fmt_class_string::format) func) +{ + size_t result = 0; + for (u64 i = 0;; i++) + { + std::string var; + func(var, i); + + std::string hex; + fmt_class_string::format(hex, i); + if (var == hex) + { + break; + } + + result++; + } + + return result; +} + void cfg::encode(YAML::Emitter& out, const cfg::_base& rhs) { switch (rhs.get_type()) @@ -416,7 +437,7 @@ void cfg::encode(YAML::Emitter& out, const cfg::_base& rhs) out << YAML::BeginMap; for (const auto& np : static_cast(rhs).get_map()) { - if (np.second == logs::level::notice) continue; + if (np.second == logs::level::_default) continue; out << YAML::Key << np.first; out << YAML::Value << fmt::format("%s", np.second); } @@ -447,37 +468,50 @@ void cfg::encode(YAML::Emitter& out, const cfg::_base& rhs) } } -void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) +bool cfg::decode(const YAML::Node& data, 
cfg::_base& rhs, bool dynamic, bool strict) { if (dynamic && !rhs.get_is_dynamic()) { - return; + return true; } switch (rhs.get_type()) { case type::node: { - if (data.IsScalar() || data.IsSequence()) + if (!data.IsMap()) { - return; // ??? + cfg_log.error("node node is not a map"); + return false; } + bool success = true; + for (const auto& pair : data) { if (!pair.first.IsScalar()) continue; // Find the key among existing nodes - for (const auto& node : static_cast(rhs).get_nodes()) + const auto& nodes = static_cast(rhs).get_nodes(); + const auto it = std::find_if(nodes.cbegin(), nodes.cend(), [&pair](const auto& node) { return ensure(node)->get_name() == pair.first.Scalar(); }); + + if (it == nodes.cend()) { - if (node->get_name() == pair.first.Scalar()) + if (strict) { - decode(pair.second, *node, dynamic); + cfg_log.error("Unknown key found: '%s'", pair.first.Scalar()); + success = false; } + continue; + } + + if (!decode(pair.second, *ensure(*it), dynamic, strict) && strict) + { + success = false; } } - break; + return success; } case type::set: { @@ -485,7 +519,10 @@ void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) if (YAML::convert::decode(data, values)) { - rhs.from_list(std::move(values)); + if (!rhs.from_list(std::move(values)) && strict) + { + return false; + } } break; @@ -495,7 +532,8 @@ void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) { if (!data.IsMap()) { - return; + cfg_log.error("map node is not a map"); + return false; } map_of_type values; @@ -512,22 +550,36 @@ void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) } case type::log: { - if (data.IsScalar() || data.IsSequence()) + if (!data.IsMap()) { - return; // ??? 
+ cfg_log.error("log node is not a map"); + return false; } map_of_type values; for (const auto& pair : data) { - if (!pair.first.IsScalar() || !pair.second.IsScalar()) continue; + if (!pair.first.IsScalar() || !pair.second.IsScalar()) + { + if (strict) + { + if (!pair.first.IsScalar()) + cfg_log.error("Key in map is not a scalar"); + else + cfg_log.error("Value in map is not a scalar. key='%s'", pair.first.Scalar()); + return false; + } + continue; + } u64 value; - if (cfg::try_to_enum_value(&value, &fmt_class_string::format, pair.second.Scalar())) + if (!cfg::try_to_enum_value(&value, &fmt_class_string::format, pair.second.Scalar(), pair.first.Scalar())) { - values.emplace(pair.first.Scalar(), static_cast(static_cast(value))); + if (strict) return false; + continue; /* 'value' is left unset on failure: skip the entry instead of storing an indeterminate level */ } + values.emplace(pair.first.Scalar(), static_cast(static_cast(value))); } static_cast(rhs).set_map(std::move(values)); @@ -537,20 +589,43 @@ void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) { if (!data.IsMap()) { - return; // ??? + cfg_log.error("device node is not a map"); + return false; } map_of_type values; for (const auto& pair : data) { - if (!pair.first.IsScalar() || !pair.second.IsMap()) continue; + if (!pair.first.IsScalar() || !pair.second.IsMap()) + { + if (strict) + { + if (!pair.first.IsScalar()) + cfg_log.error("Key in device map is not a scalar"); + else + cfg_log.error("Value in device map is not a map. key='%s'", pair.first.Scalar()); + return false; + } + continue; + } device_info info{}; for (const auto& key_value : pair.second) { - if (!key_value.first.IsScalar() || !key_value.second.IsScalar()) continue; + if (!key_value.first.IsScalar() || !key_value.second.IsScalar()) + { + if (strict) + { + if (!key_value.first.IsScalar()) + cfg_log.error("Key in device info map is not a scalar"); + else + cfg_log.error("Value in device info map is not a scalar.
key='%s'", key_value.first.Scalar()); + return false; + } + continue; + } if (key_value.first.Scalar() == "Path") info.path = key_value.second.Scalar(); @@ -577,12 +652,17 @@ void cfg::decode(const YAML::Node& data, cfg::_base& rhs, bool dynamic) if (YAML::convert::decode(data, value)) { - rhs.from_string(value, dynamic); + if (!rhs.from_string(value, dynamic) && strict) + { + return false; + } } break; // ??? } } + + return true; } std::string cfg::node::to_string() const @@ -599,8 +679,7 @@ bool cfg::node::from_string(std::string_view value, bool dynamic) if (error.empty()) { - cfg::decode(result, *this, dynamic); - return true; + return cfg::decode(result, *this, dynamic, false); } cfg_log.error("Failed to load node: %s", error); @@ -623,6 +702,19 @@ void cfg::node::restore_defaults() } } +bool cfg::node::validate(std::string_view value) +{ + auto [result, error] = yaml_load(std::string(value)); + + if (error.empty()) + { + return cfg::decode(result, *this, false, true); + } + + cfg_log.error("Failed to load node: %s", error); + return false; +} + std::string cfg::map_entry::get_value(std::string_view key) { if (auto it = m_map.find(key); it != m_map.end()) diff --git a/Utilities/Config.h b/Utilities/Config.h index 35dcddc1a6..4f8e578fde 100644 --- a/Utilities/Config.h +++ b/Utilities/Config.h @@ -25,11 +25,14 @@ namespace cfg std::vector make_float_range(f64 min, f64 max); // Internal hack - bool try_to_enum_value(u64* out, decltype(&fmt_class_string::format) func, std::string_view); + bool try_to_enum_value(u64* out, decltype(&fmt_class_string::format) func, std::string_view value, std::string_view name = {}); // Internal hack std::vector try_to_enum_list(decltype(&fmt_class_string::format) func); + // Internal hack + size_t try_to_enum_size(decltype(&fmt_class_string::format) func); + // Config tree entry type. 
enum class type : unsigned { @@ -107,7 +110,7 @@ namespace cfg } // Try to convert from string (optional) - virtual bool from_string(std::string_view, bool /*dynamic*/ = false); + virtual bool from_string(std::string_view value, bool dynamic = false); // Get string list (optional) virtual std::vector to_list() const @@ -158,6 +161,9 @@ namespace cfg // Restore default members void restore_defaults() override; + + // Try to convert from string and validate + bool validate(std::string_view value); }; class _bool final : public _base @@ -298,7 +304,7 @@ namespace cfg { u64 result; - if (try_to_enum_value(&result, &fmt_class_string::format, value)) + if (try_to_enum_value(&result, &fmt_class_string::format, value, m_name)) { // No narrowing check, it's hard to do right there m_value = static_cast(static_cast>(result)); @@ -312,6 +318,11 @@ namespace cfg { return try_to_enum_list(&fmt_class_string::format); } + + size_t size() const + { + return try_to_enum_size(&fmt_class_string::format); + } }; // Signed 32/64-bit integer entry with custom Min/Max range. 
@@ -374,7 +385,7 @@ namespace cfg bool from_string(std::string_view value, bool /*dynamic*/ = false) override { s64 result; - if (try_to_int64(&result, value, Min, Max)) + if (try_to_int64(&result, value, Min, Max, m_name)) { m_value = static_cast(result); return true; @@ -385,7 +396,7 @@ namespace cfg void set(const s64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } @@ -443,7 +454,7 @@ namespace cfg std::string to_string() const override { std::string result; - if (try_to_string(&result, m_value)) + if (try_to_string(&result, m_value, m_name)) { return result; } @@ -454,7 +465,7 @@ namespace cfg std::string def_to_string() const override { std::string result; - if (try_to_string(&result, def)) + if (try_to_string(&result, def, m_name)) { return result; } @@ -465,7 +476,7 @@ namespace cfg bool from_string(std::string_view value, bool /*dynamic*/ = false) override { f64 result; - if (try_to_float(&result, value, Min, Max)) + if (try_to_float(&result, value, Min, Max, m_name)) { m_value = static_cast(result); return true; @@ -476,7 +487,7 @@ namespace cfg void set(const f64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %f out of bounds (min=%d, max=%d)", m_name, value, Min, Max); /* value is f64, so it is printed with %f like in try_to_float */ m_value = static_cast(value); } @@ -552,7 +563,7 @@ namespace cfg bool from_string(std::string_view value, bool /*dynamic*/ = false) override { u64 result; - if (try_to_uint64(&result, value, Min, Max)) + if (try_to_uint64(&result, value, Min, Max, m_name)) { m_value = static_cast(result); return true; @@ -563,7 +574,7 @@ namespace cfg void set(const u64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %u out of bounds (min=%u, max=%u)", m_name, value, Min, Max); /* unsigned entry: %u, matching try_to_uint64 */ m_value =
static_cast(value); } @@ -638,7 +649,7 @@ namespace cfg bool from_string(std::string_view value, bool /*dynamic*/ = false) override { u128 result; - if (try_to_uint128(&result, value)) + if (try_to_uint128(&result, value, m_name)) { m_value = result; return true; diff --git a/Utilities/File.cpp b/Utilities/File.cpp index 30e6414675..aff4537dea 100644 --- a/Utilities/File.cpp +++ b/Utilities/File.cpp @@ -117,6 +117,7 @@ static fs::error to_error(DWORD e) case ERROR_NEGATIVE_SEEK: return fs::error::inval; case ERROR_DIRECTORY: return fs::error::inval; case ERROR_INVALID_NAME: return fs::error::inval; + case ERROR_INVALID_FUNCTION: return fs::error::inval; case ERROR_SHARING_VIOLATION: return fs::error::acces; case ERROR_DIR_NOT_EMPTY: return fs::error::notempty; case ERROR_NOT_READY: return fs::error::noent; @@ -165,6 +166,7 @@ static fs::error to_error(int e) case ENOTEMPTY: return fs::error::notempty; case EROFS: return fs::error::readonly; case EISDIR: return fs::error::isdir; + case ENOTDIR: return fs::error::notdir; case ENOSPC: return fs::error::nospace; case EXDEV: return fs::error::xdev; default: return fs::error::unknown; @@ -398,12 +400,11 @@ namespace fs class windows_file final : public file_base { HANDLE m_handle; - atomic_t m_pos; + atomic_t m_pos {0}; public: windows_file(HANDLE handle) : m_handle(handle) - , m_pos(0) { } @@ -417,10 +418,10 @@ namespace fs stat_t get_stat() override { - FILE_BASIC_INFO basic_info; + FILE_BASIC_INFO basic_info {}; ensure(GetFileInformationByHandleEx(m_handle, FileBasicInfo, &basic_info, sizeof(FILE_BASIC_INFO))); // "file::stat" - stat_t info; + stat_t info {}; info.is_directory = (basic_info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; info.is_writable = (basic_info.FileAttributes & FILE_ATTRIBUTE_READONLY) == 0; info.size = this->size(); @@ -441,7 +442,7 @@ namespace fs bool trunc(u64 length) override { - FILE_END_OF_FILE_INFO _eof; + FILE_END_OF_FILE_INFO _eof {}; _eof.EndOfFile.QuadPart = length; if 
(!SetFileInformationByHandle(m_handle, FileEndOfFileInfo, &_eof, sizeof(_eof))) @@ -563,6 +564,7 @@ namespace fs u64 size() override { + // NOTE: this can fail if we access a mounted empty drive (e.g. after unmounting an iso). LARGE_INTEGER size; ensure(GetFileSizeEx(m_handle, &size)); // "file::size" @@ -579,12 +581,12 @@ namespace fs file_id id{"windows_file"}; id.data.resize(sizeof(FILE_ID_INFO)); - FILE_ID_INFO info; + FILE_ID_INFO info {}; if (!GetFileInformationByHandleEx(m_handle, FileIdInfo, &info, sizeof(info))) { // Try GetFileInformationByHandle as a fallback - BY_HANDLE_FILE_INFORMATION info2; + BY_HANDLE_FILE_INFORMATION info2{}; ensure(GetFileInformationByHandle(m_handle, &info2)); info = {}; @@ -625,7 +627,7 @@ namespace fs struct ::stat file_info; ensure(::fstat(m_fd, &file_info) == 0); // "file::stat" - stat_t info; + stat_t info {}; info.is_directory = S_ISDIR(file_info.st_mode); info.is_writable = file_info.st_mode & 0200; // HACK: approximation info.size = file_info.st_size; @@ -1656,6 +1658,45 @@ fs::file::file(const std::string& path, bs_t mode) return; } + // Check if the handle is actually valid. + // This can fail on empty mounted drives (e.g. with ERROR_NOT_READY or ERROR_INVALID_FUNCTION). 
+ BY_HANDLE_FILE_INFORMATION info{}; + if (!GetFileInformationByHandle(handle, &info)) + { + const DWORD last_error = GetLastError(); + CloseHandle(handle); + + if (last_error == ERROR_INVALID_FUNCTION) + { + g_tls_error = fs::error::isdir; + return; + } + + g_tls_error = to_error(last_error); + return; + } + + if (info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + CloseHandle(handle); + g_tls_error = fs::error::isdir; + return; + } + + if (info.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) + { + CloseHandle(handle); + g_tls_error = fs::error::acces; + return; + } + + if ((mode & fs::write) && (info.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) + { + CloseHandle(handle); + g_tls_error = fs::error::readonly; + return; + } + m_file = std::make_unique(handle); #else int flags = O_CLOEXEC; // Ensures all files are closed on execl for auto updater @@ -2595,7 +2636,7 @@ bool fs::pending_file::commit(bool overwrite) while (file_handle != INVALID_HANDLE_VALUE) { // Get file ID (used to check for hardlinks) - BY_HANDLE_FILE_INFORMATION file_info; + BY_HANDLE_FILE_INFORMATION file_info{}; if (!GetFileInformationByHandle(file_handle, &file_info) || file_info.nNumberOfLinks == 1) { @@ -2793,6 +2834,7 @@ void fmt_class_string::format(std::string& out, u64 arg) case fs::error::notempty: return "Not empty"; case fs::error::readonly: return "Read only"; case fs::error::isdir: return "Is a directory"; + case fs::error::notdir: return "Not a directory"; case fs::error::toolong: return "Path too long"; case fs::error::nospace: return "Not enough space on the device"; case fs::error::xdev: return "Device mismatch"; diff --git a/Utilities/File.h b/Utilities/File.h index 7e6356da7b..3d332dd0be 100644 --- a/Utilities/File.h +++ b/Utilities/File.h @@ -66,13 +66,13 @@ namespace fs // File attributes (TODO) struct stat_t { - bool is_directory; - bool is_symlink; - bool is_writable; - u64 size; - s64 atime; - s64 mtime; - s64 ctime; + bool is_directory = false; + bool is_symlink = false; + 
bool is_writable = false; + u64 size = 0; + s64 atime = 0; + s64 mtime = 0; + s64 ctime = 0; using enable_bitcopy = std::true_type; @@ -683,6 +683,7 @@ namespace fs notempty, readonly, isdir, + notdir, toolong, nospace, xdev, diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 6dfa9e7cd0..86fc72ed55 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -493,6 +493,10 @@ inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* return reinterpret_cast(uptr(result)); } +#if defined(__INTELLISENSE__) && !defined(LLVM_AVAILABLE) +#define LLVM_AVAILABLE +#endif + #ifdef LLVM_AVAILABLE namespace llvm diff --git a/Utilities/JITASM.cpp b/Utilities/JITASM.cpp index acb5f40b04..90c09bb0bf 100644 --- a/Utilities/JITASM.cpp +++ b/Utilities/JITASM.cpp @@ -14,6 +14,10 @@ #define CAN_OVERCOMMIT #endif +#if defined(__APPLE__) +#include +#endif + LOG_CHANNEL(jit_log, "JIT"); void jit_announce(uptr func, usz size, std::string_view name) diff --git a/Utilities/StrFmt.cpp b/Utilities/StrFmt.cpp index 4431769f3a..d68ef51cb5 100644 --- a/Utilities/StrFmt.cpp +++ b/Utilities/StrFmt.cpp @@ -16,12 +16,12 @@ #include #endif -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -#elif defined(__clang__) +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4996) #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" diff --git a/Utilities/StrUtil.h b/Utilities/StrUtil.h index d274cc074d..3fcfe98a8a 100644 --- a/Utilities/StrUtil.h +++ b/Utilities/StrUtil.h @@ -13,29 +13,29 @@ std::string wchar_to_utf8(std::wstring_view src); std::string utf16_to_utf8(std::u16string_view src); std::u16string utf8_to_utf16(std::string_view src); -// Copy null-terminated string from a std::string or a char array to a char array with truncation -template +// Copy null-terminated string from a 
std::basic_string or a char array to a char array with truncation +template requires requires (D& d, T& t) { std::declval() = &d[0]; } inline void strcpy_trunc(D&& dst, const T& src) { const usz count = std::size(src) >= std::size(dst) ? std::max(std::size(dst), 1) - 1 : std::size(src); - std::memcpy(std::data(dst), std::data(src), count); - std::memset(std::data(dst) + count, 0, std::size(dst) - count); + std::copy_n(std::data(src), count, std::data(dst)); + std::fill_n(std::data(dst) + count, std::size(dst) - count, std::remove_cvref_t{}); } // Convert string to signed integer -bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max); +bool try_to_int64(s64* out, std::string_view value, s64 min, s64 max, std::string_view name = {}); // Convert string to unsigned integer -bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max); +bool try_to_uint64(u64* out, std::string_view value, u64 min, u64 max, std::string_view name = {}); // Convert string to unsigned int128_t -bool try_to_uint128(u128* out, std::string_view value); +bool try_to_uint128(u128* out, std::string_view value, std::string_view name = {}); // Convert string to float -bool try_to_float(f64* out, std::string_view value, f64 min, f64 max); +bool try_to_float(f64* out, std::string_view value, f64 min, f64 max, std::string_view name = {}); // Convert float to string locale independent -bool try_to_string(std::string* out, const f64& value); +bool try_to_string(std::string* out, f64 value, std::string_view name = {}); // Get the file extension of a file path ("png", "jpg", etc.) 
std::string get_file_extension(const std::string& file_path); diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 810b8fd7c5..57d7446daf 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -8,19 +8,24 @@ #include "Emu/RSX/RSXThread.h" #include "Thread.h" #include "Utilities/JIT.h" -#include #include #ifdef ARCH_ARM64 #include "Emu/CPU/Backends/AArch64/AArch64Signal.h" #endif +#ifdef __cpp_lib_stacktrace +#include "rpcs3_version.h" +#include +#endif + #ifdef _WIN32 #include #include #include #include +#include "stack_trace.h" #include "util/dyn_lib.hpp" DYNAMIC_IMPORT_RENAME("Kernel32.dll", SetThreadDescriptionImport, "SetThreadDescription", HRESULT(HANDLE hThread, PCWSTR lpThreadDescription)); @@ -103,7 +108,7 @@ thread_local u64 g_tls_fault_rsx = 0; thread_local u64 g_tls_fault_spu = 0; thread_local u64 g_tls_wait_time = 0; thread_local u64 g_tls_wait_fail = 0; -thread_local bool g_tls_access_violation_recovered = false; +thread_local u64 g_tls_access_violation_recovered = umax; extern thread_local std::string(*g_tls_log_prefix)(); namespace stx @@ -1265,7 +1270,7 @@ namespace rsx extern std::function g_access_violation_handler; } -bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept +bool handle_access_violation(u32 addr, bool is_writing, bool is_exec, ucontext_t* context) noexcept { g_tls_fault_all++; @@ -1301,7 +1306,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } spu_protection{cpu}; - if (addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) + if (!is_exec && addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) { bool state_changed = false; @@ -1367,7 +1372,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe { auto thread = idm::get_unlocked>(spu_thread::find_raw_spu((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); - if (!thread) + if (!thread || is_exec) { break; } @@ 
-1499,7 +1504,9 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe static_cast(context); #endif /* ARCH_ */ - if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable)) + const auto required_page_perms = (is_writing ? vm::page_writable : vm::page_readable) + (is_exec ? vm::page_executable : 0); + + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1507,9 +1514,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Hack: allocate memory in case the emulator is stopping const auto hack_alloc = [&]() { - g_tls_access_violation_recovered = true; - - if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable)) + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1521,17 +1526,45 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe return false; } + extern void ppu_register_range(u32 addr, u32 size); + + bool reprotected = false; + if (vm::writer_lock mlock; area->flags & vm::preallocated || vm::check_addr(addr, 0)) { // For allocated memory with protection lower than required (such as protection::no or read-only while writing to it) utils::memory_protect(vm::base(addr & -0x1000), 0x1000, utils::protection::rw); + reprotected = true; + } + + if (reprotected) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; return true; } - return area->falloc(addr & -0x10000, 0x10000) || vm::check_addr(addr, is_writing ? 
vm::page_writable : vm::page_readable); + const bool allocated = area->falloc(addr & -0x10000, 0x10000); + + if (allocated) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; + return true; + } + + return false; }; - if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu)) + if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu) && !is_exec) { vm::temporary_unlock(*cpu); u32 pf_port_id = 0; @@ -1674,7 +1707,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe if (cpu->get_class() == thread_class::spu) { - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); vm_log.always()("[%s] Access violation %s location 0x%x (%s)", cpu->get_name(), is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); @@ -1710,10 +1743,10 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Note: a thread may access violate more than once after hack_alloc recovery // Do not log any further access violations in this case. - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); - vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (cpu && cpu->get_class() == thread_class::ppu && cpu->get_pc() == addr ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); + vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (is_exec ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? 
"read-only memory" : "unmapped memory"); } while (Emu.IsPausedOrReady()) @@ -1762,8 +1795,13 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } - if (Emu.IsStopped() && !hack_alloc()) + if (Emu.IsStopped()) { + while (!hack_alloc()) + { + thread_ctrl::wait_for(1000); + } + return false; } @@ -1802,6 +1840,7 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && !is_executing) { u32 addr = 0; + bool is_exec = false; if (auto [addr0, ok] = vm::try_get_addr(ptr); ok) { @@ -1809,14 +1848,21 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept } else if (const usz exec64 = (ptr - vm::g_exec_addr) / 2; exec64 <= u32{umax}) { + is_exec = true; addr = static_cast(exec64); } - else + else if (const usz exec64 = (ptr - vm::g_exec_addr - vm::g_exec_addr_seg_offset); exec64 <= u32{umax}) { + is_exec = true; + addr = static_cast(exec64); + } + else + { + std::this_thread::sleep_for(1ms); return EXCEPTION_CONTINUE_SEARCH; } - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, pExp->ContextRecord)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, is_exec, pExp->ContextRecord)) { return EXCEPTION_CONTINUE_EXECUTION; } @@ -1936,9 +1982,39 @@ static LONG exception_filter(PEXCEPTION_POINTERS pExp) noexcept } fmt::append(msg, "RPCS3 image base: %p.\n", GetModuleHandle(NULL)); + +#if defined(ARCH_X64) + fmt::append(msg, "RAX: %016llX RBX: %016llX\n", pExp->ContextRecord->Rax, pExp->ContextRecord->Rbx); + fmt::append(msg, "RCX: %016llX RDX: %016llX\n", pExp->ContextRecord->Rcx, pExp->ContextRecord->Rdx); + fmt::append(msg, "RSI: %016llX RDI: %016llX\n", pExp->ContextRecord->Rsi, pExp->ContextRecord->Rdi); + fmt::append(msg, "RBP: %016llX RSP: %016llX\n", pExp->ContextRecord->Rbp, pExp->ContextRecord->Rsp); + fmt::append(msg, "R8: %016llX R9: %016llX\n", pExp->ContextRecord->R8, 
pExp->ContextRecord->R9); + fmt::append(msg, "R10: %016llX R11: %016llX\n", pExp->ContextRecord->R10, pExp->ContextRecord->R11); + fmt::append(msg, "R12: %016llX R13: %016llX\n", pExp->ContextRecord->R12, pExp->ContextRecord->R13); + fmt::append(msg, "R14: %016llX R15: %016llX\n", pExp->ContextRecord->R14, pExp->ContextRecord->R15); + fmt::append(msg, "RFLAGS: %08X\n", pExp->ContextRecord->EFlags); +#elif defined(ARCH_ARM64) + for (int i = 0; i < 29; i += 2) + { + if (i + 1 < 29) + fmt::append(msg, "X%-2d: %016llX X%-2d: %016llX\n", i, pExp->ContextRecord->X[i], i + 1, pExp->ContextRecord->X[i + 1]); + else + fmt::append(msg, "X%-2d: %016llX\n", i, pExp->ContextRecord->X[i]); + } + fmt::append(msg, "SP: %016llX FP: %016llX LR: %016llX\n", pExp->ContextRecord->Sp, pExp->ContextRecord->Fp, pExp->ContextRecord->Lr); + fmt::append(msg, "CPSR: %08X\n", pExp->ContextRecord->Cpsr); +#endif - // TODO: print registers and the callstack + const auto stack_trace = utils::get_backtrace(64, pExp->ContextRecord); + const auto stack_symbols = utils::get_backtrace_symbols(stack_trace); + msg += "Stack Trace:\n"; + + for (const auto& symbol : stack_symbols) + { + fmt::append(msg, "%s\n", symbol); + } + sys_log.fatal("\n%s", msg); logs::listener::sync_all(); @@ -2023,12 +2099,13 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept #endif const u64 exec64 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) / 2; + const u64 exec64_2 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) - vm::g_exec_addr_seg_offset; const auto cause = is_executing ? "executing" : is_writing ? 
"writing" : "reading"; if (auto [addr, ok] = vm::try_get_addr(info->si_addr); ok && !is_executing) { // Try to process access violation - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, false, context)) { return; } @@ -2036,7 +2113,14 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept if (exec64 < 0x100000000ull && !is_executing) { - if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, true, context)) + { + return; + } + } + else if (exec64_2 < 0x100000000ull && !is_executing) + { + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64_2), is_writing, true, context)) { return; } @@ -2355,7 +2439,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept g_tls_fault_spu = 0; g_tls_wait_time = 0; g_tls_wait_fail = 0; - g_tls_access_violation_recovered = false; + g_tls_access_violation_recovered = umax; g_tls_log_prefix = []() -> std::string { return {}; }; @@ -2799,8 +2883,25 @@ void thread_base::exec() } } +void thread_ctrl::set_name(std::string name) +{ + ensure(g_tls_this_thread); + g_tls_this_thread->m_tname.store(make_single(name)); + g_tls_this_thread->set_name(std::move(name)); +} + [[noreturn]] void thread_ctrl::emergency_exit(std::string_view reason) { + // Print stacktrace +#ifdef __cpp_lib_stacktrace + if (rpcs3::is_local_build()) + { + std::ostringstream oss; + oss << std::stacktrace::current(); + sys_log.notice("StackTrace\n\n%s\n", oss.str()); + } +#endif + if (const std::string info = dump_useful_thread_info(); !info.empty()) { sys_log.notice("\n%s", info); @@ -2816,6 +2917,16 @@ void thread_base::exec() } } + if (auto [total, current] = utils::get_memory_usage(); total - current <= 256 * 1024 * 1024) + { + if (reason_buf.empty()) + { 
+ reason_buf = std::string{reason}; + } + + fmt::append(reason_buf, " (Possible RAM deficiency: free RAM: %dMB)", (total - current) / (1024 * 1024)); + } + if (!reason_buf.empty()) { reason = reason_buf; diff --git a/Utilities/Thread.h b/Utilities/Thread.h index 02e5db56ff..bafcea0b9f 100644 --- a/Utilities/Thread.h +++ b/Utilities/Thread.h @@ -4,6 +4,7 @@ #include "util/atomic.hpp" #include "util/shared_ptr.hpp" +#include #include // Hardware core layout @@ -128,7 +129,7 @@ public: const native_entry entry_point; // Set name for debugger - static void set_name(std::string); + static void set_name(std::string name); private: // Thread handle (platform-specific) @@ -231,11 +232,7 @@ public: } // Set current thread name (not recommended) - static void set_name(std::string name) - { - g_tls_this_thread->m_tname.store(make_single(name)); - g_tls_this_thread->set_name(std::move(name)); - } + static void set_name(std::string name); // Set thread name (not recommended) template diff --git a/Utilities/bin_patch.cpp b/Utilities/bin_patch.cpp index 9449d808c0..fd94b830e0 100644 --- a/Utilities/bin_patch.cpp +++ b/Utilities/bin_patch.cpp @@ -329,7 +329,7 @@ bool patch_engine::load(patch_map& patches_map, const std::string& path, std::st is_valid = false; continue; } - else if (serial.size() != 9 || !std::all_of(serial.begin(), serial.end(), [](char c) { return std::isalnum(c); })) + else if (serial.size() != 9 || !std::all_of(serial.begin(), serial.end(), [](char c) { return std::isalnum(static_cast(c)); })) { append_log_message(log_messages, fmt::format("Error: Serial '%s' invalid (patch: %s, key: %s, location: %s, file: %s)", serial, description, main_key, get_yaml_node_location(serial_node), path), &patch_log.error); is_valid = false; diff --git a/Utilities/geometry.h b/Utilities/geometry.h index faace6c77e..3ffbc04dd3 100644 --- a/Utilities/geometry.h +++ b/Utilities/geometry.h @@ -821,6 +821,14 @@ struct color4_base a *= rhs; } + void operator += (const color4_base& rhs) 
+ { + r += rhs.r; + g += rhs.g; + b += rhs.b; + a += rhs.a; + } + constexpr color4_base operator * (const color4_base& rhs) const { return { r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a }; diff --git a/Utilities/stack_trace.cpp b/Utilities/stack_trace.cpp index f44751fcda..049e0f1805 100644 --- a/Utilities/stack_trace.cpp +++ b/Utilities/stack_trace.cpp @@ -30,42 +30,61 @@ namespace utils return out.data(); } - std::vector get_backtrace(int max_depth) + std::vector get_backtrace(int max_depth, PCONTEXT ctx) { + static struct sym_initer_t + { + sym_initer_t() noexcept + { + SymInitialize(GetCurrentProcess(), NULL, TRUE); + } + ~sym_initer_t() noexcept + { + SymCleanup(GetCurrentProcess()); + } + } s_initer{}; + std::vector result = {}; const auto hProcess = ::GetCurrentProcess(); const auto hThread = ::GetCurrentThread(); CONTEXT context{}; - RtlCaptureContext(&context); + if (ctx) + context = *ctx; + else + RtlCaptureContext(&context); STACKFRAME64 stack = {}; stack.AddrPC.Mode = AddrModeFlat; stack.AddrStack.Mode = AddrModeFlat; stack.AddrFrame.Mode = AddrModeFlat; #if defined(ARCH_X64) + const DWORD machineType = IMAGE_FILE_MACHINE_AMD64; stack.AddrPC.Offset = context.Rip; stack.AddrStack.Offset = context.Rsp; stack.AddrFrame.Offset = context.Rbp; #elif defined(ARCH_ARM64) + const DWORD machineType = IMAGE_FILE_MACHINE_ARM64; stack.AddrPC.Offset = context.Pc; stack.AddrStack.Offset = context.Sp; stack.AddrFrame.Offset = context.Fp; +#else +#error "Unsupported architecture" #endif while (max_depth--) { if (!StackWalk64( - IMAGE_FILE_MACHINE_AMD64, - hProcess, - hThread, - &stack, - &context, - NULL, - SymFunctionTableAccess64, - SymGetModuleBase64, - NULL)) + machineType, + hProcess, + hThread, + &stack, + &context, + NULL, + SymFunctionTableAccess64, + SymGetModuleBase64, + NULL)) { break; } diff --git a/Utilities/stack_trace.h b/Utilities/stack_trace.h index f57175611f..d0cec0cf4c 100644 --- a/Utilities/stack_trace.h +++ b/Utilities/stack_trace.h @@ -2,6 +2,11 @@ 
#include #include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + namespace utils { namespace stack_trace @@ -30,7 +35,12 @@ namespace utils }; } +#ifdef _WIN32 + std::vector get_backtrace(int max_depth = 255, PCONTEXT ctx = nullptr); +#else std::vector get_backtrace(int max_depth = 255); +#endif + std::vector get_backtrace_symbols(const std::vector& stack); FORCE_INLINE void print_trace(stack_trace::Logger auto& logger, int max_depth = 255) diff --git a/bin/GuiConfigs/Classic (Bright).qss b/bin/GuiConfigs/Classic (Bright).qss index 143e42df94..f961e63e00 100644 --- a/bin/GuiConfigs/Classic (Bright).qss +++ b/bin/GuiConfigs/Classic (Bright).qss @@ -37,10 +37,6 @@ QSlider#sizeSlider::handle:horizontal { QLabel#toolbar_icon_color { color: rgba(64,64,64,255); } -/* thumbnail icon color stylesheet */ -QLabel#thumbnail_icon_color { - color: rgba(0,100,231,255); -} /* gamelist icon color stylesheet */ QLabel#gamelist_icon_background_color { color: rgba(209,209,209,255); diff --git a/bin/GuiConfigs/Darker Style by TheMitoSan.qss b/bin/GuiConfigs/Darker Style by TheMitoSan.qss index 47270a211e..8797ffd3c9 100644 --- a/bin/GuiConfigs/Darker Style by TheMitoSan.qss +++ b/bin/GuiConfigs/Darker Style by TheMitoSan.qss @@ -237,11 +237,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #262626; -} - /* Set Log colors */ QPlainTextEdit#log_frame { background-color: #000; /* Black */ diff --git a/bin/GuiConfigs/Envy.qss b/bin/GuiConfigs/Envy.qss index a78ba04f58..3e2e1ddc35 100644 --- a/bin/GuiConfigs/Envy.qss +++ b/bin/GuiConfigs/Envy.qss @@ -573,11 +573,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #23262d; -} - /* Log colors */ QPlainTextEdit#log_frame { background-color: #23262d; diff --git a/bin/GuiConfigs/Kuroi (Dark) by Ani.qss b/bin/GuiConfigs/Kuroi (Dark) 
by Ani.qss index 54c667213b..2cd81267f1 100644 --- a/bin/GuiConfigs/Kuroi (Dark) by Ani.qss +++ b/bin/GuiConfigs/Kuroi (Dark) by Ani.qss @@ -265,11 +265,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #444444; -} - /* Memory Viewer */ QLabel#memory_viewer_address_panel { color: #00cbff; /* Font Color: Blue */ diff --git a/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss b/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss index 410db682f7..96b84a9196 100644 --- a/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss +++ b/bin/GuiConfigs/ModernBlue Theme by TheMitoSan.qss @@ -244,11 +244,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #262626; -} - /* Set Log colors */ QPlainTextEdit#log_frame { background-color: #181d24; /* Black */ diff --git a/bin/GuiConfigs/Nekotekina by GooseWing.qss b/bin/GuiConfigs/Nekotekina by GooseWing.qss index 435d550755..93fadcdd68 100755 --- a/bin/GuiConfigs/Nekotekina by GooseWing.qss +++ b/bin/GuiConfigs/Nekotekina by GooseWing.qss @@ -397,12 +397,6 @@ QLabel#gamelist_icon_background_color { } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #ffd785; -} - - QLabel#log_level_always { color: #00ffff; /* Cyan */ } diff --git a/bin/GuiConfigs/Skyline (Nightfall).qss b/bin/GuiConfigs/Skyline (Nightfall).qss index 625a6a28b6..4729a59173 100644 --- a/bin/GuiConfigs/Skyline (Nightfall).qss +++ b/bin/GuiConfigs/Skyline (Nightfall).qss @@ -656,11 +656,6 @@ QLabel#color_button { background: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #370048; -} - /* Debugger colors */ QLabel#debugger_frame_breakpoint { color: #000; /* Font Color: Black */ diff --git a/bin/GuiConfigs/Skyline.qss b/bin/GuiConfigs/Skyline.qss index ef3c7c6857..1004dc0016 100644 --- 
a/bin/GuiConfigs/Skyline.qss +++ b/bin/GuiConfigs/Skyline.qss @@ -664,11 +664,6 @@ QLabel#color_button { background: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #8500ae; -} - /* Debugger colors */ QLabel#debugger_frame_breakpoint { color: #000; /* Font Color: Black */ diff --git a/bin/GuiConfigs/YoRHa by Ani.qss b/bin/GuiConfigs/YoRHa by Ani.qss index c772f25196..51f09897b1 100644 --- a/bin/GuiConfigs/YoRHa by Ani.qss +++ b/bin/GuiConfigs/YoRHa by Ani.qss @@ -379,11 +379,6 @@ QLabel#gamelist_icon_background_color { color: transparent; } -/* Set Windows Taskbar Thumbnail colors */ -QLabel#thumbnail_icon_color { - color: #4d4940; -} - QLabel#log_level_always { color: #00ffff; /* Cyan */ } diff --git a/bin/Icons/ui/home/256/bug-solid.png b/bin/Icons/ui/home/256/bug-solid.png new file mode 100644 index 0000000000..2c88980603 Binary files /dev/null and b/bin/Icons/ui/home/256/bug-solid.png differ diff --git a/bin/Icons/ui/home/256/circle-left-solid.png b/bin/Icons/ui/home/256/circle-left-solid.png new file mode 100644 index 0000000000..f6b2e58768 Binary files /dev/null and b/bin/Icons/ui/home/256/circle-left-solid.png differ diff --git a/bin/Icons/ui/home/256/display-solid.png b/bin/Icons/ui/home/256/display-solid.png new file mode 100644 index 0000000000..84cc043b07 Binary files /dev/null and b/bin/Icons/ui/home/256/display-solid.png differ diff --git a/bin/Icons/ui/home/256/floppy-disk-solid.png b/bin/Icons/ui/home/256/floppy-disk-solid.png new file mode 100644 index 0000000000..3701361569 Binary files /dev/null and b/bin/Icons/ui/home/256/floppy-disk-solid.png differ diff --git a/bin/Icons/ui/home/256/gamepad-solid.png b/bin/Icons/ui/home/256/gamepad-solid.png new file mode 100644 index 0000000000..fe08f1bc58 Binary files /dev/null and b/bin/Icons/ui/home/256/gamepad-solid.png differ diff --git a/bin/Icons/ui/home/256/gauge-solid.png b/bin/Icons/ui/home/256/gauge-solid.png new file mode 100644 index 
0000000000..5b6d731a9d Binary files /dev/null and b/bin/Icons/ui/home/256/gauge-solid.png differ diff --git a/bin/Icons/ui/home/256/headphones-solid.png b/bin/Icons/ui/home/256/headphones-solid.png new file mode 100644 index 0000000000..feb015695d Binary files /dev/null and b/bin/Icons/ui/home/256/headphones-solid.png differ diff --git a/bin/Icons/ui/home/256/home.png b/bin/Icons/ui/home/256/home.png new file mode 100644 index 0000000000..622984c913 Binary files /dev/null and b/bin/Icons/ui/home/256/home.png differ diff --git a/bin/Icons/ui/home/256/maximize-solid.png b/bin/Icons/ui/home/256/maximize-solid.png new file mode 100644 index 0000000000..65eaa43308 Binary files /dev/null and b/bin/Icons/ui/home/256/maximize-solid.png differ diff --git a/bin/Icons/ui/home/256/play-button-arrowhead.png b/bin/Icons/ui/home/256/play-button-arrowhead.png new file mode 100644 index 0000000000..37e9cefe93 Binary files /dev/null and b/bin/Icons/ui/home/256/play-button-arrowhead.png differ diff --git a/bin/Icons/ui/home/256/power-off-solid.png b/bin/Icons/ui/home/256/power-off-solid.png new file mode 100644 index 0000000000..cf0ab44839 Binary files /dev/null and b/bin/Icons/ui/home/256/power-off-solid.png differ diff --git a/bin/Icons/ui/home/256/rotate-left-solid.png b/bin/Icons/ui/home/256/rotate-left-solid.png new file mode 100644 index 0000000000..5eaf29edba Binary files /dev/null and b/bin/Icons/ui/home/256/rotate-left-solid.png differ diff --git a/bin/Icons/ui/home/256/screenshot.png b/bin/Icons/ui/home/256/screenshot.png new file mode 100644 index 0000000000..091e2bbc2e Binary files /dev/null and b/bin/Icons/ui/home/256/screenshot.png differ diff --git a/bin/Icons/ui/home/256/settings.png b/bin/Icons/ui/home/256/settings.png new file mode 100644 index 0000000000..d682b9b27c Binary files /dev/null and b/bin/Icons/ui/home/256/settings.png differ diff --git a/bin/Icons/ui/home/256/sliders-solid.png b/bin/Icons/ui/home/256/sliders-solid.png new file mode 100644 index 
0000000000..cfb030721e Binary files /dev/null and b/bin/Icons/ui/home/256/sliders-solid.png differ diff --git a/bin/Icons/ui/home/256/trophy-solid.png b/bin/Icons/ui/home/256/trophy-solid.png new file mode 100644 index 0000000000..1bd98a9ac9 Binary files /dev/null and b/bin/Icons/ui/home/256/trophy-solid.png differ diff --git a/bin/Icons/ui/home/256/user-group-solid.png b/bin/Icons/ui/home/256/user-group-solid.png new file mode 100644 index 0000000000..a154fa5ba9 Binary files /dev/null and b/bin/Icons/ui/home/256/user-group-solid.png differ diff --git a/bin/Icons/ui/home/256/video-camera.png b/bin/Icons/ui/home/256/video-camera.png new file mode 100644 index 0000000000..9f5460fba6 Binary files /dev/null and b/bin/Icons/ui/home/256/video-camera.png differ diff --git a/bin/Icons/ui/home/32/bug-solid.png b/bin/Icons/ui/home/32/bug-solid.png new file mode 100644 index 0000000000..401fa0ea27 Binary files /dev/null and b/bin/Icons/ui/home/32/bug-solid.png differ diff --git a/bin/Icons/ui/home/32/circle-left-solid.png b/bin/Icons/ui/home/32/circle-left-solid.png new file mode 100644 index 0000000000..1acf201d63 Binary files /dev/null and b/bin/Icons/ui/home/32/circle-left-solid.png differ diff --git a/bin/Icons/ui/home/32/display-solid.png b/bin/Icons/ui/home/32/display-solid.png new file mode 100644 index 0000000000..8a48c89494 Binary files /dev/null and b/bin/Icons/ui/home/32/display-solid.png differ diff --git a/bin/Icons/ui/home/32/floppy-disk-solid.png b/bin/Icons/ui/home/32/floppy-disk-solid.png new file mode 100644 index 0000000000..836c6523c3 Binary files /dev/null and b/bin/Icons/ui/home/32/floppy-disk-solid.png differ diff --git a/bin/Icons/ui/home/32/gamepad-solid.png b/bin/Icons/ui/home/32/gamepad-solid.png new file mode 100644 index 0000000000..b783fee792 Binary files /dev/null and b/bin/Icons/ui/home/32/gamepad-solid.png differ diff --git a/bin/Icons/ui/home/32/gauge-solid.png b/bin/Icons/ui/home/32/gauge-solid.png new file mode 100644 index 
0000000000..9853811ae2 Binary files /dev/null and b/bin/Icons/ui/home/32/gauge-solid.png differ diff --git a/bin/Icons/ui/home/32/headphones-solid.png b/bin/Icons/ui/home/32/headphones-solid.png new file mode 100644 index 0000000000..e2cfabb07c Binary files /dev/null and b/bin/Icons/ui/home/32/headphones-solid.png differ diff --git a/bin/Icons/ui/home/32/home.png b/bin/Icons/ui/home/32/home.png new file mode 100644 index 0000000000..2ec05eab24 Binary files /dev/null and b/bin/Icons/ui/home/32/home.png differ diff --git a/bin/Icons/ui/home/32/maximize-solid.png b/bin/Icons/ui/home/32/maximize-solid.png new file mode 100644 index 0000000000..7a0a9e4fb6 Binary files /dev/null and b/bin/Icons/ui/home/32/maximize-solid.png differ diff --git a/bin/Icons/ui/home/32/play-button-arrowhead.png b/bin/Icons/ui/home/32/play-button-arrowhead.png new file mode 100644 index 0000000000..6ce5505f4d Binary files /dev/null and b/bin/Icons/ui/home/32/play-button-arrowhead.png differ diff --git a/bin/Icons/ui/home/32/power-off-solid.png b/bin/Icons/ui/home/32/power-off-solid.png new file mode 100644 index 0000000000..21ebad8e69 Binary files /dev/null and b/bin/Icons/ui/home/32/power-off-solid.png differ diff --git a/bin/Icons/ui/home/32/rotate-left-solid.png b/bin/Icons/ui/home/32/rotate-left-solid.png new file mode 100644 index 0000000000..b466d28a8e Binary files /dev/null and b/bin/Icons/ui/home/32/rotate-left-solid.png differ diff --git a/bin/Icons/ui/home/32/screenshot.png b/bin/Icons/ui/home/32/screenshot.png new file mode 100644 index 0000000000..97255279b4 Binary files /dev/null and b/bin/Icons/ui/home/32/screenshot.png differ diff --git a/bin/Icons/ui/home/32/settings.png b/bin/Icons/ui/home/32/settings.png new file mode 100644 index 0000000000..eeaeceef7d Binary files /dev/null and b/bin/Icons/ui/home/32/settings.png differ diff --git a/bin/Icons/ui/home/32/sliders-solid.png b/bin/Icons/ui/home/32/sliders-solid.png new file mode 100644 index 0000000000..ca4c75df23 Binary files 
/dev/null and b/bin/Icons/ui/home/32/sliders-solid.png differ diff --git a/bin/Icons/ui/home/32/trophy-solid.png b/bin/Icons/ui/home/32/trophy-solid.png new file mode 100644 index 0000000000..9d94e14b97 Binary files /dev/null and b/bin/Icons/ui/home/32/trophy-solid.png differ diff --git a/bin/Icons/ui/home/32/user-group-solid.png b/bin/Icons/ui/home/32/user-group-solid.png new file mode 100644 index 0000000000..114c45599d Binary files /dev/null and b/bin/Icons/ui/home/32/user-group-solid.png differ diff --git a/bin/Icons/ui/home/32/video-camera.png b/bin/Icons/ui/home/32/video-camera.png new file mode 100644 index 0000000000..d8fa8dfaaf Binary files /dev/null and b/bin/Icons/ui/home/32/video-camera.png differ diff --git a/bin/Icons/ui/loading.png b/bin/Icons/ui/loading.png new file mode 100644 index 0000000000..33288035c7 Binary files /dev/null and b/bin/Icons/ui/loading.png differ diff --git a/buildfiles/cmake/ConfigureCompiler.cmake b/buildfiles/cmake/ConfigureCompiler.cmake index be900d13c2..ea8cd3200b 100644 --- a/buildfiles/cmake/ConfigureCompiler.cmake +++ b/buildfiles/cmake/ConfigureCompiler.cmake @@ -96,11 +96,6 @@ else() # This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts. 
add_link_options(-Wl,--exclude-libs,ALL) elseif(WIN32) - add_compile_definitions(__STDC_FORMAT_MACROS=1) - - # Workaround for mingw64 (MSYS2) - add_link_options(-Wl,--allow-multiple-definition) - # Increase stack limit to 8 MB add_link_options(-Wl,--stack -Wl,8388608) endif() diff --git a/buildfiles/cmake/FindWolfSSL.cmake b/buildfiles/cmake/FindWolfSSL.cmake index d2e30be60b..35f316837c 100644 --- a/buildfiles/cmake/FindWolfSSL.cmake +++ b/buildfiles/cmake/FindWolfSSL.cmake @@ -1,4 +1,3 @@ -set(WOLFSSL_LIBRARY ON) -set(WOLFSSL_INCLUDE_DIR ON) -set(WOLFSSL_LIBRARIES wolfssl) +set(WOLFSSL_LIBRARY wolfssl) +set(WOLFSSL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/wolfssl) set(WOLFSSL_FOUND TRUE) diff --git a/buildfiles/cmake/FindZLIB.cmake b/buildfiles/cmake/FindZLIB.cmake index 0a29abafa9..ff5869a5f7 100644 --- a/buildfiles/cmake/FindZLIB.cmake +++ b/buildfiles/cmake/FindZLIB.cmake @@ -3,9 +3,9 @@ if(USE_SYSTEM_ZLIB) find_package(ZLIB) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) else() - add_library(ZLIB::ZLIB INTERFACE IMPORTED) + add_library(ZLIB::ZLIB STATIC IMPORTED) set_target_properties(ZLIB::ZLIB PROPERTIES - INTERFACE_LINK_LIBRARIES zlibstatic + IMPORTED_LOCATION "${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib/libzlibstatic.a" INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/3rdparty/zlib/zlib;${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib") set(ZLIB_FOUND TRUE) endif() diff --git a/buildfiles/msvc/common_default.props b/buildfiles/msvc/common_default.props index 04f6502839..bfddbb5465 100644 --- a/buildfiles/msvc/common_default.props +++ b/buildfiles/msvc/common_default.props @@ -13,7 +13,7 @@ stdcpplatest - stdcpp20 + stdcpp23 _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING=1;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) false -d2FH4- %(AdditionalOptions) diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index e32597f792..ba65a16eaf 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -8,7 +8,7 @@ 
include(${CMAKE_CURRENT_SOURCE_DIR}/git-version.cmake) include(ConfigureCompiler) include(CheckFunctionExists) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 23) if(UNIX AND NOT APPLE AND NOT ANDROID) add_compile_definitions(DATADIR="${CMAKE_INSTALL_FULL_DATADIR}/rpcs3") @@ -87,12 +87,6 @@ if (NOT ANDROID) message(FATAL_ERROR "RPCS3 requires either X11 or Wayland (or both) for Vulkan.") endif() - if(UNIX) - set(CMAKE_THREAD_PREFER_PTHREAD TRUE) - find_package(Threads REQUIRED) - target_link_libraries(rpcs3_lib PRIVATE Threads::Threads) - endif() - if(WIN32) target_link_libraries(rpcs3_lib PRIVATE ws2_32 Iphlpapi Winmm Psapi gdi32 setupapi) else() @@ -193,8 +187,10 @@ if(BUILD_RPCS3_TESTS) tests/test_tuple.cpp tests/test_simple_array.cpp tests/test_address_range.cpp + tests/test_sys_fs.cpp tests/test_rsx_cfg.cpp tests/test_rsx_fp_asm.cpp + tests/test_dmux_pamf.cpp ) target_link_libraries(rpcs3_test @@ -202,6 +198,7 @@ if(BUILD_RPCS3_TESTS) rpcs3_lib rpcs3_emu GTest::gtest + GTest::gmock ) target_include_directories(rpcs3_test diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index edb98a6fa8..8591399ce8 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -126,6 +126,7 @@ target_sources(rpcs3_emu PRIVATE ../Loader/PUP.cpp ../Loader/TAR.cpp ../Loader/ISO.cpp + ../Loader/iso_cache.cpp ../Loader/TROPUSR.cpp ../Loader/TRP.cpp ) @@ -160,7 +161,7 @@ if(WIN32) Audio/XAudio2/xaudio2_enumerator.cpp ) target_compile_definitions(rpcs3_emu PRIVATE UNICODE _UNICODE _WIN32_WINNT=0x0A00) - target_link_libraries(rpcs3_emu PRIVATE pdh bcrypt) + target_link_libraries(rpcs3_emu PRIVATE pdh bcrypt dbghelp) endif() # Cell @@ -487,6 +488,7 @@ target_sources(rpcs3_emu PRIVATE RSX/NV47/HW/nv406e.cpp RSX/NV47/HW/nv4097.cpp RSX/Overlays/FriendsList/overlay_friends_list_dialog.cpp + RSX/Overlays/HomeMenu/overlay_home_icons.cpp RSX/Overlays/HomeMenu/overlay_home_menu.cpp RSX/Overlays/HomeMenu/overlay_home_menu_components.cpp 
RSX/Overlays/HomeMenu/overlay_home_menu_main_menu.cpp @@ -500,6 +502,8 @@ target_sources(rpcs3_emu PRIVATE RSX/Overlays/overlays.cpp RSX/Overlays/overlay_animated_icon.cpp RSX/Overlays/overlay_animation.cpp + RSX/Overlays/overlay_audio.cpp + RSX/Overlays/overlay_checkbox.cpp RSX/Overlays/overlay_compile_notification.cpp RSX/Overlays/overlay_controls.cpp RSX/Overlays/overlay_cursor.cpp @@ -516,6 +520,9 @@ target_sources(rpcs3_emu PRIVATE RSX/Overlays/overlay_perf_metrics.cpp RSX/Overlays/overlay_progress_bar.cpp RSX/Overlays/overlay_save_dialog.cpp + RSX/Overlays/overlay_select.cpp + RSX/Overlays/overlay_slider.cpp + RSX/Overlays/overlay_tabs.cpp RSX/Overlays/overlay_trophy_notification.cpp RSX/Overlays/overlay_user_list_dialog.cpp RSX/Overlays/overlay_utils.cpp @@ -639,6 +646,9 @@ if(TARGET 3rdparty_vulkan) endif() endif() +if(NOT WIN32) + set(THREADS_PREFER_PTHREAD_FLAG ON) +endif() find_package(Threads REQUIRED) target_link_libraries(rpcs3_emu diff --git a/rpcs3/Emu/CPU/Backends/AArch64/AArch64Common.h b/rpcs3/Emu/CPU/Backends/AArch64/AArch64Common.h index dff06dfb81..2ce4fa68b3 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64/AArch64Common.h +++ b/rpcs3/Emu/CPU/Backends/AArch64/AArch64Common.h @@ -20,19 +20,19 @@ namespace aarch64 sp }; - static const char* gpr_names[] = + [[maybe_unused]] static const char* gpr_names[] = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" }; - static const char* spr_names[] = + [[maybe_unused]] static const char* spr_names[] = { "xzr", "pc", "sp" }; - static const char* spr_asm_names[] = + [[maybe_unused]] static const char* spr_asm_names[] = { "xzr", ".", "sp" }; diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index afec56f7e1..78a0a3bd7b 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -61,6 +61,7 @@ void 
fmt_class_string::format(std::string& out, u64 arg) case cpu_flag::notify: return "ntf"; case cpu_flag::yield: return "y"; case cpu_flag::preempt: return "PREEMPT"; + case cpu_flag::req_exit: return "REQ-EXIT"; case cpu_flag::dbg_global_pause: return "G-PAUSE"; case cpu_flag::dbg_pause: return "PAUSE"; case cpu_flag::dbg_step: return "STEP"; @@ -206,11 +207,7 @@ struct cpu_prof // Print only 7 hash characters out of 11 (which covers roughly 48 bits) if (type_id == 2) { - fmt::append(results, "\n\t[%s", fmt::base57(be_t{name})); - results.resize(results.size() - 4); - - // Print chunk address from lowest 16 bits - fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count); + fmt::append(results, "\n\t[%s]: %.4f%% (%u)", spu_block_hash{name}, _frac * 100., count); } else { @@ -733,8 +730,14 @@ void cpu_thread::operator()() { if (_this) { - sys_log.warning("CPU Thread '%s' terminated abnormally!", name); cleanup(); + + auto log_thread = named_thread("CPU Thread Cleanup Logger", [name = name]() + { + sys_log.warning("CPU Thread '%s' terminated abnormally!", name); + }); + + log_thread(); } } } cleanup; @@ -892,6 +895,14 @@ bool cpu_thread::check_state() noexcept store = true; } + if (flags & cpu_flag::req_exit) + { + // A request for the thread to quit has been made + flags -= cpu_flag::req_exit; + flags += cpu_flag::exit; + store = true; + } + // Can't process dbg_step if we only paused temporarily if (cpu_can_stop && flags & cpu_flag::dbg_step) { @@ -1161,13 +1172,13 @@ void cpu_thread::notify() cpu_thread& cpu_thread::operator=(thread_state) { - if (state & cpu_flag::exit) + if (state & (cpu_flag::exit + cpu_flag::req_exit)) { // Must be notified elsewhere or self-raised return *this; } - const auto old = state.fetch_add(cpu_flag::exit); + const auto old = state.fetch_add(cpu_flag::req_exit); if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit)) { @@ -1326,8 +1337,9 @@ extern std::shared_ptr make_disasm(const 
cpu_thread* cpu, shared_ptr< void cpu_thread::dump_all(std::string& ret) const { std::any func_data; + std::any misc_data; - ret += dump_misc(); + dump_misc(ret, misc_data); ret += '\n'; dump_regs(ret, func_data); ret += '\n'; @@ -1375,9 +1387,9 @@ std::vector> cpu_thread::dump_callstack_list() const return {}; } -std::string cpu_thread::dump_misc() const +void cpu_thread::dump_misc(std::string& ret, std::any& /*custom_data*/) const { - return fmt::format("Type: %s; State: %s\n", get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", state.load()); + fmt::append(ret, "%s[0x%x]; State: %s\n", get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", id, state.load()); } bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 5e3484f7f5..e723fd2d4b 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -29,6 +29,7 @@ enum class cpu_flag : u32 yield, // Thread is being requested to yield its execution time if it's running preempt, // Thread is being requested to preempt the execution of all CPU threads + req_exit, // Request the thread to exit dbg_global_pause, // Emulation paused dbg_pause, // Thread paused dbg_step, // Thread forced to pause after one step (one instruction, etc) @@ -39,7 +40,7 @@ enum class cpu_flag : u32 // Test stopped state constexpr bool is_stopped(bs_t state) { - return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again)); + return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again + cpu_flag::req_exit)); } // Test paused state @@ -176,7 +177,7 @@ public: virtual std::vector> dump_callstack_list() const; // Get CPU dump of misc information - virtual std::string dump_misc() const; + virtual void dump_misc(std::string& ret, std::any& /*custom_data*/) const; // Thread entry point function virtual void cpu_task() = 0; diff --git 
a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index f799e4b6be..66b5c69af0 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -201,9 +201,16 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin m_use_vnni = true; m_use_gfni = true; } + +#ifdef ARCH_ARM64 + if (utils::has_dotprod()) + { + m_use_dotprod = true; + } +#endif } -llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const +llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc) const { uint s1 = type->getScalarSizeInBits(); uint s2 = val->getType()->getScalarSizeInBits(); @@ -215,15 +222,81 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const if (s1 != s2) { - fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", s1, s2); + fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)\nCalled from: %s", s1, s2, src_loc); } - if (const auto c1 = llvm::dyn_cast(val)) + if (val->getType() == type) + { + return val; + } + + llvm::CastInst* i; + llvm::Value* source_val = val; + + // Try to reuse older bitcasts + while ((i = llvm::dyn_cast_or_null(source_val)) && i->getOpcode() == llvm::Instruction::BitCast) + { + source_val = i->getOperand(0); + + if (source_val->getType() == type) + { + return source_val; + } + } + + // Skip use iteration for values that don't have use lists +#if LLVM_VERSION_MAJOR >= 21 + if (source_val->hasUseList()) +#endif + { + for (llvm::Value* it_val : source_val->uses()) + { + if (!it_val) + { + continue; + } + + llvm::CastInst* bci = llvm::dyn_cast_or_null(it_val); + + // Walk through bitcasts + while (bci && bci->getOpcode() == llvm::Instruction::BitCast) + { + if (bci->getParent() != m_ir->GetInsertBlock()) + { + break; + } + + if (bci->getType() == type) + { + return bci; + } + + // Check if bci has use list before accessing use_begin() +#if 
LLVM_VERSION_MAJOR >= 21 + if (!bci->hasUseList()) + { + break; + } +#endif + + if (bci->use_begin() == bci->use_end()) + { + break; + } + + bci = llvm::dyn_cast_or_null(*bci->use_begin()); + } + } + } + + // Do bitcast on the source + + if (const auto c1 = llvm::dyn_cast(source_val)) { return ensure(llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c1, type, m_module->getDataLayout())); } - return m_ir->CreateBitCast(val, type); + return m_ir->CreateBitCast(source_val, type); } template <> @@ -492,14 +565,25 @@ void cpu_translator::erase_stores(llvm::ArrayRef args) { for (auto v : args) { - for (auto it = v->use_begin(); it != v->use_end(); ++it) + // Skip use iteration for values that don't have use lists +#if LLVM_VERSION_MAJOR >= 21 + if (!v->hasUseList()) + continue; +#endif + + for (llvm::Value* i : v->uses()) { - llvm::Value* i = *it; llvm::CastInst* bci = nullptr; // Walk through bitcasts while (i && (bci = llvm::dyn_cast(i)) && bci->getOpcode() == llvm::Instruction::BitCast) { + // Check if bci has use list before accessing use_begin() +#if LLVM_VERSION_MAJOR >= 21 + if (!bci->hasUseList()) + break; +#endif + i = *bci->use_begin(); } diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 99ddafde0a..ab2aed8156 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -43,6 +43,7 @@ #include #include +#include // Helper function llvm::Value* peek_through_bitcasts(llvm::Value*); @@ -567,6 +568,32 @@ struct llvm_placeholder_t } }; +template >> +struct llvm_place_stealer_t +{ + // TODO: placeholder extracting actual constant values (u64, f64, vector, etc) + + using type = T; + + static constexpr bool is_ok = true; + + llvm::Value* eval(llvm::IRBuilder<>*) const + { + return nullptr; + } + + std::tuple<> match(llvm::Value*& value, llvm::Module*) const + { + if (value && value->getType() == llvm_value_t::get_type(value->getContext())) + { + return {}; + } + + value = nullptr; + return {}; + } +}; + 
template struct llvm_const_int { @@ -3090,6 +3117,9 @@ protected: // For now, setting this flag will speed up SPU verification // but I will remove this later with explicit parralelism - Whatcookie bool m_use_avx = true; + + // ARMv8 SDOT/UDOT + bool m_use_dotprod = false; #else // Allow FMA bool m_use_fma = false; @@ -3210,7 +3240,7 @@ public: } // Bitcast with immediate constant folding - llvm::Value* bitcast(llvm::Value* val, llvm::Type* type) const; + llvm::Value* bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc = std::source_location::current()) const; template llvm::Value* bitcast(llvm::Value* val) @@ -3224,6 +3254,12 @@ public: return {}; } + template + static llvm_place_stealer_t match_stealer() + { + return {}; + } + template requires requires { typename llvm_common_t; } static auto match_expr(llvm::Value* v, llvm::Module* _m, T&& expr) @@ -3647,10 +3683,59 @@ public: const auto data0 = a.eval(m_ir); const auto data1 = b.eval(m_ir); const auto data2 = c.eval(m_ir); + +#if LLVM_VERSION_MAJOR >= 22 + // LLVM 22+ changed the intrinsic signature from v4i32 to v16i8 for operands 2 and 3 + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_vpdpbusd_128), + {data0, m_ir->CreateBitCast(data1, get_type()), m_ir->CreateBitCast(data2, get_type())}); +#else result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_vpdpbusd_128), {data0, data1, data2}); +#endif return result; } +#ifdef ARCH_ARM64 +template + value_t udot(T1 a, T2 b, T3 c) + { + value_t result; + + const auto data0 = a.eval(m_ir); + const auto data1 = b.eval(m_ir); + const auto data2 = c.eval(m_ir); + + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::aarch64_neon_udot), {data0, data1, data2}); + return result; + } + + template + value_t sdot(T1 a, T2 b, T3 c) + { + value_t result; + + const auto data0 = a.eval(m_ir); + const auto data1 = b.eval(m_ir); + const auto data2 = c.eval(m_ir); + + result.value = 
m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::aarch64_neon_sdot), {data0, data1, data2}); + return result; + } + +template + auto addp(T1 a, T2 b) + { + using T_vector = typename is_llvm_expr::type; + const auto data1 = a.eval(m_ir); + const auto data2 = b.eval(m_ir); + + const auto func = get_intrinsic(llvm::Intrinsic::aarch64_neon_addp); + + value_t result; + result.value = m_ir->CreateCall(func, {data1, data2}); + return result; + } +#endif + template value_t vpermb(T1 a, T2 b) { @@ -3899,6 +3984,15 @@ public: erase_stores({args.value...}); } + // Debug breakpoint + void debugtrap() + { + const auto _rty = llvm::Type::getVoidTy(m_context); + const auto type = llvm::FunctionType::get(_rty, {}, false); + const auto func = llvm::cast(m_ir->GetInsertBlock()->getParent()->getParent()->getOrInsertFunction("llvm.debugtrap", type).getCallee()); + m_ir->CreateCall(func); + } + template static auto pshufb(T&& a, U&& b) { diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.cpp b/rpcs3/Emu/Cell/Modules/cellAdec.cpp index dfc91c8d2f..0d0c4481e9 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp @@ -261,7 +261,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_mutex_lock; output_mutex_lock: - error_occurred |= static_cast(sys_mutex_lock(ppu, output_mutex, 0) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex, 0) != CELL_OK); if (ppu.state & cpu_flag::again) { @@ -273,7 +273,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_cond_wait; output_cond_wait: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -287,7 +287,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::queue_mutex_lock; queue_mutex_lock: - ensure(sys_mutex_lock(ppu, 
queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -296,7 +296,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE cellAdec.trace("Command type: %d", static_cast(cmd.type.get())); @@ -307,7 +307,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) { case LpcmDecCmdType::start_seq: // LLE sends a command to the SPU thread. The SPU thread consumes the command without doing anything, however - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; case LpcmDecCmdType::end_seq: @@ -324,11 +324,11 @@ void LpcmDecContext::exec(ppu_thread& ppu) // Doesn't do anything else notify_seq_done.cbFunc(ppu, notify_seq_done.cbArg); - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; } case LpcmDecCmdType::close: - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE return; case LpcmDecCmdType::decode_au: @@ -685,7 +685,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); const vm::var bsi_info{{ lpcm_param->channelNumber, lpcm_param->sampleRate, static_cast(output_size) }}; @@ -703,14 +703,14 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... 
args) { ppu.state += cpu_flag::wait; - if (error_code ret = sys_mutex_lock(ppu, queue_size_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_size_mutex, 0); ret != CELL_OK) { return ret; } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_BUSY; } @@ -720,39 +720,39 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... args) *lpcm_param = { args... }; } - if (error_code ret = sys_mutex_lock(ppu, queue_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_mutex, 0); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } cmd_queue.emplace(type, std::forward(args)...); - if (error_code ret = sys_mutex_unlock(ppu, queue_mutex); ret != CELL_OK + if (error_code ret = lv2_syscall(ppu, queue_mutex); ret != CELL_OK || (ret = cmd_available.release(ppu)) != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } - return sys_mutex_unlock(ppu, queue_size_mutex); + return lv2_syscall(ppu, queue_size_mutex); } inline error_code LpcmDecContext::release_output(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, output_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_mutex, 0); ret != CELL_OK) { return ret; } output_locked = false; - if (error_code ret = sys_cond_signal(ppu, output_consumed); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_consumed); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, output_mutex); + return 
lv2_syscall(ppu, output_mutex); } void lpcmDecEntry(ppu_thread& ppu, vm::ptr lpcm_dec) @@ -820,13 +820,13 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const vm::var queue_mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem06"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec03"_u64 } }}; - error_code ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); + error_code ret = lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); ret = ret ? ret : handle->release_output(ppu); ret = ret ? 
ret : handle->cmd_available.init(ppu, handle.ptr(&LpcmDecContext::cmd_available), 0); ret = ret ? ret : handle->reserved2.init(ppu, handle.ptr(&LpcmDecContext::reserved2), 0); @@ -844,8 +844,8 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const auto entry = g_fxo->get().func_addr(FIND_FUNC(lpcmDecEntry)); ret = ppu_execute<&sys_ppu_thread_create>(ppu, handle.ptr(&LpcmDecContext::thread_id), entry, handle.addr(), +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +_name); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); return ret; } @@ -865,8 +865,8 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cellAdec.notice("_CellAdecCoreOpClose_lpcm(handle=*0x%x)", handle); - if (error_code ret = sys_mutex_lock(ppu, handle->queue_size_mutex, 0); ret != CELL_OK - || (ret = sys_mutex_lock(ppu, handle->queue_mutex, 0)) != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_size_mutex, 0); ret != CELL_OK + || (ret = lv2_syscall(ppu, handle->queue_mutex, 0)) != CELL_OK) { return ret; } @@ -875,14 +875,14 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha { handle->cmd_queue.emplace(LpcmDecCmdType::close); - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } if (error_code ret = handle->cmd_available.release(ppu); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + 
ensure(lv2_syscall(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } } @@ -893,29 +893,29 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cmd.type = LpcmDecCmdType::close; } - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } } - error_code ret = sys_mutex_unlock(ppu, handle->queue_size_mutex); + error_code ret = lv2_syscall(ppu, handle->queue_size_mutex); ret = ret ? ret : handle->release_output(ppu); vm::var thread_ret; - ret = ret ? ret : sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret); + ret = ret ? ret : lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret); - ret = ret ? ret : sys_cond_destroy(ppu, handle->queue_size_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->unk_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_size_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->unk_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); ret = ret ? ret : handle->cmd_available.finalize(ppu); ret = ret ? ret : handle->reserved2.finalize(ppu); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_pop_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_push_mutex); + ret = ret ? 
ret : lv2_syscall(ppu, handle->spurs_queue_pop_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->spurs_queue_push_mutex); return ret; } @@ -1091,11 +1091,11 @@ error_code AdecContext::set_pcm_item(s32 pcm_handle, vm::ptr pcm_addr, u32 error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1115,27 +1115,27 @@ error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) } else { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } if (frames_head == -1 || frames_tail == -1) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } 
@@ -1146,7 +1146,7 @@ error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { if (pcm_handle != frames_tail) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1171,7 +1171,7 @@ error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) frames[prev].next = next; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } @@ -1557,7 +1557,7 @@ error_code adecOpen(ppu_thread& ppu, vm::ptr type, vm::cptr(ppu, _this.ptr(&AdecContext::mutex), _this.ptr(&AdecContext::mutex_attribute)) == CELL_OK); // Error code isn't checked on LLE *handle = _this; @@ -1626,7 +1626,7 @@ error_code cellAdecClose(ppu_thread& ppu, vm::ptr handle) return ret; } - if (error_code ret = sys_mutex_destroy(ppu, handle->mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->mutex); ret != CELL_OK) { return ret; } diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.h b/rpcs3/Emu/Cell/Modules/cellAdec.h index a43c3f0aef..2216561516 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.h +++ b/rpcs3/Emu/Cell/Modules/cellAdec.h @@ -485,6 +485,19 @@ struct AdecFrame CHECK_SIZE(AdecFrame, 0x68); +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... 
args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + class AdecOutputQueue { struct entry @@ -511,10 +524,10 @@ public: this->size = 0; const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem07"_u64 } }}; - ensure(sys_mutex_create(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec05"_u64 } }}; - ensure(sys_cond_create(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE for (s32 i = 0; i < 4; i++) { @@ -524,12 +537,12 @@ public: error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - if (error_code ret = sys_mutex_destroy(ppu, mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex); ret != CELL_OK) { return ret; } @@ -539,11 +552,11 @@ public: error_code push(ppu_thread& ppu, vm::ptr pcm_item, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (entries[back].state != 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return true; // LLE returns the result of the comparison above } @@ -554,13 +567,13 @@ public: back = (back + 1) & 3; size++; - 
ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } const entry* pop(ppu_thread& ppu) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) // Savestate was created while waiting on the mutex { @@ -569,7 +582,7 @@ public: if (entries[front].state == 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return nullptr; } @@ -581,15 +594,15 @@ public: front = (front + 1) & 3; size--; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } const entry& peek(ppu_thread& ppu) const { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE const entry& ret = entries[front]; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } }; @@ -708,39 +721,39 @@ public: const vm::var mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem01"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec01"_u64 } }}; - if (error_code ret = sys_mutex_create(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) { return ret; } - return sys_cond_create(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); + return 
lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); } error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - return sys_mutex_destroy(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code release(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } value++; - if (error_code ret = sys_cond_signal(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code acquire(ppu_thread& ppu, lpcm_dec_state& savestate) @@ -752,7 +765,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_mutex_lock; - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } @@ -767,7 +780,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_cond_wait; cond_wait: - if (error_code ret = sys_cond_wait(ppu, cond, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond, 0); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } @@ -780,7 +793,7 @@ public: value--; - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } }; diff --git a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp index c55cf7b60f..fb3191bcb4 100644 --- a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp @@ -295,7 +295,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) { savestate = atracxdec_state::initial; - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -310,24 +310,24 @@ void 
AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_cmd; label1_wait_for_cmd_state: - ensure(sys_cond_wait(ppu, queue_not_empty, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { return; } - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); savestate = atracxdec_state::checking_run_thread_1; label2_check_run_thread_1_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -336,11 +336,11 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::executing_cmd; label3_execute_cmd_state: @@ -392,7 +392,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) cellAtracXdec.trace("Waiting for output to be consumed..."); - ensure(sys_mutex_lock(ppu, output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -404,7 +404,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_output; label4_wait_for_output_state: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -417,7 +417,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::checking_run_thread_2; label5_check_run_thread_2_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) 
== CELL_OK); if (ppu.state & cpu_flag::again) { @@ -426,12 +426,12 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::decoding; label6_decode_state: @@ -645,7 +645,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); const u32 output_size = decoded_samples_num * (decoder.bw_pcm & 0x7fu) * decoder.nch_out; @@ -680,7 +680,7 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... args) if (!signal) { - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -692,23 +692,23 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... 
args) // Close command is only sent if the queue is empty on LLE if (!cmd_queue.empty()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return {}; } } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return CELL_ADEC_ERROR_ATX_BUSY; } cmd_queue.emplace(std::forward(type), std::forward(args)...); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } - ensure(sys_cond_signal(ppu, queue_not_empty) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -779,25 +779,25 @@ error_code _CellAdecCoreOpOpenExt_atracx(ppu_thread& ppu, vm::ptr mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_atd001"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_atd002"_u64 } }}; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); mutex_attr->name_u64 = "_atd003"_u64; cond_attr->name_u64 = "_atd004"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); 
mutex_attr->name_u64 = "_atd005"_u64; cond_attr->name_u64 = "_atd006"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); const vm::var _name = vm::make_str("HLE ATRAC3plus decoder"); const auto entry = g_fxo->get().func_addr(FIND_FUNC(atracXdecEntry)); @@ -829,26 +829,26 @@ error_code _CellAdecCoreOpClose_atracx(ppu_thread& ppu, vm::ptrrun_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex, 0) == CELL_OK); handle->run_thread = false; - ensure(sys_mutex_unlock(ppu, handle->run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex) == CELL_OK); handle->send_command(ppu); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); vm::var thread_ret; - ensure(sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); + 
ensure(lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); - error_code ret = sys_cond_destroy(ppu, handle->queue_not_empty); - ret = ret ? ret : sys_cond_destroy(ppu, handle->run_thread_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->run_thread_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + error_code ret = lv2_syscall(ppu, handle->queue_not_empty); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); return ret != CELL_OK ? static_cast(CELL_ADEC_ERROR_FATAL) : CELL_OK; } @@ -921,7 +921,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -931,7 +931,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_locked = false; } - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -939,7 +939,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp b/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp index 4851895537..f57e51d2da 100644 --- a/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp @@ -524,19 +524,11 @@ error_code cellVideoOutGetScreenSize(u32 videoOut, vm::ptr screenSize) { // Return Playstation 3D display value // Some games call 
this function when 3D is enabled - *screenSize = 24.f; + *screenSize = static_cast(g_cfg.video.screen_size.get()); return CELL_OK; } - // TODO: Use virtual screen size -#ifdef _WIN32 - // HDC screen = GetDC(NULL); - // float diagonal = roundf(sqrtf((powf(float(GetDeviceCaps(screen, HORZSIZE)), 2) + powf(float(GetDeviceCaps(screen, VERTSIZE)), 2))) * 0.0393f); -#else - // TODO: Linux implementation, without using wx - // float diagonal = roundf(sqrtf((powf(wxGetDisplaySizeMM().GetWidth(), 2) + powf(wxGetDisplaySizeMM().GetHeight(), 2))) * 0.0393f); -#endif - + // Let's just return not set for now return CELL_VIDEO_OUT_ERROR_VALUE_IS_NOT_SET; } diff --git a/rpcs3/Emu/Cell/Modules/cellCamera.cpp b/rpcs3/Emu/Cell/Modules/cellCamera.cpp index 096f9330f4..10f9a89cf0 100644 --- a/rpcs3/Emu/Cell/Modules/cellCamera.cpp +++ b/rpcs3/Emu/Cell/Modules/cellCamera.cpp @@ -919,7 +919,7 @@ error_code cellCameraGetAttribute(s32 dev_num, s32 attrib, vm::ptr arg1, vm if (!check_dev_num(dev_num)) { - return CELL_CAMERA_ERROR_PARAM; + return { CELL_CAMERA_ERROR_PARAM, "dev_num=%d", dev_num }; } if (g_cfg.io.camera == camera_handler::null) @@ -935,7 +935,7 @@ error_code cellCameraGetAttribute(s32 dev_num, s32 attrib, vm::ptr arg1, vm if (!arg1) { - return CELL_CAMERA_ERROR_PARAM; + return { CELL_CAMERA_ERROR_PARAM, "arg1=null" }; } if (error_code error = check_resolution(dev_num)) @@ -952,7 +952,7 @@ error_code cellCameraGetAttribute(s32 dev_num, s32 attrib, vm::ptr arg1, vm if (!attr_name) // invalid attributes don't have a name { - return CELL_CAMERA_ERROR_PARAM; + return { CELL_CAMERA_ERROR_PARAM, "attrib=0x%x", attrib }; } if (arg1) @@ -983,7 +983,7 @@ error_code cellCameraSetAttribute(s32 dev_num, s32 attrib, u32 arg1, u32 arg2) if (!check_dev_num(dev_num)) { - return CELL_CAMERA_ERROR_PARAM; + return { CELL_CAMERA_ERROR_PARAM, "dev_num=%d", dev_num }; } if (g_cfg.io.camera == camera_handler::null) @@ -1004,7 +1004,7 @@ error_code cellCameraSetAttribute(s32 dev_num, s32 attrib, u32 
arg1, u32 arg2) if (!attr_name) // invalid attributes don't have a name { - return CELL_CAMERA_ERROR_PARAM; + return { CELL_CAMERA_ERROR_PARAM, "attrib=0x%x", attrib }; } g_camera.set_attr(attrib, arg1, arg2); diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.cpp b/rpcs3/Emu/Cell/Modules/cellDmux.cpp index d7f6f84f3f..7c2fe6f1a9 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmux.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmux.cpp @@ -1,16 +1,14 @@ #include "stdafx.h" -#include "Emu/System.h" -#include "Emu/IdManager.h" -#include "Emu/Cell/PPUModule.h" +#include "Emu/Cell/lv2/sys_mutex.h" #include "Emu/Cell/lv2/sys_sync.h" +#include "Emu/Cell/lv2/sys_timer.h" +#include "Emu/Cell/PPUModule.h" +#include "Emu/savestate_utils.hpp" +#include "util/asm.hpp" #include "cellPamf.h" #include "cellDmux.h" -#include "util/asm.hpp" - -#include - LOG_CHANNEL(cellDmux); template <> @@ -31,1331 +29,1209 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } -/* Demuxer Thread Classes */ - -enum +static error_code get_error(u32 internal_error) { - /* http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html */ - - PACKET_START_CODE_MASK = 0xffffff00, - PACKET_START_CODE_PREFIX = 0x00000100, - - PACK_START_CODE = 0x000001ba, - SYSTEM_HEADER_START_CODE = 0x000001bb, - PRIVATE_STREAM_1 = 0x000001bd, - PADDING_STREAM = 0x000001be, - PRIVATE_STREAM_2 = 0x000001bf, -}; - -struct DemuxerStream -{ - u32 addr; - u32 size; - u64 userdata; - bool discontinuity; - - template - bool get(T& out) + switch (internal_error) { - if (sizeof(T) > size) return false; + case 0: return CELL_OK; + case 1: return CELL_DMUX_ERROR_FATAL; + case 2: // Error values two to five are all converted to CELL_DMUX_ERROR_ARG. 
+ case 3: + case 4: + case 5: return CELL_DMUX_ERROR_ARG; + default: return CELL_DMUX_ERROR_FATAL; + } +} - std::memcpy(&out, vm::base(addr), sizeof(T)); - addr += sizeof(T); - size -= sizeof(T); +static inline std::span> get_es_handles(vm::ptr handle) +{ + return { vm::pptr::make(handle.addr() + sizeof(DmuxContext)).get_ptr(), static_cast(handle->enabled_es_num) }; +} - return true; +static inline vm::ptr get_au_queue_elements(vm::ptr es_handle) +{ + return vm::ptr::make(es_handle.addr() + sizeof(DmuxEsContext)); +} + +static inline vm::cptr get_core_ops() +{ + return vm::cptr::make(*ppu_module_manager::cellDmuxPamf.variables.find(0x28b2b7b2)->second.export_addr); +} + +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; } - template - bool peek(T& out, u32 shift = 0) - { - if (sizeof(T) + shift > size) return false; + return ret; +} - std::memcpy(&out, vm::base(addr + shift), sizeof(T)); - return true; - } +// Callbacks for cellDmuxPamf - void skip(u32 count) - { - addr += count; - size = size > count ? 
size - count : 0; - } - - bool check(u32 count) const - { - return count <= size; - } - - u64 get_ts(u8 c) - { - u8 v[4]; get(v); - return - ((u64{c} & 0x0e) << 29) | - ((u64{v[0]}) << 21) | - ((u64{v[1]} & 0x7e) << 15) | - ((u64{v[2]}) << 7) | (u64{v[3]} >> 1); - } -}; - -struct PesHeader +static error_code notify_demux_done(ppu_thread& ppu, vm::ptr core_handle, u32 error, vm::ptr handle) { - u64 pts; - u64 dts; - u8 size; - bool has_ts; - bool is_ok; + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - PesHeader(DemuxerStream& stream); -}; - -class ElementaryStream; -class Demuxer; - -enum DemuxerJobType -{ - dmuxSetStream, - dmuxResetStream, - dmuxResetStreamAndWaitDone, - dmuxEnableEs, - dmuxDisableEs, - dmuxResetEs, - dmuxFlushEs, - dmuxClose, -}; - -struct DemuxerTask -{ - DemuxerJobType type; - - union + if (!savestate_lock) { - DemuxerStream stream; + ppu.state += cpu_flag::again; + return {}; + } - struct + cellDmux.trace("notify_demux_done(core_handle=*0x%x, error=%d, handle=*0x%x)", core_handle, error, handle); + + ensure(!!handle); // Not checked on LLE + + ensure(lv2_syscall(ppu, handle->_dx_mhd, 0) == CELL_OK); // Failing this check on LLE would result in it dereferencing an invalid pointer. + handle->dmux_state = DMUX_STOPPED; + ensure(lv2_syscall(ppu, handle->_dx_mhd) == CELL_OK); // Failing this check on LLE would result in it dereferencing an invalid pointer. 
+ + if (handle->_this) + { + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE, .supplementalInfo = handle->user_data }}; + handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); + } + + return CELL_OK; +} + +static error_code notify_fatal_err(ppu_thread& ppu, vm::ptr core_handle, u32 error, vm::ptr handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.error("notify_fatal_err(core_handle=*0x%x, error=%d, handle=*0x%x)", core_handle, error, handle); + + ensure(!!handle); // Not checked on LLE + + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_FATAL_ERR, .supplementalInfo = static_cast(get_error(error)) }}; + return handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); +} + +static error_code notify_prog_end_code(ppu_thread& ppu, vm::ptr core_handle, vm::ptr handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("notify_prog_end_code(core_handle=*0x%x, handle=*0x%x)", core_handle, handle); + + ensure(!!handle); // Not checked on LLE + + if (handle->_this) + { + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_PROG_END_CODE, .supplementalInfo = handle->user_data }}; + handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); + } + + return CELL_OK; +} + +static error_code notify_es_au_found(ppu_thread& ppu, vm::ptr core_es_handle, vm::cptr au_info, vm::ptr es_handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.trace("notify_es_au_found(core_es_handle=*0x%x, 
au_info=*0x%x, es_handle=*0x%x)", core_es_handle, au_info, es_handle); + + ensure(!!au_info && !!es_handle); // Not checked on LLE + + const auto fatal_err = [&](be_t es_is_enabled, error_code ret) + { + if (es_is_enabled) { - u32 es; - u32 auInfo_ptr_addr; - u32 auSpec_ptr_addr; - ElementaryStream* es_ptr; - } es; + const vm::var demuxerMsg{{ .msgType = CELL_DMUX_MSG_TYPE_FATAL_ERR, .supplementalInfo = static_cast(ret) }}; + es_handle->dmux_handle->dmux_cb.cbFunc(ppu, es_handle->dmux_handle, demuxerMsg, es_handle->dmux_handle->dmux_cb.cbArg); + } }; - DemuxerTask() - { - } - - DemuxerTask(DemuxerJobType type) - : type(type) - { - } -}; - -class ElementaryStream -{ - std::mutex m_mutex; - - squeue_t entries; // AU starting addresses - u32 put_count = 0; // number of AU written - u32 got_count = 0; // number of AU obtained by GetAu(Ex) - u32 released = 0; // number of AU released - - u32 put; // AU that is being written now - - bool is_full(u32 space); - -public: - static const u32 id_base = 1; - static const u32 id_step = 1; - static const u32 id_count = 1023; - SAVESTATE_INIT_POS(34); - - ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr cbFunc, u32 cbArg, u32 spec); - - Demuxer* dmux; - const u32 id = idm::last_id(); - const u32 memAddr; - const u32 memSize; - const u32 fidMajor; - const u32 fidMinor; - const u32 sup1; - const u32 sup2; - const vm::ptr cbFunc; - const u32 cbArg; - const u32 spec; //addr - - std::vector raw_data; // demultiplexed data stream (managed by demuxer thread) - usz raw_pos = 0; // should be <= raw_data.size() - u64 last_dts = CODEC_TS_INVALID; - u64 last_pts = CODEC_TS_INVALID; - - void push(DemuxerStream& stream, u32 size); // called by demuxer thread (not multithread-safe) - - bool isfull(u32 space); - - void push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool rap, u32 specific); - - bool release(); - - bool peek(u32& out_data, bool no_ex, u32& out_spec, bool update_index); - 
- void reset(); -}; - -class Demuxer : public ppu_thread -{ -public: - squeue_t job; - const u32 memAddr; - const u32 memSize; - const vm::ptr cbFunc; - const u32 cbArg; - volatile bool is_finished = false; - volatile bool is_closed = false; - atomic_t is_running = false; - atomic_t is_working = false; - - Demuxer(u32 addr, u32 size, vm::ptr func, u32 arg) - : ppu_thread({}, "", 0) - , memAddr(addr) - , memSize(size) - , cbFunc(func) - , cbArg(arg) - { - } - - void non_task() - { - DemuxerTask task; - DemuxerStream stream = {}; - ElementaryStream* esALL[96]{}; - ElementaryStream** esAVC = &esALL[0]; // AVC (max 16 minus M2V count) - //ElementaryStream** esM2V = &esALL[16]; // M2V (max 16 minus AVC count) - //ElementaryStream** esDATA = &esALL[32]; // user data (max 16) - ElementaryStream** esATX = &esALL[48]; // ATRAC3+ (max 16) - //ElementaryStream** esAC3 = &esALL[64]; // AC3 (max 16) - //ElementaryStream** esPCM = &esALL[80]; // LPCM (max 16) - - u32 cb_add = 0; - - while (true) - { - if (Emu.IsStopped() || is_closed) - { - break; - } - - if (!job.try_peek(task) && is_running && stream.addr) - { - // default task (demuxing) (if there is no other work) - be_t code; - be_t len; - - if (!stream.peek(code)) - { - // demuxing finished - is_running = false; - - // callback - auto dmuxMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE; - dmuxMsg->supplementalInfo = stream.userdata; - cbFunc(*this, id, dmuxMsg, cbArg); - lv2_obj::sleep(*this); - - is_working = false; - - stream = {}; - - continue; - } - - switch (code) - { - case PACK_START_CODE: - { - if (!stream.check(14)) - { - fmt::throw_exception("End of stream (PACK_START_CODE)"); - } - stream.skip(14); - break; - } - - case SYSTEM_HEADER_START_CODE: - { - if (!stream.check(18)) - { - fmt::throw_exception("End of stream (SYSTEM_HEADER_START_CODE)"); - } - stream.skip(18); - break; - } - - case PADDING_STREAM: - { - if (!stream.check(6)) - { - fmt::throw_exception("End 
of stream (PADDING_STREAM)"); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PADDING_STREAM, len=%d)", len); - } - stream.skip(len); - break; - } - - case PRIVATE_STREAM_2: - { - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_2)"); - } - stream.skip(4); - stream.get(len); - - cellDmux.notice("PRIVATE_STREAM_2 (%d)", len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_2, len=%d)", len); - } - stream.skip(len); - break; - } - - case PRIVATE_STREAM_1: - { - // audio and user data stream - DemuxerStream backup = stream; - - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_1)"); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_1, len=%d)", len); - } - - const PesHeader pes(stream); - if (!pes.is_ok) - { - fmt::throw_exception("PesHeader error (PRIVATE_STREAM_1, len=%d)", len); - } - - if (len < pes.size + 4) - { - fmt::throw_exception("End of block (PRIVATE_STREAM_1, PesHeader + fid_minor, len=%d)", len); - } - len -= pes.size + 4; - - u8 fid_minor; - if (!stream.get(fid_minor)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM1, fid_minor)"); - } - - const u32 ch = fid_minor % 16; - if ((fid_minor & -0x10) == 0 && esATX[ch]) - { - ElementaryStream& es = *esATX[ch]; - if (es.raw_data.size() > 1024 * 1024) - { - stream = backup; - std::this_thread::sleep_for(1ms); // hack - continue; - } - - if (len < 3 || !stream.check(3)) - { - fmt::throw_exception("End of block (ATX, unknown header, len=%d)", len); - } - len -= 3; - stream.skip(3); - - if (pes.has_ts) - { - es.last_dts = pes.dts; - es.last_pts = pes.pts; - } - - es.push(stream, len); - - while (true) - { - auto const size = es.raw_data.size() - es.raw_pos; // size of available new data - auto const data = es.raw_data.data() + es.raw_pos; // pointer to available data - 
- if (size < 8) break; // skip if cannot read ATS header - - if (data[0] != 0x0f || data[1] != 0xd0) - { - fmt::throw_exception("ATX: 0x0fd0 header not found (ats=0x%llx)", *reinterpret_cast*>(data)); - } - - u32 frame_size = (((u32{data[2]} & 0x3) << 8) | u32{data[3]}) * 8 + 8; - - if (size < frame_size + 8) break; // skip non-complete AU - - if (es.isfull(frame_size + 8)) break; // skip if cannot push AU - - es.push_au(frame_size + 8, es.last_dts, es.last_pts, stream.userdata, false /* TODO: set correct value */, 0); - - //cellDmux.notice("ATX AU pushed (ats=0x%llx, frame_size=%d)", *(be_t*)data, frame_size); - - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - } - else - { - cellDmux.notice("PRIVATE_STREAM_1 (len=%d, fid_minor=0x%x)", len, fid_minor); - stream.skip(len); - } - break; - } - - case 0x1e0: case 0x1e1: case 0x1e2: case 0x1e3: - case 0x1e4: case 0x1e5: case 0x1e6: case 0x1e7: - case 0x1e8: case 0x1e9: case 0x1ea: case 0x1eb: - case 0x1ec: case 0x1ed: case 0x1ee: case 0x1ef: - { - // video stream (AVC or M2V) - DemuxerStream backup = stream; - - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (video, code=0x%x)", code); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (video, code=0x%x, len=%d)", code, len); - } - - const PesHeader pes(stream); - if (!pes.is_ok) - { - fmt::throw_exception("PesHeader error (video, code=0x%x, len=%d)", code, len); - } - - if (len < pes.size + 3) - { - fmt::throw_exception("End of block (video, code=0x%x, PesHeader)", code); - } - len -= pes.size + 3; - - const u32 ch = code % 16; - if (esAVC[ch]) - { - ElementaryStream& es = *esAVC[ch]; - - const u32 old_size = ::size32(es.raw_data); - if (es.isfull(old_size)) - { - stream = backup; - 
std::this_thread::sleep_for(1ms); // hack - continue; - } - - if ((pes.has_ts && old_size) || old_size >= 0x69800) - { - // push AU if it becomes too big or the next packet contains PTS/DTS - es.push_au(old_size, es.last_dts, es.last_pts, stream.userdata, false /* TODO: set correct value */, 0); - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - - if (pes.has_ts) - { - // preserve dts/pts for next AU - es.last_dts = pes.dts; - es.last_pts = pes.pts; - } - - // reconstruction of MPEG2-PS stream for vdec module - const u32 size = len + pes.size + 9; - stream = backup; - es.push(stream, size); - } - else - { - cellDmux.notice("Video stream (code=0x%x, len=%d)", code, len); - stream.skip(len); - } - break; - } - - default: - { - if ((code & PACKET_START_CODE_MASK) == PACKET_START_CODE_PREFIX) - { - fmt::throw_exception("Unknown code found (0x%x)", code); - } - - // search - stream.skip(1); - } - } - - continue; - } - - // wait for task if no work - if (!job.pop(task, &is_closed)) - { - break; // Emu is stopped - } - - switch (task.type) - { - case dmuxSetStream: - { - if (task.stream.discontinuity) - { - cellDmux.warning("dmuxSetStream (beginning)"); - for (u32 i = 0; i < std::size(esALL); i++) - { - if (esALL[i]) - { - esALL[i]->reset(); - } - } - } - - stream = task.stream; - //cellDmux.notice("*** stream updated(addr=0x%x, size=0x%x, discont=%d, userdata=0x%llx)", - //stream.addr, stream.size, stream.discontinuity, stream.userdata); - break; - } - - case dmuxResetStream: - case dmuxResetStreamAndWaitDone: - { - // demuxing stopped - if (is_running.exchange(false)) - { - // callback - auto dmuxMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE; - dmuxMsg->supplementalInfo = stream.userdata; - cbFunc(*this, id, dmuxMsg, cbArg); - 
lv2_obj::sleep(*this); - - stream = {}; - - is_working = false; - } - - break; - } - - case dmuxEnableEs: - { - ElementaryStream& es = *task.es.es_ptr; - - // TODO: uncomment when ready to use - //if ((es.fidMajor & -0x10) == 0xe0 && es.fidMinor == 0 && es.sup1 == 1 && !es.sup2) - //{ - // esAVC[es.fidMajor % 16] = task.es.es_ptr; - //} - //else if ((es.fidMajor & -0x10) == 0xe0 && es.fidMinor == 0 && !es.sup1 && !es.sup2) - //{ - // esM2V[es.fidMajor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0 && !es.sup1 && !es.sup2) - //{ - // esATX[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x20 && !es.sup1 && !es.sup2) - //{ - // esDATA[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x30 && !es.sup1 && !es.sup2) - //{ - // esAC3[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x40 && !es.sup1 && !es.sup2) - //{ - // esPCM[es.fidMinor % 16] = task.es.es_ptr; - //} - //else - { - fmt::throw_exception("dmuxEnableEs: unknown filter (0x%x, 0x%x, 0x%x, 0x%x)", es.fidMajor, es.fidMinor, es.sup1, es.sup2); - } - es.dmux = this; - break; - } - - case dmuxDisableEs: - { - ElementaryStream& es = *task.es.es_ptr; - if (es.dmux != this) - { - fmt::throw_exception("dmuxDisableEs: invalid elementary stream"); - } - - for (u32 i = 0; i < std::size(esALL); i++) - { - if (esALL[i] == &es) - { - esALL[i] = nullptr; - } - } - es.dmux = nullptr; - idm::remove(task.es.es); - break; - } - - case dmuxFlushEs: - { - ElementaryStream& es = *task.es.es_ptr; - - const u32 old_size = ::size32(es.raw_data); - if (old_size && (es.fidMajor & -0x10) == 0xe0) - { - // TODO (it's only for AVC, some ATX data may be lost) - while (es.isfull(old_size)) - { - if (Emu.IsStopped() || is_closed) break; - - std::this_thread::sleep_for(1ms); // hack - } - - es.push_au(old_size, es.last_dts, es.last_pts, 
stream.userdata, false, 0); - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - - if (!es.raw_data.empty()) - { - cellDmux.error("dmuxFlushEs: 0x%x bytes lost (es_id=%d)", ::size32(es.raw_data), es.id); - } - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - break; - } - - case dmuxResetEs: - { - task.es.es_ptr->reset(); - break; - } - - case dmuxClose: - { - break; - } - - default: - { - fmt::throw_exception("Demuxer thread error: unknown task (0x%x)", +task.type); - } - } - } - - is_finished = true; - } -}; - - -PesHeader::PesHeader(DemuxerStream& stream) - : pts(CODEC_TS_INVALID) - , dts(CODEC_TS_INVALID) - , size(0) - , has_ts(false) - , is_ok(false) -{ - u16 header; - if (!stream.get(header)) - { - fmt::throw_exception("End of stream (header)"); - } - if (!stream.get(size)) - { - fmt::throw_exception("End of stream (size)"); - } - if (!stream.check(size)) - { - fmt::throw_exception("End of stream (size=%d)", size); - } - - u8 pos = 0; - while (pos++ < size) - { - u8 v; - if (!stream.get(v)) - { - return; // should never occur - } - - if (v == 0xff) // skip padding bytes - { - continue; - } - - if ((v & 0xf0) == 0x20 && (size - pos) >= 4) // pts only - { - pos += 4; - pts = stream.get_ts(v); - has_ts = true; - } - else if ((v & 0xf0) == 0x30 && (size - pos) >= 9) // pts and dts - { - pos += 5; - pts = stream.get_ts(v); - stream.get(v); - has_ts = true; - - if ((v & 0xf0) != 0x10) - { - cellDmux.error("PesHeader(): dts not found (v=0x%x, size=%d, pos=%d)", v, size, pos - 1); - stream.skip(size - pos); - return; - } - pos += 4; - dts = stream.get_ts(v); - } - else - { - 
cellDmux.warning("PesHeader(): unknown code (v=0x%x, size=%d, pos=%d)", v, size, pos - 1); - stream.skip(size - pos); - pos = size; - break; - } - } - - is_ok = true; -} - -ElementaryStream::ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr cbFunc, u32 cbArg, u32 spec) - : put(utils::align(addr, 128)) - , dmux(dmux) - , memAddr(utils::align(addr, 128)) - , memSize(size - (addr - memAddr)) - , fidMajor(fidMajor) - , fidMinor(fidMinor) - , sup1(sup1) - , sup2(sup2) - , cbFunc(cbFunc) - , cbArg(cbArg) - , spec(spec) -{ -} - -bool ElementaryStream::is_full(u32 space) -{ - if (released < put_count) - { - if (entries.is_full()) - { - return true; - } - - u32 first = 0; - if (!entries.peek(first, 0, &dmux->is_closed) || !first) - { - fmt::throw_exception("entries.peek() failed"); - } - else if (first >= put) - { - return first - put < space + 128; - } - else if (put + space + 128 > memAddr + memSize) - { - return first - memAddr < space + 128; - } - else - { - return false; - } - } - else - { - return false; - } -} - -bool ElementaryStream::isfull(u32 space) -{ - std::lock_guard lock(m_mutex); - return is_full(space); -} - -void ElementaryStream::push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool rap, u32 specific) -{ - u32 addr; - { - std::lock_guard lock(m_mutex); - ensure(!is_full(size)); - - if (put + size + 128 > memAddr + memSize) - { - put = memAddr; - } - - std::memcpy(vm::base(put + 128), raw_data.data(), size); - raw_data.erase(raw_data.begin(), raw_data.begin() + size); - - auto info = vm::ptr::make(put); - info->auAddr = put + 128; - info->auSize = size; - info->dts.lower = static_cast(dts); - info->dts.upper = static_cast(dts >> 32); - info->pts.lower = static_cast(pts); - info->pts.upper = static_cast(pts >> 32); - info->isRap = rap; - info->reserved = 0; - info->userData = userdata; - - auto spec = vm::ptr::make(put + u32{sizeof(CellDmuxAuInfoEx)}); - *spec = specific; - - auto inf = 
vm::ptr::make(put + 64); - inf->auAddr = put + 128; - inf->auSize = size; - inf->dtsLower = static_cast(dts); - inf->dtsUpper = static_cast(dts >> 32); - inf->ptsLower = static_cast(pts); - inf->ptsUpper = static_cast(pts >> 32); - inf->auMaxSize = 0; // ????? - inf->userData = userdata; - - addr = put; - - put = utils::align(put + 128 + size, 128); - - put_count++; - } - - ensure(entries.push(addr, &dmux->is_closed)); -} - -void ElementaryStream::push(DemuxerStream& stream, u32 size) -{ - auto const old_size = raw_data.size(); - - raw_data.resize(old_size + size); - - std::memcpy(raw_data.data() + old_size, vm::base(stream.addr), size); // append bytes - - stream.skip(size); -} - -bool ElementaryStream::release() -{ - std::lock_guard lock(m_mutex); - if (released >= put_count) - { - cellDmux.fatal("es::release() error: buffer is empty"); - return false; - } - if (released >= got_count) - { - cellDmux.fatal("es::release() error: buffer has not been seen yet"); - return false; - } - - u32 addr = 0; - if (!entries.pop(addr, &dmux->is_closed) || !addr) - { - cellDmux.fatal("es::release() error: entries.Pop() failed"); - return false; - } - - released++; - return true; -} - -bool ElementaryStream::peek(u32& out_data, bool no_ex, u32& out_spec, bool update_index) -{ - std::lock_guard lock(m_mutex); - if (got_count < released) - { - cellDmux.fatal("es::peek() error: got_count(%d) < released(%d) (put_count=%d)", got_count, released, put_count); - return false; - } - if (got_count >= put_count) - { - return false; - } - - u32 addr = 0; - if (!entries.peek(addr, got_count - released, &dmux->is_closed) || !addr) - { - cellDmux.fatal("es::peek() error: entries.Peek() failed"); - return false; - } - - out_data = no_ex ? 
addr + 64 : addr; - out_spec = addr + sizeof(CellDmuxAuInfoEx); - - if (update_index) - { - got_count++; - } - return true; -} - -void ElementaryStream::reset() -{ - std::lock_guard lock(m_mutex); - put = memAddr; - entries.clear(); - put_count = 0; - got_count = 0; - released = 0; - raw_data.clear(); - raw_pos = 0; -} - -void dmuxQueryAttr(u32 /* info_addr, may be 0 */, vm::ptr attr) -{ - attr->demuxerVerLower = 0x280000; // TODO: check values - attr->demuxerVerUpper = 0x260000; - attr->memSize = 0x10000; // 0x3e8e6 from ps3 -} - -void dmuxQueryEsAttr(u32 /* info, may be 0 */, vm::cptr esFilterId, u32 /*esSpecificInfo*/, vm::ptr attr) -{ - if (esFilterId->filterIdMajor >= 0xe0) - { - attr->memSize = 0x500000; // 0x45fa49 from ps3 - } - else - { - attr->memSize = 0x7000; // 0x73d9 from ps3 - } - - cellDmux.warning("*** filter(0x%x, 0x%x, 0x%x, 0x%x)", esFilterId->filterIdMajor, esFilterId->filterIdMinor, esFilterId->supplementalInfo1, esFilterId->supplementalInfo2); -} - -error_code cellDmuxQueryAttr(vm::cptr type, vm::ptr attr) -{ - cellDmux.warning("cellDmuxQueryAttr(type=*0x%x, attr=*0x%x)", type, attr); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - dmuxQueryAttr(0, attr); - return CELL_OK; -} - -error_code cellDmuxQueryAttr2(vm::cptr type2, vm::ptr attr) -{ - cellDmux.warning("cellDmuxQueryAttr2(demuxerType2=*0x%x, demuxerAttr=*0x%x)", type2, attr); - - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - dmuxQueryAttr(type2->streamSpecificInfo, attr); - return CELL_OK; -} - -error_code cellDmuxOpen(vm::cptr type, vm::cptr res, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpen(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerResource and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use LLE."); 
-} - -error_code cellDmuxOpenEx(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpenEx(type=*0x%x, resEx=*0x%x, cb=*0x%x, handle=*0x%x)", type, resEx, cb, handle); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerResourceEx and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use LLE."); -} - -error_code cellDmuxOpenExt(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpenExt(type=*0x%x, resEx=*0x%x, cb=*0x%x, handle=*0x%x)", type, resEx, cb, handle); - - return cellDmuxOpenEx(type, resEx, cb, handle); -} - -error_code cellDmuxOpen2(vm::cptr type2, vm::cptr res2, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpen2(type2=*0x%x, res2=*0x%x, cb=*0x%x, handle=*0x%x)", type2, res2, cb, handle); - - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerType2, demuxerResource2 and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use LLE."); -} - -error_code cellDmuxClose(u32 handle) -{ - cellDmux.warning("cellDmuxClose(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - dmux->is_closed = true; - dmux->job.try_push(DemuxerTask(dmuxClose)); - - while (!dmux->is_finished) - { - if (Emu.IsStopped()) - { - cellDmux.warning("cellDmuxClose(%d) aborted", handle); - return CELL_OK; - } - - std::this_thread::sleep_for(1ms); // hack - } - - idm::remove(handle); - return CELL_OK; -} - -error_code cellDmuxSetStream(u32 handle, u32 streamAddress, u32 streamSize, b8 discontinuity, u64 userData) -{ - cellDmux.trace("cellDmuxSetStream(handle=0x%x, streamAddress=0x%x, streamSize=%d, discontinuity=%d, userData=0x%llx)", handle, streamAddress, streamSize, discontinuity, userData); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return 
CELL_DMUX_ERROR_ARG; - } - - if (dmux->is_running.exchange(true)) - { - //std::this_thread::sleep_for(1ms); // hack - return CELL_DMUX_ERROR_BUSY; - } - - DemuxerTask task(dmuxSetStream); - auto& info = task.stream; - info.addr = streamAddress; - info.size = streamSize; - info.discontinuity = discontinuity; - info.userdata = userData; - - dmux->job.push(task, &dmux->is_closed); - return CELL_OK; -} - -error_code cellDmuxResetStream(u32 handle) -{ - cellDmux.warning("cellDmuxResetStream(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - dmux->job.push(DemuxerTask(dmuxResetStream), &dmux->is_closed); - return CELL_OK; -} - -error_code cellDmuxResetStreamAndWaitDone(u32 handle) -{ - cellDmux.warning("cellDmuxResetStreamAndWaitDone(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - if (!dmux->is_running) + // This is frequently checked in here because the elementary stream could get disabled at any time by a different thread via cellDmuxDisableEs() or cellDmuxClose(). + if (!es_handle->is_enabled) { return CELL_OK; } - dmux->is_working = true; - - dmux->job.push(DemuxerTask(dmuxResetStreamAndWaitDone), &dmux->is_closed); - - while (dmux->is_running && dmux->is_working && !dmux->is_closed) // TODO: ensure that it is safe + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) { - if (Emu.IsStopped()) + fatal_err(es_handle->is_enabled, ret); + return 1; + } + + // Check if the access unit queue is full. One slot is reserved for the access unit produced by flushing the stream, so that flushing always succeeds. 
+ if (!es_handle->is_enabled || es_handle->au_queue.allocated_size >= es_handle->au_queue.max_size - !es_handle->flush_started) + { + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) { - cellDmux.warning("cellDmuxResetStreamAndWaitDone(%d) aborted", handle); - return CELL_OK; + fatal_err(es_handle->is_enabled, ret); + return 1; } - std::this_thread::sleep_for(1ms); // hack + + return !es_handle->is_enabled ? CELL_OK : not_an_error(1); // Disable error reporting if the queue is full. This is expected to happen frequently. } + DmuxAuInfo& _au_info = get_au_queue_elements(es_handle)[es_handle->au_queue.back].au_info; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return 1; + } + + _au_info.info = au_info->info; + std::memcpy(_au_info.specific_info.get_ptr(), au_info->specific_info.get_ptr(), au_info->specific_info_size); + + if (!es_handle->is_enabled) + { + return CELL_OK; + } + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return CELL_OK; // LLE returns CELL_OK + } + + if (!es_handle->is_enabled) + { + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + } + + return CELL_OK; + } + + es_handle->au_queue.back = (es_handle->au_queue.back + 1) % es_handle->au_queue.max_size; + es_handle->au_queue.allocated_size++; + es_handle->au_queue.size++; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return CELL_OK; // LLE returns CELL_OK + } + + if (!es_handle->is_enabled) + { + return CELL_OK; + } + + const vm::var es_msg{{ .msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND, .supplementalInfo = es_handle->dmux_handle->user_data }}; + es_handle->es_cb.cbFunc(ppu, es_handle->dmux_handle, es_handle, es_msg, es_handle->es_cb.cbArg); 
+ return CELL_OK; } -error_code cellDmuxQueryEsAttr(vm::cptr type, vm::cptr esFilterId, u32 esSpecificInfo, vm::ptr esAttr) +static error_code notify_es_flush_done(ppu_thread& ppu, vm::ptr core_es_handle, vm::ptr es_handle) { - cellDmux.warning("cellDmuxQueryEsAttr(demuxerType=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", type, esFilterId, esSpecificInfo, esAttr); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) + if (!savestate_lock) { - return CELL_DMUX_ERROR_ARG; + ppu.state += cpu_flag::again; + return {}; } - // TODO: check esFilterId and esSpecificInfo correctly - dmuxQueryEsAttr(0, esFilterId, esSpecificInfo, esAttr); + cellDmux.notice("dmuxEsNotifyFlushDone(unk=*0x%x, es_handle=*0x%x)", core_es_handle, es_handle); + + ensure(!!es_handle); // Not checked on LLE + + if (!es_handle->dmux_handle->_this || !es_handle->is_enabled) + { + return CELL_OK; + } + + es_handle->flush_started = false; + + const vm::var es_msg{{ .msgType = CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE, .supplementalInfo = es_handle->dmux_handle->user_data }}; + es_handle->es_cb.cbFunc(ppu, es_handle->dmux_handle, es_handle, es_msg, es_handle->es_cb.cbArg); + return CELL_OK; } -error_code cellDmuxQueryEsAttr2(vm::cptr type2, vm::cptr esFilterId, u32 esSpecificInfo, vm::ptr esAttr) -{ - cellDmux.warning("cellDmuxQueryEsAttr2(type2=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", type2, esFilterId, esSpecificInfo, esAttr); - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) +static error_code query_attr(ppu_thread& ppu, vm::ptr demuxerAttr, vm::cptr streamSpecificInfo) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) { - return CELL_DMUX_ERROR_ARG; + ppu.state += cpu_flag::again; + return {}; } - // TODO: check 
demuxerType2, esFilterId and esSpecificInfo correctly - dmuxQueryEsAttr(type2->streamSpecificInfo, esFilterId, esSpecificInfo, esAttr); + const vm::var pamf_attr; + + if (const error_code ret = get_error(get_core_ops()->queryAttr(ppu, streamSpecificInfo, pamf_attr)); ret != CELL_OK) + { + return ret; + } + + demuxerAttr->memSize = utils::align(sizeof(DmuxContext) + (pamf_attr->maxEnabledEsNum * sizeof(vm::addr_t)) + sizeof(DmuxEsContext), alignof(DmuxContext)) + + pamf_attr->memSize + 0xf; + demuxerAttr->demuxerVerUpper = 0x260000; + demuxerAttr->demuxerVerLower = pamf_attr->version; + return CELL_OK; } -error_code cellDmuxEnableEs(u32 handle, vm::cptr esFilterId, vm::cptr esResourceInfo, vm::cptr esCb, u32 esSpecificInfo, vm::ptr esHandle) +error_code cellDmuxQueryAttr(ppu_thread& ppu, vm::cptr demuxerType, vm::ptr demuxerAttr) { - cellDmux.warning("cellDmuxEnableEs(handle=0x%x, esFilterId=*0x%x, esResourceInfo=*0x%x, esCb=*0x%x, esSpecificInfo=*0x%x, esHandle=*0x%x)", handle, esFilterId, esResourceInfo, esCb, esSpecificInfo, esHandle); + cellDmux.notice("cellDmuxQueryAttr(demuxerType=*0x%x, demuxerAttr=*0x%x)", demuxerType, demuxerAttr); - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) + if (!demuxerType || !demuxerAttr || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - // TODO: check esFilterId, esResourceInfo, esCb and esSpecificInfo correctly + return query_attr(ppu, demuxerAttr, vm::null); +} - const auto es = idm::make_ptr(dmux.get(), esResourceInfo->memAddr, esResourceInfo->memSize, - esFilterId->filterIdMajor, esFilterId->filterIdMinor, esFilterId->supplementalInfo1, esFilterId->supplementalInfo2, - esCb->cbFunc, esCb->cbArg, esSpecificInfo); +error_code cellDmuxQueryAttr2(ppu_thread& ppu, vm::cptr demuxerType2, vm::ptr demuxerAttr) +{ + cellDmux.notice("cellDmuxQueryAttr2(demuxerType2=*0x%x, demuxerAttr=*0x%x)", demuxerType2, demuxerAttr); - *esHandle = es->id; + if (!demuxerType2 || !demuxerAttr || 
demuxerType2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } - cellDmux.warning("*** New ES(dmux=0x%x, addr=0x%x, size=0x%x, filter={0x%x, 0x%x, 0x%x, 0x%x}, cb=0x%x, arg=0x%x, spec=0x%x): id = 0x%x", - handle, es->memAddr, es->memSize, es->fidMajor, es->fidMinor, es->sup1, es->sup2, es->cbFunc, es->cbArg, es->spec, es->id); + return query_attr(ppu, demuxerAttr, demuxerType2->streamSpecificInfo); +} - DemuxerTask task(dmuxEnableEs); - task.es.es = es->id; - task.es.es_ptr = es.get(); +static error_code open(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResource, vm::cptr demuxerResourceEx, + vm::cptr demuxerCb, vm::cptr streamSpecificInfo, vm::pptr demuxerHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + const vm::var type{{ .streamType = demuxerType->streamType, .streamSpecificInfo = streamSpecificInfo }}; + const vm::var attr; + + if (const error_code ret = cellDmuxQueryAttr2(ppu, type, attr); ret != CELL_OK) + { + return ret; + } + + if (attr->memSize > demuxerResource->memSize) + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var core_attr; + + if (const error_code ret = get_error(get_core_ops()->queryAttr(ppu, streamSpecificInfo, core_attr)); ret != CELL_OK) + { + return ret; + } + + const auto handle = vm::ptr::make(utils::align(demuxerResource->memAddr.addr(), alignof(DmuxContext))); + const u32 es_handles_size = core_attr->maxEnabledEsNum * sizeof(vm::addr_t); + const auto core_mem_addr = vm::ptr::make(utils::align(handle.addr() + sizeof(DmuxContext) + es_handles_size, 0x10)); + + const vm::var core_resource = + {{ + .memAddr = core_mem_addr, + .memSize = demuxerResource->memSize - (core_mem_addr.addr() - demuxerResource->memAddr.addr()), + .ppuThreadPriority = demuxerResource->ppuThreadPriority, + .ppuThreadStackSize = 
demuxerResource->ppuThreadStackSize, + .spuThreadPriority = demuxerResource->spuThreadPriority, + .numOfSpus = demuxerResource->numOfSpus + }}; + + const vm::var res_spurs; + + if (demuxerResourceEx) + { + res_spurs->spurs = demuxerResourceEx->spurs; + res_spurs->priority = demuxerResourceEx->priority; + res_spurs->maxContention = demuxerResourceEx->maxContention; + } + + const auto demux_done_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_demux_done))); + const auto prog_end_code_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_prog_end_code))); + const auto fatal_err_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_fatal_err))); + const vm::var> cb_demux_done{{ .cbFunc = demux_done_func, .cbArg = handle }}; + const vm::var> cb_prog_end_code{{ .cbFunc = prog_end_code_func, .cbArg = handle }}; + const vm::var> cb_fatal_err{{ .cbFunc = fatal_err_func, .cbArg = handle }}; + + const vm::var> core_handle; + + if (const error_code ret = get_error(get_core_ops()->open(ppu, streamSpecificInfo, core_resource, demuxerResourceEx ? 
+res_spurs : vm::null, + cb_demux_done, cb_prog_end_code, cb_fatal_err, core_handle)); + ret != CELL_OK) + { + return ret; + } + + handle->_this = handle; + handle->_this_size = sizeof(DmuxContext) + es_handles_size; + handle->version = core_attr->version; + handle->dmux_state = DMUX_STOPPED; + handle->dmux_type = *demuxerType; + handle->dmux_cb = *demuxerCb; + handle->stream_is_set = false; + handle->core_handle = *core_handle; + handle->version_ = core_attr->version; + handle->user_data = 0; + handle->max_enabled_es_num = core_attr->maxEnabledEsNum; + handle->enabled_es_num = 0; + + const vm::var mutex_attr = + {{ + .protocol = SYS_SYNC_PRIORITY, + .recursive = SYS_SYNC_NOT_RECURSIVE, + .pshared = SYS_SYNC_NOT_PROCESS_SHARED, + .adaptive = SYS_SYNC_NOT_ADAPTIVE, + .name_u64 = "_dx_mhd"_u64 + }}; + + if (const error_code ret = lv2_syscall(ppu, handle.ptr(&DmuxContext::_dx_mhd), mutex_attr); ret != CELL_OK) + { + return ret; + } + + *demuxerHandle = handle; - dmux->job.push(task, &dmux->is_closed); return CELL_OK; } -error_code cellDmuxDisableEs(u32 esHandle) +error_code cellDmuxOpen(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResource, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.warning("cellDmuxDisableEs(esHandle=0x%x)", esHandle); + cellDmux.notice("cellDmuxOpen(demuxerType=*0x%x, demuxerResource=*0x%x, demuxerCb=*0x%x, handle=*0x%x)", demuxerType, demuxerResource, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResource || !demuxerResource->memAddr || demuxerResource->memSize == umax || demuxerResource->ppuThreadStackSize == umax + || !demuxerCb || !demuxerCb->cbFunc + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxDisableEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); - - es->dmux->job.push(task, &es->dmux->is_closed); - return CELL_OK; + return open(ppu, demuxerType, 
demuxerResource, vm::null, demuxerCb, vm::null, demuxerHandle); } -error_code cellDmuxResetEs(u32 esHandle) +error_code cellDmuxOpenEx(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResourceEx, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxResetEs(esHandle=0x%x)", esHandle); + cellDmux.notice("cellDmuxOpenEx(demuxerType=*0x%x, demuxerResourceEx=*0x%x, demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResourceEx || !demuxerResourceEx->memAddr || demuxerResourceEx->memSize == umax || demuxerResourceEx->ppuThreadStackSize == umax + || !demuxerResourceEx->spurs || demuxerResourceEx->maxContention == 0u + || (demuxerResourceEx->priority & 0xf0f0f0f0f0f0f0f0ull) != 0u // Each byte in priority must be less than 0x10 + || !demuxerCb + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxResetEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); + const vm::var resource + {{ + .memAddr = demuxerResourceEx->memAddr, + .memSize = demuxerResourceEx->memSize, + .ppuThreadPriority = demuxerResourceEx->ppuThreadPriority, + .ppuThreadStackSize = demuxerResourceEx->ppuThreadStackSize, + .spuThreadPriority = 0xfa, + .numOfSpus = 1 + }}; - es->dmux->job.push(task, &es->dmux->is_closed); - return CELL_OK; + return open(ppu, demuxerType, resource, demuxerResourceEx, demuxerCb, vm::null, demuxerHandle); } -error_code cellDmuxGetAu(u32 esHandle, vm::ptr auInfo, vm::ptr auSpecificInfo) +error_code cellDmuxOpenExt(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResourceEx, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxGetAu(esHandle=0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + cellDmux.notice("cellDmuxOpenExt(demuxerType=*0x%x, demuxerResourceEx=*0x%x, 
demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) - { - return CELL_DMUX_ERROR_ARG; - } - - u32 info; - u32 spec; - if (!es->peek(info, true, spec, true)) - { - return CELL_DMUX_ERROR_EMPTY; - } - - *auInfo = info; - *auSpecificInfo = spec; - return CELL_OK; + return cellDmuxOpenEx(ppu, demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); } -error_code cellDmuxPeekAu(u32 esHandle, vm::ptr auInfo, vm::ptr auSpecificInfo) +error_code cellDmuxOpen2(ppu_thread& ppu, vm::cptr demuxerType2, vm::cptr demuxerResource2, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxPeekAu(esHandle=0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + cellDmux.notice("cellDmuxOpen2(demuxerType2=*0x%x, demuxerResource2=*0x%x, demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType2, demuxerResource2, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType2 || demuxerType2->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResource2 + || !demuxerCb || !demuxerCb->cbFunc + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - u32 info; - u32 spec; - if (!es->peek(info, true, spec, false)) + const vm::var type{{ .streamType = CELL_DMUX_STREAM_TYPE_PAMF }}; + + if (demuxerResource2->isResourceEx) { - return CELL_DMUX_ERROR_EMPTY; + if (!demuxerResource2->resourceEx.memAddr || demuxerResource2->resourceEx.memSize == umax || demuxerResource2->resourceEx.ppuThreadStackSize == umax + || !demuxerResource2->resourceEx.spurs || demuxerResource2->resourceEx.maxContention == 0u + || (demuxerResource2->resourceEx.priority & 0xf0f0f0f0f0f0f0f0ull) != 0u) // Each byte in priority must be less than 0x10 + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var resource + {{ + .memAddr = demuxerResource2->resourceEx.memAddr, + .memSize = demuxerResource2->resourceEx.memSize, + .ppuThreadPriority = 
demuxerResource2->resourceEx.ppuThreadPriority, + .ppuThreadStackSize = demuxerResource2->resourceEx.ppuThreadStackSize, + .spuThreadPriority = 0xfa, + .numOfSpus = 1 + }}; + + return open(ppu, type, resource, demuxerResource2.ptr(&CellDmuxResource2::resourceEx), demuxerCb, demuxerType2->streamSpecificInfo, demuxerHandle); } - *auInfo = info; - *auSpecificInfo = spec; - return CELL_OK; + if (!demuxerResource2->resource.memAddr || demuxerResource2->resource.memSize == umax || demuxerResource2->resource.ppuThreadStackSize == umax) + { + return CELL_DMUX_ERROR_ARG; + } + + return open(ppu, type, demuxerResource2.ptr(&CellDmuxResource2::resource), vm::null, demuxerCb, demuxerType2->streamSpecificInfo, demuxerHandle); } -error_code cellDmuxGetAuEx(u32 esHandle, vm::ptr auInfoEx, vm::ptr auSpecificInfo) +static error_code disable_es(ppu_thread& ppu, DmuxEsContext& esHandle) { - cellDmux.trace("cellDmuxGetAuEx(esHandle=0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); - - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (const error_code ret = lv2_syscall(ppu, esHandle._dx_mes, 0); ret != CELL_OK) { - return CELL_DMUX_ERROR_ARG; + return ret; } - u32 info; - u32 spec; - if (!es->peek(info, false, spec, true)) + const error_code core_ret = get_core_ops()->disableEs(ppu, esHandle.core_es_handle); + + esHandle.is_enabled = false; + + if (const error_code ret = lv2_syscall(ppu, esHandle._dx_mes); ret != CELL_OK) { - return CELL_DMUX_ERROR_EMPTY; + return ret; } - *auInfoEx = info; - *auSpecificInfo = spec; - return CELL_OK; + error_code ret; + while ((ret = lv2_syscall(ppu, esHandle._dx_mes)) == static_cast(CELL_EBUSY)) + { + lv2_syscall(ppu, 200); + } + + if (ret != CELL_OK) + { + return ret; + } + + esHandle._this = vm::null; + + return get_error(core_ret); } -error_code cellDmuxPeekAuEx(u32 esHandle, vm::ptr auInfoEx, vm::ptr auSpecificInfo) +error_code cellDmuxClose(ppu_thread& ppu, vm::ptr demuxerHandle) { - 
cellDmux.trace("cellDmuxPeekAuEx(esHandle=0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.notice("cellDmuxClose(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - u32 info; - u32 spec; - if (!es->peek(info, false, spec, false)) + demuxerHandle->_this = vm::null; + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) { - return CELL_DMUX_ERROR_EMPTY; + demuxerHandle->_this = demuxerHandle; + return ret; } - *auInfoEx = info; - *auSpecificInfo = spec; - return CELL_OK; + for (const vm::ptr es_handle : get_es_handles(demuxerHandle)) + { + if (const error_code ret = disable_es(ppu, *es_handle); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + demuxerHandle->_this = demuxerHandle; + return ret; + } + + es_handle->dmux_handle = vm::null; + demuxerHandle->enabled_es_num--; + } + + error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + ret = ret ? ret : get_error(get_core_ops()->close(ppu, demuxerHandle->core_handle)); + ret = ret ? 
ret : lv2_syscall(ppu, demuxerHandle->_dx_mhd); + + if (ret != CELL_OK) + { + demuxerHandle->_this = demuxerHandle; + } + + return ret; } -error_code cellDmuxReleaseAu(u32 esHandle) +error_code cellDmuxSetStream(ppu_thread& ppu, vm::ptr demuxerHandle, vm::cptr streamAddress, u32 streamSize, b8 discontinuity, u64 userData) { - cellDmux.trace("cellDmuxReleaseAu(esHandle=0x%x)", esHandle); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.trace("cellDmuxSetStream(demuxerHandle=*0x%x, streamAddress=*0x%x, streamSize=0x%x, discontinuity=%d, userData=0x%llx)", + demuxerHandle, streamAddress, streamSize, +discontinuity, userData); + + if (!demuxerHandle || !demuxerHandle->_this || streamSize == 0 || streamSize == umax || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - if (!es->release()) + if (!(demuxerHandle->dmux_state & DMUX_STOPPED)) + { + return CELL_DMUX_ERROR_BUSY; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = get_error(get_core_ops()->setStream(ppu, demuxerHandle->core_handle, streamAddress, streamSize, discontinuity, userData)); + ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? 
mutex_unlock_ret : ret; + } + + demuxerHandle->stream_is_set = true; + demuxerHandle->dmux_state = DMUX_RUNNING; + demuxerHandle->user_data = userData; + + return lv2_syscall(ppu, demuxerHandle->_dx_mhd); +} + +error_code cellDmuxResetStream(ppu_thread& ppu, vm::ptr demuxerHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxResetStream(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_status = demuxerHandle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (!(dmux_status & DMUX_RUNNING) || !demuxerHandle->stream_is_set) { return CELL_DMUX_ERROR_SEQ; } + + if (const error_code ret = get_error(get_core_ops()->resetStream(ppu, demuxerHandle->core_handle)); ret != CELL_OK) + { + return ret; + } + + demuxerHandle->stream_is_set = false; + return CELL_OK; } -error_code cellDmuxFlushEs(u32 esHandle) +error_code cellDmuxResetStreamAndWaitDone(ppu_thread& ppu, vm::ptr demuxerHandle) { - cellDmux.warning("cellDmuxFlushEs(esHandle=0x%x)", esHandle); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.notice("cellDmuxResetStreamAndWaitDone(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { 
return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxFlushEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); + if (const error_code ret = get_error(get_core_ops()->resetStreamAndWaitDone(ppu, demuxerHandle->core_handle)); ret != CELL_OK) + { + return ret; + } + + // LLE doesn't set DmuxContext::stream_is_set to false + + return CELL_OK; +} + +error_code cellDmuxQueryEsAttr(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr esFilterId, vm::cptr esSpecificInfo, vm::ptr esAttr) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxQueryEsAttr(demuxerType=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", demuxerType, esFilterId, esSpecificInfo, esAttr); + + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF || !esFilterId || !esAttr) + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var core_es_attr; + + if (const error_code ret = get_error(get_core_ops()->queryEsAttr(ppu, vm::make_var(*esFilterId), esSpecificInfo, core_es_attr)); + ret != CELL_OK) + { + return ret; + } + + esAttr->memSize = utils::align(sizeof(DmuxEsContext) + ((core_es_attr->auQueueMaxSize + 1) * (core_es_attr->specificInfoSize + sizeof(DmuxAuQueueElement))), alignof(DmuxEsContext)) + + core_es_attr->memSize + 0xf; + + return CELL_OK; +} + +error_code cellDmuxQueryEsAttr2(ppu_thread& ppu, vm::cptr demuxerType2, vm::cptr esFilterId, vm::cptr esSpecificInfo, vm::ptr esAttr) +{ + cellDmux.notice("cellDmuxQueryEsAttr2(demuxerType2=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", demuxerType2, esFilterId, esSpecificInfo, esAttr); + + ensure(!!demuxerType2); // Not checked on LLE + + const vm::var demuxerType{{ .streamType = demuxerType2->streamType }}; + + return cellDmuxQueryEsAttr(ppu, demuxerType, esFilterId, esSpecificInfo, esAttr); +} + +error_code 
cellDmuxEnableEs(ppu_thread& ppu, vm::ptr demuxerHandle, vm::cptr esFilterId, vm::cptr esResourceInfo, + vm::cptr esCb, vm::cptr esSpecificInfo, vm::pptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxEnableEs(demuxerHandle=*0x%x, esFilterId=*0x%x, esResourceInfo=*0x%x, esCb=*0x%x, esSpecificInfo=*0x%x, esHandle=**0x%x)", + demuxerHandle, esFilterId, esResourceInfo, esCb, esSpecificInfo, esHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !esFilterId + || !esResourceInfo || !esResourceInfo->memAddr || esResourceInfo->memSize == umax + || !esCb || !esCb->cbFunc + || !esHandle) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (demuxerHandle->enabled_es_num >= demuxerHandle->max_enabled_es_num) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : CELL_DMUX_ERROR_ARG; + } + + const vm::var es_attr; + + if (const error_code ret = cellDmuxQueryEsAttr(ppu, demuxerHandle.ptr(&DmuxContext::dmux_type), esFilterId, esSpecificInfo, es_attr); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + if (es_attr->memSize > esResourceInfo->memSize) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? 
mutex_unlock_ret : CELL_DMUX_ERROR_ARG; + } + + const vm::var es_filter_id{ *esFilterId }; + const vm::var core_es_attr; + + if (const error_code ret = get_error(get_core_ops()->queryEsAttr(ppu, es_filter_id, esSpecificInfo, core_es_attr)); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + core_es_attr->auQueueMaxSize++; // One extra slot for the access unit produced by flushing the stream, so that flushing always succeeds + + const auto es_handle = vm::ptr::make(utils::align(esResourceInfo->memAddr.addr(), alignof(DmuxEsContext))); + const u32 au_queue_elements_size = core_es_attr->auQueueMaxSize * (core_es_attr->specificInfoSize + sizeof(DmuxAuQueueElement)); + const auto core_mem_addr = vm::bptr::make(utils::align(es_handle.addr() + sizeof(DmuxEsContext) + au_queue_elements_size, 0x10)); + + const vm::var core_es_resource + {{ + .memAddr = core_mem_addr, + .memSize = esResourceInfo->memSize - (core_mem_addr.addr() - esResourceInfo->memAddr.addr()) + }}; + + const vm::var mutex_attr = + {{ + .protocol = SYS_SYNC_PRIORITY, + .recursive = SYS_SYNC_NOT_RECURSIVE, + .pshared = SYS_SYNC_NOT_PROCESS_SHARED, + .adaptive = SYS_SYNC_NOT_ADAPTIVE, + .name_u64 = "_dx_mes"_u64 + }}; + + if (const error_code ret = lv2_syscall(ppu, es_handle.ptr(&DmuxEsContext::_dx_mes), mutex_attr); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, es_handle->_dx_mes) == CELL_OK); // Not checked on LLE + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + const auto au_found_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_es_au_found))); + const auto flush_done_func = 
vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_es_flush_done))); + const vm::var> cb_au_found{{ .cbFunc = au_found_func, .cbArg = es_handle }}; + const vm::var> cb_flush_done{{ .cbFunc = flush_done_func, .cbArg = es_handle }}; + + const vm::var> core_es_handle; + + if (const error_code ret = get_error(get_core_ops()->enableEs(ppu, demuxerHandle->core_handle, es_filter_id, core_es_resource, cb_au_found, cb_flush_done, + esSpecificInfo, core_es_handle)); + ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, es_handle->_dx_mes); + const error_code mutex_destroy_ret = lv2_syscall(ppu, es_handle->_dx_mes); + + if (mutex_unlock_ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return mutex_unlock_ret; + } + + if (mutex_destroy_ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return mutex_destroy_ret; + } + + const error_code mutex_unlock_ret2 = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret2 ? 
mutex_unlock_ret2 : ret; + } + + es_handle->is_enabled = true; + es_handle->error_mem_size = 0; + es_handle->error_count = 0; + // es_handle->error_mem_addr is not initialized on LLE + es_handle->_this = es_handle; + es_handle->_this_size = sizeof(DmuxEsContext) + au_queue_elements_size; + es_handle->_this_index = demuxerHandle->enabled_es_num; + es_handle->dmux_handle = demuxerHandle; + es_handle->es_cb = *esCb; + es_handle->core_es_handle = *core_es_handle; + es_handle->flush_started = bf_t, 0, 1>{}; + es_handle->au_queue.max_size = core_es_attr->auQueueMaxSize; + es_handle->au_queue.allocated_size = 0; + es_handle->au_queue.size = 0; + es_handle->au_queue.front = 0; + es_handle->au_queue.back = 0; + es_handle->au_queue.allocated_back = 0; + + const vm::ptr au_queue_elements = get_au_queue_elements(es_handle); + + for (u32 i = 0; i < core_es_attr->auQueueMaxSize; i++) + { + au_queue_elements[i].index = i; + au_queue_elements[i].unk = 0; + au_queue_elements[i].au_info.info.auAddr = vm::null; + au_queue_elements[i].au_info.info.auMaxSize = 0; + au_queue_elements[i].au_info.specific_info.set(au_queue_elements.addr() + (core_es_attr->auQueueMaxSize * static_cast(sizeof(DmuxAuQueueElement))) + (i * core_es_attr->specificInfoSize)); + au_queue_elements[i].au_info.specific_info_size = core_es_attr->specificInfoSize; + } + + demuxerHandle->enabled_es_num++; + *get_es_handles(demuxerHandle).rbegin() = es_handle; + *esHandle = es_handle; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, es_handle->_dx_mes) == CELL_OK); // Not checked on LLE + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + return lv2_syscall(ppu, demuxerHandle->_dx_mhd); +} + +error_code cellDmuxDisableEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if 
(!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxDisableEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = disable_es(ppu, *esHandle); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + const std::span> es_handles = get_es_handles(esHandle->dmux_handle); + + std::shift_left(std::ranges::find(es_handles, static_cast>(esHandle)), es_handles.end(), 1); + + esHandle->dmux_handle->enabled_es_num--; + *es_handles.rbegin() = vm::null; + + return lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); +} + +error_code cellDmuxResetEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxResetEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_status = esHandle->dmux_handle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (dmux_status & DMUX_STOPPED) + { + return CELL_DMUX_ERROR_SEQ; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = 
get_error(get_core_ops()->resetEs(ppu, esHandle->core_es_handle)); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + const auto au_queue_elements = get_au_queue_elements(esHandle); + + for (s32 i = 0; i < esHandle->au_queue.max_size; i++) + { + au_queue_elements[i].index = i; + au_queue_elements[i].unk = 0; + au_queue_elements[i].au_info.info.auAddr = vm::null; + au_queue_elements[i].au_info.info.auMaxSize = 0; + } + + esHandle->error_mem_size = 0; + esHandle->error_count = 0; + esHandle->au_queue.allocated_size = 0; + esHandle->au_queue.size = 0; + esHandle->au_queue.front = 0; + esHandle->au_queue.back = 0; + esHandle->au_queue.allocated_back = 0; + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +template +static error_code pop_au(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + if (esHandle->au_queue.size <= 0) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? 
mutex_unlock_ret : CELL_DMUX_ERROR_EMPTY; + } + + const vm::ptr au_info = (get_au_queue_elements(esHandle) + esHandle->au_queue.front).ptr(&DmuxAuQueueElement::au_info); + + if (auInfo) + { + *auInfo = au_info.ptr(&DmuxAuInfo::info); + } + + if (auSpecificInfo) + { + *auSpecificInfo = au_info->specific_info; + } + + if constexpr (!is_peek) + { + esHandle->au_queue.front = (esHandle->au_queue.front + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.size--; + } + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +error_code cellDmuxGetAu(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxGetAu(esHandle=*0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfo, auSpecificInfo); +} + +error_code cellDmuxPeekAu(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxPeekAu(esHandle=*0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfo, auSpecificInfo); +} + +error_code cellDmuxGetAuEx(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfoEx, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxGetAuEx(esHandle=*0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfoEx, auSpecificInfo); +} + +error_code cellDmuxPeekAuEx(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfoEx, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxPeekAuEx(esHandle=*0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfoEx, auSpecificInfo); +} + +error_code cellDmuxReleaseAu(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + 
return {}; + } + + cellDmux.trace("cellDmuxReleaseAu(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + vm::bptr mem_addr; + u32 mem_size; + + if (esHandle->au_queue.allocated_size < 1) + { + if (esHandle->error_count == 0u) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? mutex_unlock_ret : CELL_DMUX_ERROR_SEQ; + } + + mem_addr = esHandle->error_mem_addr; + mem_size = esHandle->error_mem_size; + } + else + { + const DmuxAuInfo& au_info = get_au_queue_elements(esHandle)[esHandle->au_queue.allocated_back].au_info; + + mem_size = + esHandle->error_mem_size += au_info.info.auSize; + + if (esHandle->error_count == 0u) + { + mem_addr = au_info.info.auAddr; + } + else + { + mem_addr = esHandle->error_mem_addr; + } + + esHandle->au_queue.allocated_back = (esHandle->au_queue.allocated_back + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.allocated_size--; + + if (esHandle->au_queue.allocated_size < esHandle->au_queue.size) + { + esHandle->au_queue.front = (esHandle->au_queue.front + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.size--; + } + } + + if (const error_code ret = get_error(get_core_ops()->releaseAu(ppu, esHandle->core_es_handle, mem_addr, mem_size)); ret != CELL_OK) + { + if (esHandle->error_count == 0u) + { + esHandle->error_mem_addr = mem_addr; + } + + esHandle->error_count++; + + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? 
mutex_unlock_ret : ret; + } + + esHandle->error_count = 0; + esHandle->error_mem_size = 0; + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +error_code cellDmuxFlushEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxFlushEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_state = esHandle->dmux_handle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (!(dmux_state & DMUX_STOPPED)) + { + return CELL_DMUX_ERROR_SEQ; + } + + esHandle->flush_started = true; + + if (const error_code ret = get_error(get_core_ops()->flushEs(ppu, esHandle->core_es_handle)); ret != CELL_OK) + { + esHandle->flush_started = false; + return ret; + } - es->dmux->job.push(task, &es->dmux->is_closed); return CELL_OK; } @@ -1382,4 +1258,11 @@ DECLARE(ppu_module_manager::cellDmux)("cellDmux", []() REG_FUNC(cellDmux, cellDmuxPeekAuEx); REG_FUNC(cellDmux, cellDmuxReleaseAu); REG_FUNC(cellDmux, cellDmuxFlushEs); + + REG_HIDDEN_FUNC(notify_demux_done); + REG_HIDDEN_FUNC(notify_fatal_err); + REG_HIDDEN_FUNC(notify_prog_end_code); + + REG_HIDDEN_FUNC(notify_es_au_found); + REG_HIDDEN_FUNC(notify_es_flush_done); }); diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.h b/rpcs3/Emu/Cell/Modules/cellDmux.h index 1767165283..3db8c63bee 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmux.h +++ b/rpcs3/Emu/Cell/Modules/cellDmux.h @@ -1,7 +1,8 @@ #pragma once #include "Emu/Memory/vm_ptr.h" -#include 
"cellPamf.h" +#include "Emu/Cell/ErrorCodes.h" +#include "Utilities/BitField.h" // Error Codes enum CellDmuxError :u32 @@ -18,6 +19,10 @@ enum CellDmuxStreamType : s32 CELL_DMUX_STREAM_TYPE_UNDEF = 0, CELL_DMUX_STREAM_TYPE_PAMF = 1, CELL_DMUX_STREAM_TYPE_TERMINATOR = 2, + + // Only used in cellSail + CELL_DMUX_STREAM_TYPE_MP4 = 0x81, + CELL_DMUX_STREAM_TYPE_AVI = 0x82 }; enum CellDmuxMsgType : s32 @@ -33,118 +38,6 @@ enum CellDmuxEsMsgType : s32 CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE = 1, }; -enum CellDmuxPamfM2vLevel : s32 -{ - CELL_DMUX_PAMF_M2V_MP_LL = 0, - CELL_DMUX_PAMF_M2V_MP_ML, - CELL_DMUX_PAMF_M2V_MP_H14, - CELL_DMUX_PAMF_M2V_MP_HL, -}; - -enum CellDmuxPamfAvcLevel : s32 -{ - CELL_DMUX_PAMF_AVC_LEVEL_2P1 = 21, - CELL_DMUX_PAMF_AVC_LEVEL_3P0 = 30, - CELL_DMUX_PAMF_AVC_LEVEL_3P1 = 31, - CELL_DMUX_PAMF_AVC_LEVEL_3P2 = 32, - CELL_DMUX_PAMF_AVC_LEVEL_4P1 = 41, - CELL_DMUX_PAMF_AVC_LEVEL_4P2 = 42, -}; - -struct CellDmuxPamfAuSpecificInfoM2v -{ - be_t reserved1; -}; - -struct CellDmuxPamfAuSpecificInfoAvc -{ - be_t reserved1; -}; - -struct CellDmuxPamfAuSpecificInfoLpcm -{ - u8 channelAssignmentInfo; - u8 samplingFreqInfo; - u8 bitsPerSample; -}; - -struct CellDmuxPamfAuSpecificInfoAc3 -{ - be_t reserved1; -}; - -struct CellDmuxPamfAuSpecificInfoAtrac3plus -{ - be_t reserved1; -}; - -struct CellDmuxPamfAuSpecificInfoUserData -{ - be_t reserved1; -}; - -struct CellDmuxPamfEsSpecificInfoM2v -{ - be_t profileLevel; -}; - -struct CellDmuxPamfEsSpecificInfoAvc -{ - be_t level; -}; - -struct CellDmuxPamfEsSpecificInfoLpcm -{ - be_t samplingFreq; - be_t numOfChannels; - be_t bitsPerSample; -}; - -struct CellDmuxPamfEsSpecificInfoAc3 -{ - be_t reserved1; -}; - -struct CellDmuxPamfEsSpecificInfoAtrac3plus -{ - be_t reserved1; -}; - -struct CellDmuxPamfEsSpecificInfoUserData -{ - be_t reserved1; -}; - -enum CellDmuxPamfSamplingFrequency : s32 -{ - CELL_DMUX_PAMF_FS_48K = 48000, -}; - -enum CellDmuxPamfBitsPerSample : s32 -{ - CELL_DMUX_PAMF_BITS_PER_SAMPLE_16 = 16, - 
CELL_DMUX_PAMF_BITS_PER_SAMPLE_24 = 24, -}; - -enum CellDmuxPamfLpcmChannelAssignmentInfo : s32 -{ - CELL_DMUX_PAMF_LPCM_CH_M1 = 1, - CELL_DMUX_PAMF_LPCM_CH_LR = 3, - CELL_DMUX_PAMF_LPCM_CH_LRCLSRSLFE = 9, - CELL_DMUX_PAMF_LPCM_CH_LRCLSCS1CS2RSLFE = 11, -}; - -enum CellDmuxPamfLpcmFs : s32 -{ - CELL_DMUX_PAMF_LPCM_FS_48K = 1, -}; - -enum CellDmuxPamfLpcmBitsPerSamples : s32 -{ - CELL_DMUX_PAMF_LPCM_BITS_PER_SAMPLE_16 = 1, - CELL_DMUX_PAMF_LPCM_BITS_PER_SAMPLE_24 = 3, -}; - struct CellDmuxMsg { be_t msgType; // CellDmuxMsgType @@ -160,24 +53,19 @@ struct CellDmuxEsMsg struct CellDmuxType { be_t streamType; // CellDmuxStreamType - be_t reserved[2]; -}; - -struct CellDmuxPamfSpecificInfo -{ - be_t thisSize; - b8 programEndCodeCb; + be_t reserved1; + be_t reserved2; }; struct CellDmuxType2 { - be_t streamType; // CellDmuxStreamType - be_t streamSpecificInfo; + be_t streamType; + vm::bcptr streamSpecificInfo; }; struct CellDmuxResource { - be_t memAddr; + vm::bptr memAddr; be_t memSize; be_t ppuThreadPriority; be_t ppuThreadStackSize; @@ -187,12 +75,12 @@ struct CellDmuxResource struct CellDmuxResourceEx { - be_t memAddr; + vm::bptr memAddr; be_t memSize; be_t ppuThreadPriority; be_t ppuThreadStackSize; - be_t spurs_addr; - u8 priority[8]; + vm::bptr spurs; // CellSpurs* + be_t priority; be_t maxContention; }; @@ -203,40 +91,30 @@ struct CellDmuxResourceSpurs be_t maxContention; }; -/* -struct CellDmuxResource2Ex -{ - b8 isResourceEx; //true - CellDmuxResourceEx resourceEx; -}; - -struct CellDmuxResource2NoEx -{ - b8 isResourceEx; //false - CellDmuxResource resource; -}; -*/ - struct CellDmuxResource2 { b8 isResourceEx; - be_t memAddr; - be_t memSize; - be_t ppuThreadPriority; - be_t ppuThreadStackSize; - be_t shit[4]; + + union + { + CellDmuxResource resource; + CellDmuxResourceEx resourceEx; + }; }; -using CellDmuxCbMsg = u32(u32 demuxerHandle, vm::ptr demuxerMsg, u32 cbArg); +struct DmuxContext; +struct DmuxEsContext; -using CellDmuxCbEsMsg = u32(u32 demuxerHandle, 
u32 esHandle, vm::ptr esMsg, u32 cbArg); +using CellDmuxCbMsg = u32(vm::ptr demuxerHandle, vm::cptr demuxerMsg, vm::ptr cbArg); + +using CellDmuxCbEsMsg = u32(vm::ptr demuxerHandle, vm::ptr esHandle, vm::cptr esMsg, vm::ptr cbArg); // Used for internal callbacks as well template struct DmuxCb { vm::bptr cbFunc; - be_t cbArg; + vm::bptr cbArg; }; using CellDmuxCb = DmuxCb; @@ -250,42 +128,114 @@ struct CellDmuxAttr be_t demuxerVerLower; }; +struct CellDmuxPamfAttr +{ + be_t maxEnabledEsNum; + be_t version; + be_t memSize; +}; + struct CellDmuxEsAttr { be_t memSize; }; +struct CellDmuxPamfEsAttr +{ + be_t auQueueMaxSize; + be_t memSize; + be_t specificInfoSize; +}; + struct CellDmuxEsResource { - be_t memAddr; + vm::bptr memAddr; be_t memSize; }; struct CellDmuxAuInfo { - be_t auAddr; + vm::bptr auAddr; be_t auSize; be_t auMaxSize; - be_t userData; - be_t ptsUpper; - be_t ptsLower; - be_t dtsUpper; - be_t dtsLower; -}; - -struct CellDmuxAuInfoEx -{ - be_t auAddr; - be_t auSize; - be_t reserved; b8 isRap; be_t userData; CellCodecTimeStamp pts; CellCodecTimeStamp dts; }; -struct CellDmuxPamfAttr; -struct CellDmuxPamfEsAttr; +using CellDmuxAuInfoEx = CellDmuxAuInfo; + +struct DmuxAuInfo +{ + CellDmuxAuInfo info; + vm::bptr specific_info; + be_t specific_info_size; +}; + +struct DmuxAuQueueElement +{ + be_t index; + u8 unk; // unused + DmuxAuInfo au_info; +}; + +CHECK_SIZE(DmuxAuQueueElement, 0x38); + +enum DmuxState : u32 +{ + DMUX_STOPPED = 1 << 0, + DMUX_RUNNING = 1 << 1, +}; + +struct alignas(0x10) DmuxContext // CellDmuxHandle = DmuxContext* +{ + vm::bptr _this; + be_t _this_size; + be_t version; + be_t dmux_state; + CellDmuxType dmux_type; + CellDmuxCb dmux_cb; + b8 stream_is_set; + vm::bptr core_handle; + be_t version_; // Same value as 'version' + be_t user_data; + be_t max_enabled_es_num; + be_t enabled_es_num; + be_t _dx_mhd; // sys_mutex_t + u8 reserved[0x7c]; +}; + +CHECK_SIZE_ALIGN(DmuxContext, 0xc0, 0x10); + +struct alignas(0x10) DmuxEsContext // 
CellDmuxEsHandle = DmuxEsContext* +{ + be_t _dx_mes; // sys_mutex_t + be_t is_enabled; + be_t error_mem_size; + be_t error_count; + vm::bptr error_mem_addr; + vm::bptr _this; + be_t _this_size; + be_t _this_index; + vm::bptr dmux_handle; + CellDmuxEsCb es_cb; + vm::bptr core_es_handle; + bf_t, 0, 1> flush_started; + + struct + { + be_t max_size; + be_t allocated_size; + be_t size; + be_t front; + be_t back; + be_t allocated_back; + } + au_queue; +}; + +CHECK_SIZE_ALIGN(DmuxEsContext, 0x50, 0x10); using DmuxNotifyDemuxDone = error_code(vm::ptr, u32, vm::ptr); using DmuxNotifyFatalErr = error_code(vm::ptr, u32, vm::ptr); @@ -301,13 +251,13 @@ using CellDmuxCoreOpResetStream = error_code(vm::ptr); using CellDmuxCoreOpCreateThread = error_code(vm::ptr); using CellDmuxCoreOpJoinThread = error_code(vm::ptr); using CellDmuxCoreOpSetStream = error_code(vm::ptr, vm::cptr, u32, b8, u64); -using CellDmuxCoreOpFreeMemory = error_code(vm::ptr, vm::ptr, u32); +using CellDmuxCoreOpReleaseAu = error_code(vm::ptr, vm::ptr, u32); using CellDmuxCoreOpQueryEsAttr = error_code(vm::cptr, vm::cptr, vm::ptr); using CellDmuxCoreOpEnableEs = error_code(vm::ptr, vm::cptr, vm::cptr, vm::cptr>, vm::cptr>, vm::cptr, vm::pptr); -using CellDmuxCoreOpDisableEs = u32(vm::ptr); -using CellDmuxCoreOpFlushEs = u32(vm::ptr); -using CellDmuxCoreOpResetEs = u32(vm::ptr); -using CellDmuxCoreOpResetStreamAndWaitDone = u32(vm::ptr); +using CellDmuxCoreOpDisableEs = error_code(vm::ptr); +using CellDmuxCoreOpFlushEs = error_code(vm::ptr); +using CellDmuxCoreOpResetEs = error_code(vm::ptr); +using CellDmuxCoreOpResetStreamAndWaitDone = error_code(vm::ptr); struct CellDmuxCoreOps { @@ -318,7 +268,7 @@ struct CellDmuxCoreOps vm::bptr createThread; vm::bptr joinThread; vm::bptr setStream; - vm::bptr freeMemory; + vm::bptr releaseAu; vm::bptr queryEsAttr; vm::bptr enableEs; vm::bptr disableEs; diff --git a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp index 
70162d4031..ecf59db508 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp @@ -1,121 +1,2860 @@ #include "stdafx.h" #include "Emu/Cell/PPUModule.h" -#include "Emu/IdManager.h" +#include "Emu/Cell/lv2/sys_cond.h" +#include "Emu/Cell/lv2/sys_memory.h" +#include "Emu/Cell/lv2/sys_mutex.h" +#include "Emu/Cell/lv2/sys_ppu_thread.h" +#include "Emu/Cell/lv2/sys_sync.h" +#include "sysPrxForUser.h" +#include "util/asm.hpp" -#include "cellDmux.h" #include "cellDmuxPamf.h" - +#include +#include vm::gvar g_cell_dmux_core_ops_pamf; vm::gvar g_cell_dmux_core_ops_raw_es; LOG_CHANNEL(cellDmuxPamf) +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](CellDmuxPamfError value) + { + switch (value) + { + STR_CASE(CELL_DMUX_PAMF_ERROR_BUSY); + STR_CASE(CELL_DMUX_PAMF_ERROR_ARG); + STR_CASE(CELL_DMUX_PAMF_ERROR_UNKNOWN_STREAM); + STR_CASE(CELL_DMUX_PAMF_ERROR_NO_MEMORY); + STR_CASE(CELL_DMUX_PAMF_ERROR_FATAL); + } + + return unknown; + }); +} + +inline std::pair dmuxPamfStreamIdToTypeChannel(u16 stream_id, u16 private_stream_id) +{ + if ((stream_id & 0xf0) == 0xe0) + { + return { DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO, stream_id & 0x0f }; + } + + if ((stream_id & 0xff) != 0xbd) + { + return { DMUX_PAMF_STREAM_TYPE_INDEX_INVALID, 0 }; + } + + switch (private_stream_id & 0xf0) + { + case 0x40: return { DMUX_PAMF_STREAM_TYPE_INDEX_LPCM, private_stream_id & 0x0f }; + case 0x30: return { DMUX_PAMF_STREAM_TYPE_INDEX_AC3, private_stream_id & 0x0f }; + case 0x00: return { DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX, private_stream_id & 0x0f }; + case 0x20: return { DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA, private_stream_id & 0x0f }; + default: return { DMUX_PAMF_STREAM_TYPE_INDEX_INVALID, 0 }; + } +} + + +// SPU thread + +void dmux_pamf_base::output_queue::pop_back(u32 au_size) +{ + ensure(back - au_size >= buffer.data(), "Invalid au_size"); + back -= au_size; +} + +void dmux_pamf_base::output_queue::pop_back(u8* 
au_addr) +{ + ensure(au_addr >= buffer.data() && au_addr < std::to_address(buffer.end()), "Invalid au_addr"); + + // If au_begin is in front of the back pointer, unwrap the back pointer (there are no more access units behind the back pointer) + if (au_addr > back) + { + wrap_pos = buffer.data(); + } + + back = au_addr; +} + +void dmux_pamf_base::output_queue::pop_front(u32 au_size) +{ + ensure(front + au_size <= std::to_address(buffer.end()), "Invalid au_size"); + front += au_size; + + // When front reaches wrap_pos, unwrap the queue + if (wrap_pos != buffer.data() && wrap_pos <= front) + { + ensure(wrap_pos == front, "Invalid au_size"); + front = buffer.data(); + wrap_pos = buffer.data(); + } +} + +void dmux_pamf_base::output_queue::push_unchecked(const access_unit_chunk& au_chunk) +{ + std::ranges::copy(au_chunk.cached_data, back); + std::ranges::copy(au_chunk.data, back + au_chunk.cached_data.size()); + back += au_chunk.data.size() + au_chunk.cached_data.size(); +} + +bool dmux_pamf_base::output_queue::push(const access_unit_chunk& au_chunk, const std::function& on_fatal_error) +{ + // If there are any unconsumed access units behind the back pointer, the distance between the front and back pointers is the remaining capacity, + // otherwise the distance between the end of the buffer and the back pointer is the remaining capacity + if (wrap_pos == buffer.data()) + { + // Since it was already checked if there is enough space for au_max_size, this can only occur if the current access unit is larger than au_max_size + if (au_chunk.data.size() + au_chunk.cached_data.size() > static_cast(std::to_address(buffer.end()) - back)) + { + cellDmuxPamf.error("Access unit larger than specified maximum access unit size"); + on_fatal_error(); + return false; + } + } + else if (au_chunk.data.size() + au_chunk.cached_data.size() + 0x10 > static_cast(front - back)) // + sizeof(v128) because of SPU shenanigans probably + { + return false; + } + + push_unchecked(au_chunk); + return 
true; +} + +bool dmux_pamf_base::output_queue::prepare_next_au(u32 au_max_size) +{ + // LLE always checks the distance between the end of the buffer and the back pointer, even if the back pointer is wrapped around and there are unconsumed access units behind it + if (std::to_address(buffer.end()) - back < au_max_size) + { + // Can't wrap the back pointer around again as long as there are unconsumed access units behind it + if (wrap_pos != buffer.data()) + { + return false; + } + + wrap_pos = back; + back = buffer.data(); + } + + return true; +} + +void dmux_pamf_base::elementary_stream::flush_es() +{ + if (current_au.accumulated_size != 0) + { + ensure(au_queue.get_free_size() >= cache.size()); + au_queue.push_unchecked({ {}, cache }); + + current_au.accumulated_size += static_cast(cache.size()); + + ctx.on_au_found(get_stream_id().first, get_stream_id().second, user_data, { au_queue.peek_back(current_au.accumulated_size), current_au.accumulated_size }, current_au.pts, current_au.dts, + current_au.rap, au_specific_info_size, current_au.au_specific_info_buf); + } + + reset(); + + while (!ctx.on_flush_done(get_stream_id().first, get_stream_id().second, user_data)) {} // The flush_done event is repeatedly fired until it succeeds +} + +void dmux_pamf_base::elementary_stream::reset_es(u8* au_addr) +{ + if (!au_addr) + { + reset(); + au_queue.clear(); + } + else + { + au_queue.pop_back(au_addr); + } +} + +void dmux_pamf_base::elementary_stream::discard_access_unit() +{ + au_queue.pop_back(current_au.accumulated_size - static_cast(au_chunk.data.size() + au_chunk.cached_data.size())); + reset(); + cache.clear(); +} + +template +u32 dmux_pamf_base::elementary_stream::parse_audio_stream_header(std::span pes_packet_data) +{ + u32 extra_header_size_unk = 0; // No clue what this is, I have not found a single instance in any PAMF stream where it is something other than zero + + if (!au_size_unk) // For some reason, LLE uses the member that stores the size of user data access 
units here as bool + { + // Not checked on LLE + if (pes_packet_data.size() < sizeof(u32)) + { + return umax; + } + + extra_header_size_unk = read_from_ptr>(pes_packet_data) & extra_header_size_unk_mask; + au_size_unk = true; + } + + return extra_header_size_unk + sizeof(u32); +} + +bool dmux_pamf_base::elementary_stream::process_pes_packet_data() +{ + ensure(pes_packet_data, "set_pes_packet_data() should be used before process_stream()"); + + for (;;) + { + switch (state) + { + case state::initial: + if (stream_chunk.empty()) + { + pes_packet_data.reset(); + return true; + } + + // Parse the current stream section and increment the reading position by the amount that was consumed + stream_chunk = stream_chunk.subspan(parse_stream(stream_chunk)); + + current_au.accumulated_size += static_cast(au_chunk.data.size() + au_chunk.cached_data.size()); + + // If the beginning of a new access unit was found, set the current timestamps and rap indicator + if (!current_au.timestamps_rap_set && (current_au.state == access_unit::state::commenced || current_au.state == access_unit::state::m2v_sequence + || (current_au.state == access_unit::state::complete && au_chunk.cached_data.empty()))) + { + set_au_timestamps_rap(); + } + + state = state::pushing_au_queue; + [[fallthrough]]; + + case state::pushing_au_queue: + if (!au_chunk.data.empty() || !au_chunk.cached_data.empty()) + { + if (!au_queue.push(au_chunk, std::bind_front(&dmux_pamf_base::on_fatal_error, &ctx))) + { + ctx.on_au_queue_full(); + return false; + } + + au_chunk.data = {}; + au_chunk.cached_data.clear(); + } + + // This happens if the distance between two delimiters is greater than the size indicated in the info header of the stream. + if (current_au.state == access_unit::state::size_mismatch) + { + // LLE cuts off one byte from the beginning of the current PES packet data and then starts over. 
+ pes_packet_data = pes_packet_data->subspan<1>(); + stream_chunk = *pes_packet_data; + + // It also removes the entire current access unit from the queue, even if it began in an earlier PES packet + au_queue.pop_back(current_au.accumulated_size); + current_au.accumulated_size = 0; + + state = state::initial; + continue; + } + + state = state::notifying_au_found; + [[fallthrough]]; + + case state::notifying_au_found: + if (current_au.state == access_unit::state::complete && !ctx.on_au_found(get_stream_id().first, get_stream_id().second, user_data, + { au_queue.peek_back(current_au.accumulated_size), current_au.accumulated_size }, current_au.pts, current_au.dts, current_au.rap, au_specific_info_size, current_au.au_specific_info_buf)) + { + return false; + } + + state = state::preparing_for_next_au; + [[fallthrough]]; + + case state::preparing_for_next_au: + if (current_au.state == access_unit::state::complete) + { + if (!au_queue.prepare_next_au(au_max_size)) + { + ctx.on_au_queue_full(); + return false; + } + + current_au = {}; + } + + state = state::initial; + } + } +} + +template +u32 dmux_pamf_base::video_stream::parse_stream(std::span stream) +{ + if (current_au.state != access_unit::state::none && (avc || current_au.state != access_unit::state::m2v_sequence)) + { + current_au.state = access_unit::state::incomplete; + } + + // Concatenate the cache of the previous stream section and the beginning of the current section + std::array buf{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; // Prevent false positives (M2V pic start code ends with 0x00) + ensure(cache.size() <= 3, "The size of the cache should never exceed three bytes"); + std::ranges::copy(cache, buf.begin()); + std::copy_n(stream.begin(), std::min(sizeof(u32) - 1, stream.size()), buf.begin() + cache.size()); // Not entirely accurate: LLE always reads three bytes from the stream, even if it is smaller than that + + auto au_chunk_begin = stream.begin(); + s32 cache_idx = 0; + auto stream_it = 
stream.begin(); + [&] + { + // Search for delimiter in cache + for (; cache_idx < static_cast(cache.size()); cache_idx++) + { + if (const be_t code = read_from_ptr>(buf.data(), cache_idx); + (avc && code == AVC_AU_DELIMITER) || (!avc && (code == M2V_PIC_START || code == M2V_SEQUENCE_HEADER || code == M2V_SEQUENCE_END))) + { + if (current_au.state != access_unit::state::none && (avc || current_au.state != access_unit::state::m2v_sequence)) + { + // The sequence end code is included in the access unit + // LLE increments the stream pointer instead of the cache index, which will cause the access unit to be corrupted at the end + if (!avc && code == M2V_SEQUENCE_END) + { + cellDmuxPamf.warning("M2V sequence end code in cache"); + stream_it += std::min(sizeof(u32), stream.size()); // Not accurate, LLE always increments by four, regardless of the stream size + } + + current_au.state = access_unit::state::complete; + return; + } + + // If current_au.state is none and there was a delimiter found here, then LLE outputs the entire cache, even if the access unit starts at cache_idx > 0 + + current_au.state = avc || code == M2V_PIC_START ? access_unit::state::commenced : access_unit::state::m2v_sequence; + } + } + + // Search for delimiter in stream + for (; stream_it <= stream.end() - sizeof(u32); stream_it++) + { + if (const be_t code = read_from_ptr>(stream_it); + (avc && code == AVC_AU_DELIMITER) || (!avc && (code == M2V_PIC_START || code == M2V_SEQUENCE_HEADER || code == M2V_SEQUENCE_END))) + { + if (current_au.state != access_unit::state::none && (avc || current_au.state != access_unit::state::m2v_sequence)) + { + stream_it += !avc && code == M2V_SEQUENCE_END ? sizeof(u32) : 0; // The sequence end code is included in the access unit + current_au.state = access_unit::state::complete; + return; + } + + au_chunk_begin = avc || current_au.state == access_unit::state::none ? stream_it : au_chunk_begin; + current_au.state = avc || code == M2V_PIC_START ? 
access_unit::state::commenced : access_unit::state::m2v_sequence; + } + } + }(); + + if (current_au.state != access_unit::state::none) + { + au_chunk.data = { au_chunk_begin, stream_it }; + std::copy_n(cache.begin(), cache_idx, std::back_inserter(au_chunk.cached_data)); + } + + cache.erase(cache.begin(), cache.begin() + cache_idx); + + // Cache the end of the stream if an access unit wasn't completed. There could be the beginning of a delimiter in the last three bytes + if (current_au.state != access_unit::state::complete) + { + std::copy(stream_it, stream.end(), std::back_inserter(cache)); + } + + return static_cast((current_au.state != access_unit::state::complete || stream_it > stream.end() ? stream.end() : stream_it) - stream.begin()); +} + +u32 dmux_pamf_base::lpcm_stream::parse_stream_header(std::span pes_packet_data, [[maybe_unused]] s8 pts_dts_flag) +{ + // Not checked on LLE + if (pes_packet_data.size() < sizeof(u8) + 0x10) + { + return umax; + } + + std::memcpy(au_specific_info_buf.data(), &pes_packet_data[1], au_specific_info_buf.size()); + return parse_audio_stream_header<0x7ff>(pes_packet_data); +} + +u32 dmux_pamf_base::lpcm_stream::parse_stream(std::span stream) +{ + if (current_au.state == access_unit::state::none) + { + current_au.au_specific_info_buf = au_specific_info_buf; + } + + if (au_max_size - current_au.accumulated_size > stream.size()) + { + au_chunk.data = stream; + current_au.state = current_au.state == access_unit::state::none ? 
access_unit::state::commenced : access_unit::state::incomplete; + } + else + { + au_chunk.data = stream.first(au_max_size - current_au.accumulated_size); + current_au.state = access_unit::state::complete; + } + + return static_cast(au_chunk.data.size()); +} + +template +u32 dmux_pamf_base::audio_stream::parse_stream(std::span stream) +{ + const auto parse_au_size = [](be_t data) -> u16 + { + if constexpr (ac3) + { + if (const u8 fscod = data >> 14, frmsizecod = data >> 8 & 0x3f; fscod < 3 && frmsizecod < 38) + { + return AC3_FRMSIZE_TABLE[fscod][frmsizecod] * sizeof(s16); + } + } + else if ((data & 0x3ff) < 0x200) + { + return ((data & 0x3ff) + 1) * 8 + ATRACX_ATS_HEADER_SIZE; + } + + return 0; + }; + + if (current_au.state != access_unit::state::none) + { + current_au.state = access_unit::state::incomplete; + } + + // Concatenate the cache of the previous stream section and the beginning of the current section + std::array buf{}; + ensure(cache.size() <= 3, "The size of the cache should never exceed three bytes"); + std::ranges::copy(cache, buf.begin()); + std::copy_n(stream.begin(), std::min(sizeof(u16) - 1, stream.size()), buf.begin() + cache.size()); + + auto au_chunk_begin = stream.begin(); + s32 cache_idx = 0; + auto stream_it = stream.begin(); + [&] + { + // Search for delimiter in cache + for (; cache_idx <= static_cast(cache.size() + std::min(sizeof(u16) - 1, stream.size()) - sizeof(u16)); cache_idx++) + { + if (const be_t tmp = read_from_ptr>(buf.data(), cache_idx); current_au.size_info_offset != 0) + { + if (--current_au.size_info_offset == 0) + { + current_au.parsed_size = parse_au_size(tmp); + } + } + else if (tmp == SYNC_WORD) + { + if (current_au.state == access_unit::state::none) + { + // If current_au.state is none and there was a delimiter found here, then LLE outputs the entire cache, even if the access unit starts at cache_idx > 0 + + current_au.size_info_offset = ac3 ? 
sizeof(u16) * 2 : sizeof(u16); + current_au.state = access_unit::state::commenced; + } + else if (const u32 au_size = current_au.accumulated_size + cache_idx; au_size >= current_au.parsed_size) + { + current_au.state = au_size == current_au.parsed_size ? access_unit::state::complete : access_unit::state::size_mismatch; + return; + } + } + } + + // As long as the current access unit hasn't reached the size indicated in its header, we don't need to parse the stream + if (current_au.state != access_unit::state::none && current_au.size_info_offset == 0 && current_au.accumulated_size + cache.size() < current_au.parsed_size) + { + stream_it += std::min(current_au.parsed_size - current_au.accumulated_size - cache.size(), stream.size() - sizeof(u32)); + } + + // Search for delimiter in stream + for (; stream_it <= stream.end() - sizeof(u32); stream_it++) // LLE uses sizeof(u32), even though the delimiter is only two bytes large + { + if (const be_t tmp = read_from_ptr>(stream_it); current_au.size_info_offset != 0) + { + if (--current_au.size_info_offset == 0) + { + current_au.parsed_size = parse_au_size(tmp); + } + } + else if (tmp == SYNC_WORD) + { + if (current_au.state == access_unit::state::none) + { + au_chunk_begin = stream_it; + current_au.size_info_offset = ac3 ? sizeof(u16) * 2 : sizeof(u16); + current_au.state = access_unit::state::commenced; + } + else if (const u32 au_size = static_cast(current_au.accumulated_size + stream_it - au_chunk_begin + cache.size()); au_size >= current_au.parsed_size) + { + current_au.state = au_size == current_au.parsed_size ? 
access_unit::state::complete : access_unit::state::size_mismatch; + return; + } + } + } + }(); + + if (current_au.state != access_unit::state::none) + { + au_chunk.data = { au_chunk_begin, stream_it }; + std::copy_n(cache.begin(), cache_idx, std::back_inserter(au_chunk.cached_data)); + } + + cache.erase(cache.begin(), cache.begin() + cache_idx); + + // Cache the end of the stream if an access unit wasn't completed. There could be the beginning of a delimiter in the last three bytes + if (current_au.state != access_unit::state::complete && current_au.state != access_unit::state::size_mismatch) + { + std::copy(stream_it, stream.end(), std::back_inserter(cache)); + } + + return static_cast((current_au.state != access_unit::state::complete ? stream.end() : stream_it) - stream.begin()); +} + +u32 dmux_pamf_base::user_data_stream::parse_stream_header(std::span pes_packet_data, s8 pts_dts_flag) +{ + if (pts_dts_flag < 0) // PTS field exists + { + // Not checked on LLE + if (pes_packet_data.size() < 2 + sizeof(u32)) + { + return umax; + } + + au_size_unk = read_from_ptr>(pes_packet_data.begin(), 2) - sizeof(u32); + return 10; + } + + return 2; +} + +u32 dmux_pamf_base::user_data_stream::parse_stream(std::span stream) +{ + if (au_size_unk > stream.size()) + { + au_chunk.data = stream; + au_size_unk -= static_cast(stream.size()); + current_au.state = access_unit::state::commenced; // User data streams always use commenced + } + else + { + au_chunk.data = stream.first(au_size_unk); + au_size_unk = 0; + current_au.state = access_unit::state::complete; + } + + return static_cast(stream.size()); // Always consume the entire stream +} + +bool dmux_pamf_base::enable_es(u32 stream_id, u32 private_stream_id, bool is_avc, std::span au_queue_buffer, u32 au_max_size, bool raw_es, u32 user_data) +{ + const auto [type_idx, channel] = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID || 
elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + return false; + } + + this->raw_es = raw_es; + pack_es_type_idx = type_idx; + + switch (type_idx) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + elementary_streams[0][channel] = is_avc ? static_cast>(std::make_unique>(channel, au_max_size, *this, user_data, au_queue_buffer)) + : std::make_unique>(channel, au_max_size, *this, user_data, au_queue_buffer); + return true; + + case DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: elementary_streams[1][channel] = std::make_unique(channel, au_max_size, *this, user_data, au_queue_buffer); return true; + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: elementary_streams[2][channel] = std::make_unique>(channel, au_max_size, *this, user_data, au_queue_buffer); return true; + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: elementary_streams[3][channel] = std::make_unique>(channel, au_max_size, *this, user_data, au_queue_buffer); return true; + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: elementary_streams[4][channel] = std::make_unique(channel, au_max_size, *this, user_data, au_queue_buffer); return true; + default: fmt::throw_exception("Unreachable"); + } +} + +bool dmux_pamf_base::disable_es(u32 stream_id, u32 private_stream_id) +{ + const auto [type_idx, channel] = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID || !elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + return false; + } + + elementary_streams[type_idx][channel] = nullptr; + return true; +} + +bool dmux_pamf_base::release_au(u32 stream_id, u32 private_stream_id, u32 au_size) const +{ + const auto [type_idx, channel] = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID || !elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + return false; + } + + elementary_streams[type_idx][channel]->release_au(au_size); + return true; +} + +bool 
dmux_pamf_base::flush_es(u32 stream_id, u32 private_stream_id) +{ + const auto [type_idx, channel] = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID || !elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + return false; + } + + state = state::initial; + elementary_streams[type_idx][channel]->flush_es(); + return true; +} + +void dmux_pamf_base::set_stream(std::span stream, bool continuity) +{ + if (!continuity) + { + std::ranges::for_each(elementary_streams | std::views::join | std::views::filter(elementary_stream::is_enabled), &elementary_stream::discard_access_unit); + } + + state = state::initial; + + // Not checked on LLE, it would parse old memory contents or uninitialized memory if the size of the input stream set by the user is not a multiple of 0x800. + // Valid PAMF streams are always a multiple of 0x800 bytes large. + if ((stream.size() & 0x7ff) != 0) + { + cellDmuxPamf.warning("Invalid stream size"); + } + + this->stream = stream; + demux_done_notified = false; +} + +void dmux_pamf_base::reset_stream() +{ + std::ranges::for_each(elementary_streams | std::views::join | std::views::filter(elementary_stream::is_enabled), &elementary_stream::discard_access_unit); + state = state::initial; + stream.reset(); +} + +bool dmux_pamf_base::reset_es(u32 stream_id, u32 private_stream_id, u8* au_addr) +{ + const auto [type_idx, channel] = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID || !elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + return false; + } + + if (!au_addr) + { + state = state::initial; + } + + elementary_streams[type_idx][channel]->reset_es(au_addr); + return true; +} + +bool dmux_pamf_base::process_next_pack() +{ + if (!stream) + { + demux_done_notified = demux_done_notified || on_demux_done(); + return true; + } + + switch (state) + { + case state::initial: + { + 
// Search for the next pack start code or prog end code + std::span pack; + + for (;;) + { + if (stream->size() < PACK_STUFFING_LENGTH_OFFSET + sizeof(u8)) + { + stream.reset(); + demux_done_notified = on_demux_done(); + return true; + } + + pack = stream->first(std::min(stream->size(), PACK_SIZE)); + stream = stream->subspan(std::min(stream->size(), PACK_SIZE)); + + // If the input stream is a raw elementary stream, skip everything MPEG-PS related and go straight to elementary stream parsing + if (raw_es) + { + if (elementary_stream::is_enabled(elementary_streams[pack_es_type_idx][0])) + { + elementary_streams[pack_es_type_idx][0]->set_pes_packet_data(pack); + } + + state = state::elementary_stream; + return true; + } + + // While LLE is actually searching the entire section for a pack start code or program end code, + // it doesn't set its current reading position to the address where it found the code, so it would bug out if there isn't one at the start of the section + + if (const be_t code = read_from_ptr>(pack); code == PACK_START) + { + break; + } + else if (code == PROG_END) + { + if (!on_prog_end()) + { + state = state::prog_end; + } + + return true; + } + + cellDmuxPamf.warning("No start code found at the beginning of the current section"); + } + + // Skip over pack header + const u8 pack_stuffing_length = read_from_ptr>(pack, PACK_STUFFING_LENGTH_OFFSET); + + // Not checked on LLE, the SPU task would just increment the reading position and read random data in the SPU local store + if (PACK_STUFFING_LENGTH_OFFSET + sizeof(u8) + pack_stuffing_length + PES_HEADER_DATA_LENGTH_OFFSET + sizeof(u8) > pack.size()) + { + cellDmuxPamf.error("Invalid pack stuffing length"); + return false; + } + + std::span current_pes_packet = pack.subspan(PACK_STUFFING_LENGTH_OFFSET + sizeof(u8) + pack_stuffing_length); + + if (read_from_ptr, 8, 24>>(current_pes_packet) != PACKET_START_CODE_PREFIX) + { + cellDmuxPamf.error("Invalid start code after pack header"); + return false; 
+ } + + // Skip over system header if present + if (read_from_ptr>(current_pes_packet) == SYSTEM_HEADER) + { + const u32 system_header_length = read_from_ptr>(current_pes_packet.begin(), PES_PACKET_LENGTH_OFFSET) + PES_PACKET_LENGTH_OFFSET + sizeof(u16); + + // Not checked on LLE, the SPU task would just increment the reading position and read random data in the SPU local store + if (system_header_length + PES_HEADER_DATA_LENGTH_OFFSET + sizeof(u8) > current_pes_packet.size()) + { + cellDmuxPamf.error("Invalid system header length"); + return false; + } + + current_pes_packet = current_pes_packet.subspan(system_header_length); + + // The SPU thread isn't doing load + rotate here for 4-byte loading (in valid PAMF streams, the next start code after a system header is always 0x10 byte aligned) + const u32 offset_low = (current_pes_packet.data() - pack.data()) & 0xf; + current_pes_packet = { current_pes_packet.begin() - offset_low, current_pes_packet.end() }; + + if (const be_t code = read_from_ptr>(current_pes_packet); code >> 8 != PACKET_START_CODE_PREFIX) + { + cellDmuxPamf.error("Invalid start code after system header"); + return false; + } + else if (code == PRIVATE_STREAM_2) + { + // A system header is optionally followed by a private stream 2 + // The first two bytes of the stream are the stream id of a video stream. 
The next access unit of that stream is a random access point/keyframe + + const u16 pes_packet_length = read_from_ptr>(current_pes_packet.begin(), PES_PACKET_LENGTH_OFFSET) + PES_PACKET_LENGTH_OFFSET + sizeof(u16); + + // Not checked on LLE, the SPU task would just increment the reading position and read random data in the SPU local store + if (pes_packet_length + PES_HEADER_DATA_LENGTH_OFFSET + sizeof(u8) > current_pes_packet.size()) + { + cellDmuxPamf.error("Invalid private stream 2 length"); + return false; + } + + if (const u8 channel = read_from_ptr>(current_pes_packet.begin(), PES_PACKET_LENGTH_OFFSET + sizeof(u16)) & 0xf; + elementary_stream::is_enabled(elementary_streams[0][channel])) + { + elementary_streams[0][channel]->set_rap(); + } + + current_pes_packet = current_pes_packet.subspan(pes_packet_length); + } + } + + // Parse PES packet + // LLE only parses the first PES packet per pack (valid PAMF streams only have one PES packet per pack, not including the system header + private stream 2) + + const u32 pes_packet_start_code = read_from_ptr>(current_pes_packet); + + if (pes_packet_start_code >> 8 != PACKET_START_CODE_PREFIX) + { + cellDmuxPamf.error("Invalid start code"); + return false; + } + + const u16 pes_packet_length = read_from_ptr>(current_pes_packet.begin(), PES_PACKET_LENGTH_OFFSET) + PES_PACKET_LENGTH_OFFSET + sizeof(u16); + const u8 pes_header_data_length = read_from_ptr(current_pes_packet.begin(), PES_HEADER_DATA_LENGTH_OFFSET) + PES_HEADER_DATA_LENGTH_OFFSET + sizeof(u8); + + // Not checked on LLE, the SPU task would just increment the reading position and read random data in the SPU local store + if (pes_packet_length > current_pes_packet.size() || pes_packet_length <= pes_header_data_length) + { + cellDmuxPamf.error("Invalid pes packet length"); + return false; + } + + const std::span pes_packet_data = current_pes_packet.subspan(pes_header_data_length, pes_packet_length - pes_header_data_length); + + const auto [type_idx, channel] = 
dmuxPamfStreamIdToTypeChannel(pes_packet_start_code, read_from_ptr(pes_packet_data)); + + if (type_idx == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID) + { + cellDmuxPamf.error("Invalid stream type"); + return false; + } + + pack_es_type_idx = type_idx; + pack_es_channel = channel; + + if (elementary_stream::is_enabled(elementary_streams[type_idx][channel])) + { + const s8 pts_dts_flag = read_from_ptr(current_pes_packet.begin(), PTS_DTS_FLAG_OFFSET); + + if (pts_dts_flag < 0) + { + // The timestamps should be unsigned, but are sign-extended from s32 to u64 on LLE. They probably forgot about integer promotion + const s32 PTS_32_30 = read_from_ptr>(current_pes_packet.begin(), 9); + const s32 PTS_29_15 = read_from_ptr, 1, 15>>(current_pes_packet.begin(), 10); + const s32 PTS_14_0 = read_from_ptr, 1, 15>>(current_pes_packet.begin(), 12); + + elementary_streams[type_idx][channel]->set_pts(PTS_32_30 << 30 | PTS_29_15 << 15 | PTS_14_0); // Bit 32 is discarded + } + + if (pts_dts_flag & 0x40) + { + const s32 DTS_32_30 = read_from_ptr>(current_pes_packet.begin(), 14); + const s32 DTS_29_15 = read_from_ptr, 1, 15>>(current_pes_packet.begin(), 15); + const s32 DTS_14_0 = read_from_ptr, 1, 15>>(current_pes_packet.begin(), 17); + + elementary_streams[type_idx][channel]->set_dts(DTS_32_30 << 30 | DTS_29_15 << 15 | DTS_14_0); // Bit 32 is discarded + } + + const usz stream_header_size = elementary_streams[type_idx][channel]->parse_stream_header(pes_packet_data, pts_dts_flag); + + // Not checked on LLE, the SPU task would just increment the reading position and read random data in the SPU local store + if (stream_header_size > pes_packet_data.size()) + { + cellDmuxPamf.error("Invalid stream header size"); + return false; + } + + elementary_streams[type_idx][channel]->set_pes_packet_data(pes_packet_data.subspan(stream_header_size)); + } + + state = state::elementary_stream; + [[fallthrough]]; + } + case state::elementary_stream: + { + if 
(!elementary_stream::is_enabled(elementary_streams[pack_es_type_idx][pack_es_channel]) || elementary_streams[pack_es_type_idx][pack_es_channel]->process_pes_packet_data()) + { + state = state::initial; + } + + return true; + } + case state::prog_end: + { + if (on_prog_end()) + { + state = state::initial; + } + + return true; + } + default: + fmt::throw_exception("Unreachable"); + } +} + +u32 dmux_pamf_base::get_enabled_es_count() const +{ + return static_cast(std::ranges::count_if(elementary_streams | std::views::join, elementary_stream::is_enabled)); +} + +bool dmux_pamf_spu_context::get_next_cmd(DmuxPamfCommand& lhs, bool new_stream) const +{ + cellDmuxPamf.trace("Getting next command"); + + if (cmd_queue->pop(lhs)) + { + cellDmuxPamf.trace("Command type: %d", static_cast(lhs.type.get())); + return true; + } + + if ((new_stream || has_work()) && !wait_for_au_queue && !wait_for_event_queue) + { + cellDmuxPamf.trace("No new command, continuing demuxing"); + return false; + } + + cellDmuxPamf.trace("No new command and nothing to do, waiting..."); + + cmd_queue->wait(); + + if (thread_ctrl::state() == thread_state::aborting) + { + return false; + } + + ensure(cmd_queue->pop(lhs)); + + cellDmuxPamf.trace("Command type: %d", static_cast(lhs.type.get())); + return true; +} + +bool dmux_pamf_spu_context::send_event(auto&&... 
args) const +{ + if (event_queue->size() >= max_enqueued_events) + { + return false; + } + + return ensure(event_queue->emplace(std::forward(args)..., event_queue_was_too_full)); +} + +void dmux_pamf_spu_context::operator()() // cellSpursMain() +{ + DmuxPamfCommand cmd; + + while (thread_ctrl::state() != thread_state::aborting) + { + if (get_next_cmd(cmd, new_stream)) + { + event_queue_was_too_full = wait_for_event_queue; + wait_for_event_queue = false; + wait_for_au_queue = false; + + ensure(cmd_result_queue->emplace(static_cast(cmd.type.value()) + 1)); + + switch (cmd.type) + { + case DmuxPamfCommandType::enable_es: + max_enqueued_events += 2; + enable_es(cmd.enable_es.stream_id, cmd.enable_es.private_stream_id, cmd.enable_es.is_avc, { cmd.enable_es.au_queue_buffer.get_ptr(), cmd.enable_es.au_queue_buffer_size }, + cmd.enable_es.au_max_size, cmd.enable_es.is_raw_es, cmd.enable_es.user_data); + break; + + case DmuxPamfCommandType::disable_es: + disable_es(cmd.disable_flush_es.stream_id, cmd.disable_flush_es.private_stream_id); + max_enqueued_events -= 2; + break; + + case DmuxPamfCommandType::set_stream: + new_stream = true; + break; + + case DmuxPamfCommandType::release_au: + release_au(cmd.release_au.stream_id, cmd.release_au.private_stream_id, cmd.release_au.au_size); + break; + + case DmuxPamfCommandType::flush_es: + flush_es(cmd.disable_flush_es.stream_id, cmd.disable_flush_es.private_stream_id); + break; + + case DmuxPamfCommandType::close: + while (!send_event(DmuxPamfEventType::close)) {} + return; + + case DmuxPamfCommandType::reset_stream: + reset_stream(); + break; + + case DmuxPamfCommandType::reset_es: + reset_es(cmd.reset_es.stream_id, cmd.reset_es.private_stream_id, cmd.reset_es.au_addr ? 
cmd.reset_es.au_addr.get_ptr() : nullptr); + break; + + case DmuxPamfCommandType::resume: + break; + + default: + cellDmuxPamf.error("Invalid command"); + return; + } + } + else if (thread_ctrl::state() == thread_state::aborting) + { + return; + } + + // Only set the new stream once the previous one has been entirely consumed + if (new_stream && !has_work()) + { + new_stream = false; + + DmuxPamfStreamInfo stream_info; + ensure(stream_info_queue->pop(stream_info)); + + set_stream({ stream_info.stream_addr.get_ptr(), stream_info.stream_size }, stream_info.continuity); + + // Delay demuxing a bit + // Prevents White Knight Chronicles II FMVs from freezing, since events are otherwise fired before the game has finished initializing FMV playback + thread_ctrl::wait_for(1'500); + } + + process_next_pack(); + } +} + +void dmux_pamf_base::elementary_stream::save(utils::serial& ar) +{ + // These need to be saved first since they need to be initialized in the constructor's initializer list + if (ar.is_writing()) + { + ar(au_max_size, user_data); + au_queue.save(ar); + } + + ar(state, au_size_unk, au_specific_info_buf, current_au, pts, dts, rap); + + if (state == state::pushing_au_queue) + { + ar(au_chunk.cached_data); + + if (ar.is_writing()) + { + ar(vm::get_addr(au_chunk.data.data()), static_cast(au_chunk.data.size())); + } + else + { + au_chunk.data = { vm::_ptr(ar.pop()), ar.pop() }; + } + } + + if (current_au.state != access_unit::state::complete) + { + ar(cache); + } + + bool save_stream = !!pes_packet_data; + ar(save_stream); + + if (save_stream) + { + if (ar.is_writing()) + { + ensure(stream_chunk.size() <= pes_packet_data->size()); + ar(vm::get_addr(pes_packet_data->data()), static_cast(pes_packet_data->size()), static_cast(stream_chunk.data() - pes_packet_data->data())); + } + else + { + pes_packet_data = { vm::_ptr(ar.pop()), ar.pop() }; + stream_chunk = { pes_packet_data->begin() + ar.pop(), pes_packet_data->end() }; + } + } +} + +void 
dmux_pamf_base::save_base(utils::serial& ar) +{ + bool stream_not_consumed = !!stream; + + ar(state, stream_not_consumed, demux_done_notified, pack_es_type_idx, raw_es); + + if (stream_not_consumed) + { + if (ar.is_writing()) + { + ar(vm::get_addr(stream->data()), static_cast(stream->size())); + } + else + { + stream = std::span{ vm::_ptr(ar.pop()), ar.pop() }; + } + } + + if (state == state::elementary_stream) + { + ar(pack_es_channel); + } + + std::array enabled_video_streams; + std::array avc_video_streams; + std::array enabled_lpcm_streams; + std::array enabled_ac3_streams; + std::array enabled_atracx_streams; + std::array enabled_user_data_streams; + + if (ar.is_writing()) + { + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO], enabled_video_streams.begin(), elementary_stream::is_enabled); + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO], avc_video_streams.begin(), [](auto& es){ return !!dynamic_cast*>(es.get()); }); + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_LPCM], enabled_lpcm_streams.begin(), elementary_stream::is_enabled); + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_AC3], enabled_ac3_streams.begin(), elementary_stream::is_enabled); + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX], enabled_atracx_streams.begin(), elementary_stream::is_enabled); + std::ranges::transform(elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA], enabled_user_data_streams.begin(), elementary_stream::is_enabled); + } + + ar(enabled_video_streams, avc_video_streams, enabled_lpcm_streams, enabled_ac3_streams, enabled_atracx_streams, enabled_user_data_streams); + + if (ar.is_writing()) + { + std::ranges::for_each(elementary_streams | std::views::join | std::views::filter(elementary_stream::is_enabled), [&](const auto& es){ es->save(ar); }); + } + else + { + for (u32 ch = 0; ch < 0x10; ch++) + { + if (enabled_video_streams[ch]) + { + 
elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO][ch] = avc_video_streams[ch] ? static_cast>(std::make_unique>(ar, ch, *this)) + : std::make_unique>(ar, ch, *this); + } + } + + for (u32 ch = 0; ch < 0x10; ch++) + { + if (enabled_lpcm_streams[ch]) + { + elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_LPCM][ch] = std::make_unique(ar, ch, *this); + } + } + + for (u32 ch = 0; ch < 0x10; ch++) + { + if (enabled_ac3_streams[ch]) + { + elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_AC3][ch] = std::make_unique>(ar, ch, *this); + } + } + + for (u32 ch = 0; ch < 0x10; ch++) + { + if (enabled_atracx_streams[ch]) + { + elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX][ch] = std::make_unique>(ar, ch, *this); + } + } + + for (u32 ch = 0; ch < 0x10; ch++) + { + if (enabled_user_data_streams[ch]) + { + elementary_streams[DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA][ch] = std::make_unique(ar, ch, *this); + } + } + } +} + +void dmux_pamf_spu_context::save(utils::serial& ar) +{ + USING_SERIALIZATION_VERSION(cellDmuxPamf); + ar(cmd_queue, new_stream); // The queues are contiguous in guest memory, so we only need to save the address of the first one + save_base(ar); +} + + +// PPU thread + +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + +template +void DmuxPamfContext::send_spu_command_and_wait(ppu_thread& ppu, bool waiting_for_spu_state, auto&&... 
cmd_params) +{ + if (!waiting_for_spu_state) + { + // The caller is supposed to own the mutex until the SPU thread has consumed the command, so the queue should always be empty here + ensure(cmd_queue.emplace(type, std::forward(cmd_params)...), "The command queue wasn't empty"); + } + + lv2_obj::sleep(ppu); + + // Block until the SPU thread has consumed the command + cmd_result_queue.wait(); + + if (ppu.check_state()) + { + ppu.state += cpu_flag::again; + return; + } + + be_t result{}; + ensure(cmd_result_queue.pop(result), "The result queue was empty"); + ensure(result == static_cast(type) + 1, "The HLE SPU thread sent an invalid result"); +} + +DmuxPamfElementaryStream* DmuxPamfContext::find_es(u16 stream_id, u16 private_stream_id) +{ + const auto it = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first == DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO + ? std::ranges::find_if(elementary_streams | std::views::reverse, [&](const auto& es){ return es && es->stream_id == stream_id; }) + : std::ranges::find_if(elementary_streams | std::views::reverse, [&](const auto& es){ return es && es->stream_id == stream_id && es->private_stream_id == private_stream_id; }); + + return it != std::ranges::rend(elementary_streams) ? it->get_ptr() : nullptr; +} + +error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u64 au_queue_full_bitset, b8& stream_reset_started, dmux_pamf_state& savestate) +{ + if (savestate == dmux_pamf_state::waiting_for_au_released) + { + goto label1_waiting_for_au_released_state; + } + + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + if (au_queue_full_bitset) + { + cellDmuxPamf.trace("Access unit queue of elementary stream no. %d is full. 
Waiting for access unit to be released...", std::countr_zero(au_queue_full_bitset)); + + while (!(au_queue_full_bitset & au_released_bitset) && !stream_reset_requested) + { + savestate = dmux_pamf_state::waiting_for_au_released; + label1_waiting_for_au_released_state: + + if (lv2_syscall(ppu, cond, 0) != CELL_OK) + { + lv2_syscall(ppu, mutex); + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + } + + cellDmuxPamf.trace("Access unit released"); + } + + stream_reset_started = stream_reset_requested; + stream_reset_requested = false; + + au_released_bitset = 0; + + return lv2_syscall(ppu, mutex) != CELL_OK ? static_cast(CELL_DMUX_PAMF_ERROR_FATAL) : CELL_OK; +} + +template +error_code DmuxPamfContext::set_au_reset(ppu_thread& ppu) +{ + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + std::ranges::for_each(elementary_streams | std::views::filter([](auto es){ return !!es; }), [](auto& reset_next_au) { reset_next_au = reset; }, &DmuxPamfElementaryStream::reset_next_au); + + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; +} + +template +error_code DmuxPamfContext::callback(ppu_thread& ppu, DmuxCb cb, auto&&... 
args) +{ + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + return cb.cbFunc(ppu, std::forward(args)..., cb.cbArg); +} + +void DmuxPamfContext::run_spu_thread() +{ + hle_spu_thread_id = idm::make(cmd_queue_addr, cmd_result_queue_addr, stream_info_queue_addr, event_queue_addr); +} + +void DmuxPamfContext::exec(ppu_thread& ppu) +{ + // This is repeated a lot in this function, in my opinion using a define here makes it more readable +#define RETURN_ON_CPU_FLAG_AGAIN()\ + if (ppu.state & cpu_flag::again)\ + return + + switch (savestate) + { + case dmux_pamf_state::initial: break; + case dmux_pamf_state::waiting_for_au_released: goto label1_waiting_for_au_released_state; + case dmux_pamf_state::waiting_for_au_released_error: goto label2_waiting_for_au_released_error_state; + case dmux_pamf_state::waiting_for_event: goto label3_waiting_for_event_state; + case dmux_pamf_state::starting_demux_done: goto label4_starting_demux_done_state; + case dmux_pamf_state::starting_demux_done_mutex_lock_error: goto label5_starting_demux_done_mutex_lock_error_state; + case dmux_pamf_state::starting_demux_done_mutex_unlock_error: goto label6_starting_demux_done_mutex_unlock_error_state; + case dmux_pamf_state::starting_demux_done_checking_stream_reset: goto label7_starting_demux_done_check_stream_reset_state; + case dmux_pamf_state::starting_demux_done_checking_stream_reset_error: goto label8_start_demux_done_check_stream_reset_error_state; + case dmux_pamf_state::setting_au_reset: goto label9_setting_au_reset_state; + case dmux_pamf_state::setting_au_reset_error: goto label10_setting_au_reset_error_state; + case dmux_pamf_state::processing_event: goto label11_processing_event_state; + case dmux_pamf_state::au_found_waiting_for_spu: goto label12_au_found_waiting_for_spu_state; + case dmux_pamf_state::unsetting_au_reset: goto label13_unsetting_au_reset_state; + case 
dmux_pamf_state::demux_done_notifying: goto label14_demux_done_notifying_state; + case dmux_pamf_state::demux_done_mutex_lock: goto label15_demux_done_mutex_lock_state; + case dmux_pamf_state::demux_done_cond_signal: goto label16_demux_done_cond_signal_state; + case dmux_pamf_state::resuming_demux_mutex_lock: goto label17_resuming_demux_mutex_lock_state; + case dmux_pamf_state::resuming_demux_waiting_for_spu: goto label18_resuming_demux_waiting_for_spu_state; + case dmux_pamf_state::sending_fatal_err: ; // Handled below + } + + for (;;) + { + if (savestate == dmux_pamf_state::sending_fatal_err) + { + callback(ppu, notify_fatal_err, _this, CELL_OK); + RETURN_ON_CPU_FLAG_AGAIN(); + } + + savestate = dmux_pamf_state::initial; + + stream_reset_started = false; + + // If the access unit queue of an enabled elementary stream is full, wait until the user releases an access unit or requests a stream reset before processing the next event + label1_waiting_for_au_released_state: + + if (wait_au_released_or_stream_reset(ppu, au_queue_full_bitset, stream_reset_started, savestate) != CELL_OK) + { + savestate = dmux_pamf_state::waiting_for_au_released_error; + label2_waiting_for_au_released_error_state: + + callback(ppu, notify_fatal_err, _this, CELL_OK); + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + // Wait for the next event + if (!event_queue.peek(event)) + { + savestate = dmux_pamf_state::waiting_for_event; + label3_waiting_for_event_state: + + cellDmuxPamf.trace("Waiting for the next event..."); + + lv2_obj::sleep(ppu); + event_queue.wait(); + + if (ppu.check_state()) + { + ppu.state += cpu_flag::again; + return; + } + + ensure(event_queue.peek(event)); + } + + cellDmuxPamf.trace("Event type: %d", static_cast(event.type.get())); + + // If the event is a demux done event, set the sequence state to resetting and check for a potential stream reset request again + if (event.type == DmuxPamfEventType::demux_done) + { + savestate = dmux_pamf_state::starting_demux_done; + 
label4_starting_demux_done_state: + + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + savestate = dmux_pamf_state::starting_demux_done_mutex_lock_error; + label5_starting_demux_done_mutex_lock_error_state: + + callback(ppu, notify_fatal_err, _this, CELL_OK); + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + sequence_state = DmuxPamfSequenceState::resetting; + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::starting_demux_done_mutex_unlock_error; + label6_starting_demux_done_mutex_unlock_error_state: + + callback(ppu, notify_fatal_err, _this, CELL_OK); + + RETURN_ON_CPU_FLAG_AGAIN(); + } + + if (!stream_reset_started) + { + savestate = dmux_pamf_state::starting_demux_done_checking_stream_reset; + label7_starting_demux_done_check_stream_reset_state: + + if (wait_au_released_or_stream_reset(ppu, 0, stream_reset_started, savestate) != CELL_OK) + { + savestate = dmux_pamf_state::starting_demux_done_checking_stream_reset_error; + label8_start_demux_done_check_stream_reset_error_state: + + callback(ppu, notify_fatal_err, _this, CELL_OK); + } + + RETURN_ON_CPU_FLAG_AGAIN(); + } + } + + // If the user requested a stream reset, set the reset flag for every enabled elementary stream + if (stream_reset_started) + { + stream_reset_in_progress = true; + + savestate = dmux_pamf_state::setting_au_reset; + label9_setting_au_reset_state: + + if (set_au_reset(ppu) != CELL_OK) + { + savestate = dmux_pamf_state::setting_au_reset_error; + label10_setting_au_reset_error_state: + + callback(ppu, notify_fatal_err, _this, CELL_OK); + } + + RETURN_ON_CPU_FLAG_AGAIN(); + } + + savestate = dmux_pamf_state::processing_event; + label11_processing_event_state: + + switch (event.type) + { + case DmuxPamfEventType::au_found: + { + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + label12_au_found_waiting_for_spu_state: + + DmuxPamfElementaryStream* const es = 
find_es(event.au_found.stream_id, event.au_found.private_stream_id); + + // If the elementary stream of the found access unit is not enabled, don't do anything + if (!es || es->_this.get_ptr() != es || es->es_id != event.au_found.user_data) + { + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + break; + } + + // If a stream reset was requested, don't notify the user of any found access units that are still in the event queue + // We need to send the SPU thread the address of the first found access unit for each elementary stream still in the event queue, + // so that it can remove the access units from the queue. + if (stream_reset_in_progress) + { + if (es->reset_next_au) + { + send_spu_command_and_wait(ppu, savestate == dmux_pamf_state::au_found_waiting_for_spu, + event.au_found.stream_id, event.au_found.private_stream_id, event.au_found.au_addr); + + if (ppu.state & cpu_flag::again) + { + savestate = dmux_pamf_state::au_found_waiting_for_spu; + return; + } + + es->reset_next_au = false; + } + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + break; + } + + const vm::var au_info; + au_info->addr = std::bit_cast>(event.au_found.au_addr); + au_info->size = event.au_found.au_size; + au_info->pts = event.au_found.pts; + au_info->dts = event.au_found.dts; + au_info->user_data = user_data; + au_info->specific_info = es->_this.ptr(&DmuxPamfElementaryStream::au_specific_info); + au_info->specific_info_size = es->au_specific_info_size; + au_info->is_rap = static_cast(event.au_found.is_rap); + + if (!is_raw_es && dmuxPamfStreamIdToTypeChannel(event.au_found.stream_id, event.au_found.private_stream_id).first == DMUX_PAMF_STREAM_TYPE_INDEX_LPCM) + { + es->au_specific_info[0] = read_from_ptr>(event.au_found.stream_header_buf); + es->au_specific_info[1] = read_from_ptr>(event.au_found.stream_header_buf); + es->au_specific_info[2] = 
read_from_ptr>(event.au_found.stream_header_buf, 1); + } + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + if (callback(ppu, es->notify_au_found, es->_this, au_info) != CELL_OK) + { + // If the callback returns an error, the access unit queue for this elementary stream is full + au_queue_full_bitset |= 1ull << es->this_index; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + break; + } + case DmuxPamfEventType::demux_done: + { + if (stream_reset_in_progress) + { + stream_reset_in_progress = false; + + savestate = dmux_pamf_state::unsetting_au_reset; + label13_unsetting_au_reset_state: + + if (set_au_reset(ppu) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + } + + savestate = dmux_pamf_state::demux_done_notifying; + label14_demux_done_notifying_state: + + callback(ppu, notify_demux_done, _this, CELL_OK); + + RETURN_ON_CPU_FLAG_AGAIN(); + + savestate = dmux_pamf_state::demux_done_mutex_lock; + label15_demux_done_mutex_lock_state: + + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + if (sequence_state == DmuxPamfSequenceState::resetting) + { + sequence_state = DmuxPamfSequenceState::dormant; + + savestate = dmux_pamf_state::demux_done_cond_signal; + label16_demux_done_cond_signal_state: + + if (lv2_syscall(ppu, cond) != CELL_OK) + { + lv2_syscall(ppu, mutex); + + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + } + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + break; + } + case DmuxPamfEventType::close: + { + while (event_queue.pop()){} // Clear the event queue + return; + } + case DmuxPamfEventType::flush_done: + { + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + 
continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + DmuxPamfElementaryStream* const es = find_es(event.flush_done.stream_id, event.flush_done.private_stream_id); + const bool valid = es && es->_this.get_ptr() == es && es->es_id == event.flush_done.user_data; + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + if (valid) + { + callback(ppu, es->notify_flush_done, es->_this); + + RETURN_ON_CPU_FLAG_AGAIN(); + } + + break; + } + case DmuxPamfEventType::prog_end_code: + { + callback(ppu, notify_prog_end_code, _this); + + RETURN_ON_CPU_FLAG_AGAIN(); + + break; + } + case DmuxPamfEventType::fatal_error: + { + ensure(event_queue.pop()); + + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + default: + fmt::throw_exception("Invalid event"); + } + + ensure(event_queue.pop()); + + // If there are too many events enqueued, the SPU thread will stop demuxing until it receives a new command. + // Once the event queue size is reduced to two, send a resume command + if (enabled_es_num >= 0 && event_queue.size() == 2) + { + savestate = dmux_pamf_state::resuming_demux_mutex_lock; + label17_resuming_demux_mutex_lock_state: + + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + + RETURN_ON_CPU_FLAG_AGAIN(); + + if (enabled_es_num >= 0) + { + ensure(cmd_queue.emplace(DmuxPamfCommandType::resume)); + + savestate = dmux_pamf_state::resuming_demux_waiting_for_spu; + label18_resuming_demux_waiting_for_spu_state: + + lv2_obj::sleep(ppu); + cmd_result_queue.wait(); + + if (ppu.check_state()) + { + ppu.state += cpu_flag::again; + return; + } + + ensure(cmd_result_queue.pop()); + } + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + savestate = dmux_pamf_state::sending_fatal_err; + continue; + } + } + + au_queue_full_bitset = 0; + } +} + +void dmuxPamfEntry(ppu_thread& ppu, vm::ptr dmux) +{ + dmux->exec(ppu); + + if (ppu.state & cpu_flag::again) + { + 
ppu.syscall_args[0] = dmux.addr(); + return; + } + + ppu_execute<&sys_ppu_thread_exit>(ppu, CELL_OK); +} + +error_code dmuxPamfVerifyEsSpecificInfo(u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info) +{ + // The meaning of error code value 5 in here is inconsistent with how it's used elsewhere for some reason + + if (!es_specific_info) + { + return CELL_OK; + } + + switch (dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + if (is_avc) + { + if (const u32 level = vm::static_ptr_cast(es_specific_info)->level; + level != CELL_DMUX_PAMF_AVC_LEVEL_2P1 && level != CELL_DMUX_PAMF_AVC_LEVEL_3P0 && level != CELL_DMUX_PAMF_AVC_LEVEL_3P1 + && level != CELL_DMUX_PAMF_AVC_LEVEL_3P2 && level != CELL_DMUX_PAMF_AVC_LEVEL_4P1 && level != CELL_DMUX_PAMF_AVC_LEVEL_4P2) + { + return 5; + } + } + else if (vm::static_ptr_cast(es_specific_info)->profileLevel > CELL_DMUX_PAMF_M2V_MP_HL) + { + return 5; + } + + return CELL_OK; + + case DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: + if (const auto [sampling_freq, nch, bps] = *vm::static_ptr_cast(es_specific_info); + sampling_freq != CELL_DMUX_PAMF_FS_48K || (nch != 1u && nch != 2u && nch != 6u && nch != 8u) || (bps != CELL_DMUX_PAMF_BITS_PER_SAMPLE_16 && bps != CELL_DMUX_PAMF_BITS_PER_SAMPLE_24)) + { + return 5; + } + + return CELL_OK; + + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: + return CELL_OK; + + default: + return 5; + } +} + +template +u32 dmuxPamfGetAuSpecificInfoSize(u16 stream_id, u16 private_stream_id, bool is_avc) +{ + if constexpr (raw_es) + { + return 0; + } + + switch (dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + if (is_avc) + { + return 4; // LLE returns four, even though CellDmuxPamfAuSpecificInfoAvc only has a reserved field like the others + } + + return 0; + + case 
DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: // LLE returns three, even though CellDmuxPamfAuSpecificInfoAc3 only has a reserved field like the others + return 3; + + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: + default: + return 0; + } +} + +u32 dmuxPamfGetAuQueueMaxSize(u16 stream_id, u16 private_stream_id) +{ + switch (dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: + return 0x100; + + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: + return 0x40; + + default: + return 0; + } +} + +u32 dmuxPamfGetLpcmAuSize(vm::cptr lpcm_info) +{ + return lpcm_info->samplingFreq * lpcm_info->bitsPerSample / CHAR_BIT * (lpcm_info->numOfChannels + (lpcm_info->numOfChannels & 1)) / DMUX_PAMF_LPCM_FRAMES_PER_SEC; // Streams with an odd number of channels contain an empty dummy channel +} + +u32 dmuxPamfGetAuQueueBufferSize(u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info) +{ + switch (dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + if (is_avc) + { + if (!es_specific_info) + { + return 0x46a870; + } + + switch (vm::static_ptr_cast(es_specific_info)->level) + { + case CELL_DMUX_PAMF_AVC_LEVEL_2P1: return 0xb00c0; + case CELL_DMUX_PAMF_AVC_LEVEL_3P0: return 0x19f2e0; + case CELL_DMUX_PAMF_AVC_LEVEL_3P1: return 0x260120; + case CELL_DMUX_PAMF_AVC_LEVEL_3P2: return 0x35f6c0; + case CELL_DMUX_PAMF_AVC_LEVEL_4P1: return 0x45e870; + case CELL_DMUX_PAMF_AVC_LEVEL_4P2: // Same as below + default: return 0x46a870; + } + } + + if (es_specific_info && vm::static_ptr_cast(es_specific_info)->profileLevel > CELL_DMUX_PAMF_M2V_MP_ML) + { + return 0x255000; + } + + return 0x70000; + + case DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: + { + if (!es_specific_info) + 
{ + return 0x104380; + } + + const u32 nch = vm::static_ptr_cast(es_specific_info)->numOfChannels; + const u32 lpcm_au_size = dmuxPamfGetLpcmAuSize(vm::static_ptr_cast(es_specific_info)); + + if (vm::static_ptr_cast(es_specific_info)->samplingFreq <= 96000) + { + if (nch > 0 && nch <= 2) + { + return 0x20000 + lpcm_au_size; + } + + if (nch <= 6) + { + return 0x60000 + lpcm_au_size; + } + + if (nch <= 8) + { + return 0x80000 + lpcm_au_size; + } + + return lpcm_au_size; + } + + if (nch > 0 && nch <= 2) + { + return 0x60000 + lpcm_au_size; + } + + if (nch <= 6) + { + return 0x100000 + lpcm_au_size; + } + + return lpcm_au_size; + } + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: + return 0xa000; + + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: + return 0x6400; + + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: + return 0x160000; + + default: + return 0; + } +} + +template +u32 dmuxPamfGetEsMemSize(u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info) +{ + return dmuxPamfGetAuSpecificInfoSize(stream_id, private_stream_id, is_avc) * dmuxPamfGetAuQueueMaxSize(stream_id, private_stream_id) + + dmuxPamfGetAuQueueBufferSize(stream_id, private_stream_id, is_avc, es_specific_info) + 0x7f + static_cast(sizeof(DmuxPamfElementaryStream)) + 0xf; +} + +error_code dmuxPamfNotifyDemuxDone(ppu_thread& ppu, [[maybe_unused]] vm::ptr core_handle, error_code error, vm::ptr handle) +{ + handle->notify_demux_done.cbFunc(ppu, handle, error, handle->notify_demux_done.cbArg); + return CELL_OK; +} + +error_code dmuxPamfNotifyProgEndCode(ppu_thread& ppu, [[maybe_unused]] vm::ptr core_handle, vm::ptr handle) +{ + if (handle->notify_prog_end_code.cbFunc) + { + handle->notify_prog_end_code.cbFunc(ppu, handle, handle->notify_prog_end_code.cbArg); + } + + return CELL_OK; +} + +error_code dmuxPamfNotifyFatalErr(ppu_thread& ppu, [[maybe_unused]] vm::ptr core_handle, error_code error, vm::ptr handle) +{ + handle->notify_fatal_err.cbFunc(ppu, handle, error, handle->notify_fatal_err.cbArg); + 
return CELL_OK; +} + +error_code dmuxPamfEsNotifyAuFound(ppu_thread& ppu, [[maybe_unused]] vm::ptr core_handle, vm::cptr au_info, vm::ptr handle) +{ + const vm::var _au_info; + _au_info->info.auAddr = au_info->addr; + _au_info->info.auSize = au_info->size; + _au_info->info.isRap = au_info->is_rap; + _au_info->info.userData = au_info->user_data; + _au_info->info.pts = au_info->pts; + _au_info->info.dts = au_info->dts; + _au_info->specific_info = au_info->specific_info; + _au_info->specific_info_size = au_info->specific_info_size; + // _au_info->info.auMaxSize is left uninitialized + + return handle->notify_au_found.cbFunc(ppu, handle, _au_info, handle->notify_au_found.cbArg); +} + +error_code dmuxPamfEsNotifyFlushDone(ppu_thread& ppu, [[maybe_unused]] vm::ptr core_handle, vm::ptr handle) +{ + return handle->notify_flush_done.cbFunc(ppu, handle, handle->notify_flush_done.cbArg); +} + error_code _CellDmuxCoreOpQueryAttr(vm::cptr pamfSpecificInfo, vm::ptr pamfAttr) { - cellDmuxPamf.todo("_CellDmuxCoreOpQueryAttr(pamfSpecificInfo=*0x%x, pamfAttr=*0x%x)", pamfSpecificInfo, pamfAttr); + cellDmuxPamf.notice("_CellDmuxCoreOpQueryAttr(pamfSpecificInfo=*0x%x, pamfAttr=*0x%x)", pamfSpecificInfo, pamfAttr); + + if (!pamfAttr || (pamfSpecificInfo && pamfSpecificInfo->thisSize != sizeof(CellDmuxPamfSpecificInfo))) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + pamfAttr->maxEnabledEsNum = DMUX_PAMF_MAX_ENABLED_ES_NUM; + pamfAttr->version = DMUX_PAMF_VERSION; + pamfAttr->memSize = sizeof(CellDmuxPamfHandle) + sizeof(DmuxPamfContext) + 0xe7b; return CELL_OK; } -error_code _CellDmuxCoreOpOpen(vm::cptr pamfSpecificInfo, vm::cptr demuxerResource, vm::cptr demuxerResourceSpurs, vm::cptr> notifyDemuxDone, - vm::cptr> notifyProgEndCode, vm::cptr> notifyFatalErr, vm::pptr handle) +error_code DmuxPamfContext::open(ppu_thread& ppu, const CellDmuxPamfResource& res, vm::cptr res_spurs, const DmuxCb& notify_dmux_done, + const DmuxCb& notify_prog_end_code, const DmuxCb& notify_fatal_err, 
vm::bptr& handle) { - cellDmuxPamf.todo("_CellDmuxCoreOpOpen(pamfSpecificInfo=*0x%x, demuxerResource=*0x%x, demuxerResourceSpurs=*0x%x, notifyDemuxDone=*0x%x, notifyProgEndCode=*0x%x, notifyFatalErr=*0x%x, handle=**0x%x)", + if (res.ppuThreadPriority >= 0xc00u || res.ppuThreadStackSize < 0x1000u || res.spuThreadPriority >= 0x100u || res.numOfSpus != 1u || !res.memAddr || res.memSize < sizeof(DmuxPamfContext) + 0xe7b) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + const auto _this = vm::ptr::make(utils::align(+res.memAddr.addr(), 0x80)); + + _this->_this = _this; + _this->this_size = res.memSize; + _this->version = DMUX_PAMF_VERSION; + _this->notify_demux_done = notify_dmux_done; + _this->notify_prog_end_code = notify_prog_end_code; + _this->notify_fatal_err = notify_fatal_err; + _this->resource = res; + _this->unk = 0; + _this->ppu_thread_stack_size = res.ppuThreadStackSize; + _this->au_released_bitset = 0; + _this->stream_reset_requested = false; + _this->sequence_state = DmuxPamfSequenceState::dormant; + _this->max_enabled_es_num = DMUX_PAMF_MAX_ENABLED_ES_NUM; + _this->enabled_es_num = 0; + std::ranges::fill(_this->elementary_streams, vm::null); + _this->next_es_id = 0; + + const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_dxpmtx"_u64 } }}; + const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_dxpcnd"_u64 } }}; + + if (lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::mutex), mutex_attr) != CELL_OK + || lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::cond), _this->mutex, cond_attr) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + _this->spurs_context_addr = _this.ptr(&DmuxPamfContext::spurs_context); + _this->cmd_queue_addr_ = _this.ptr(&DmuxPamfContext::cmd_queue); + _this->cmd_queue_buffer_addr_ = _this.ptr(&DmuxPamfContext::cmd_queue_buffer); + _this->cmd_queue_addr = _this.ptr(&DmuxPamfContext::cmd_queue); + _this->cmd_result_queue_addr = 
_this.ptr(&DmuxPamfContext::cmd_result_queue); + _this->stream_info_queue_addr = _this.ptr(&DmuxPamfContext::stream_info_queue); + _this->event_queue_addr = _this.ptr(&DmuxPamfContext::event_queue); + _this->cmd_queue_buffer_addr = _this.ptr(&DmuxPamfContext::cmd_queue_buffer); + _this->cmd_result_queue_buffer_addr = _this.ptr(&DmuxPamfContext::cmd_result_queue_buffer); + _this->event_queue_buffer_addr = _this.ptr(&DmuxPamfContext::event_queue_buffer); + _this->stream_info_queue_buffer_addr = _this.ptr(&DmuxPamfContext::stream_info_queue_buffer); + _this->cmd_queue_addr__ = _this.ptr(&DmuxPamfContext::cmd_queue); + + ensure(std::snprintf(_this->spurs_taskset_name, sizeof(_this->spurs_taskset_name), "_libdmux_pamf_%08x", _this.addr()) == 22); + + _this->use_existing_spurs = !!res_spurs; + + if (!res_spurs && g_fxo->get().take(0x40000) != 0x40000) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + _this->cmd_queue.init(_this->cmd_queue_buffer); + _this->cmd_result_queue.init(_this->cmd_result_queue_buffer); + _this->stream_info_queue.init(_this->stream_info_queue_buffer); + _this->event_queue.init(_this->event_queue_buffer); + + // HLE exclusive + _this->savestate = {}; + _this->au_queue_full_bitset = 0; + _this->stream_reset_started = false; + _this->stream_reset_in_progress = false; + + _this->run_spu_thread(); + + handle = _this; + return _this->create_thread(ppu); +} + +error_code _CellDmuxCoreOpOpen(ppu_thread& ppu, vm::cptr pamfSpecificInfo, vm::cptr demuxerResource, vm::cptr demuxerResourceSpurs, vm::cptr> notifyDemuxDone, + vm::cptr> notifyProgEndCode, vm::cptr> notifyFatalErr, vm::pptr handle) +{ + // Block savestates during ppu_execute + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmuxPamf.notice("_CellDmuxCoreOpOpen(pamfSpecificInfo=*0x%x, demuxerResource=*0x%x, demuxerResourceSpurs=*0x%x, notifyDemuxDone=*0x%x, notifyProgEndCode=*0x%x, 
notifyFatalErr=*0x%x, handle=**0x%x)", pamfSpecificInfo, demuxerResource, demuxerResourceSpurs, notifyDemuxDone, notifyProgEndCode, notifyFatalErr, handle); - return CELL_OK; + if ((pamfSpecificInfo && pamfSpecificInfo->thisSize != sizeof(CellDmuxPamfSpecificInfo)) + || !demuxerResource + || (demuxerResourceSpurs && !demuxerResourceSpurs->spurs) + || !notifyDemuxDone || !notifyDemuxDone->cbFunc || !notifyDemuxDone->cbArg + || !notifyProgEndCode + || !notifyFatalErr || !notifyFatalErr->cbFunc || !notifyFatalErr->cbArg + || !handle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + ensure(demuxerResource->memAddr.aligned(0x10)); // Not checked on LLE + ensure(demuxerResource->memSize >= sizeof(CellDmuxPamfHandle)); // Not checked on LLE + ensure(vm::check_addr(demuxerResource->memAddr.addr(), vm::page_readable | vm::page_writable, demuxerResource->memSize)); + + const auto _handle = vm::static_ptr_cast(demuxerResource->memAddr); + + _handle->notify_demux_done = *notifyDemuxDone; + _handle->notify_fatal_err = *notifyFatalErr; + _handle->notify_prog_end_code = *notifyProgEndCode; + + if (!pamfSpecificInfo || !pamfSpecificInfo->programEndCodeCb) + { + _handle->notify_prog_end_code.cbFunc = vm::null; + } + + const CellDmuxPamfResource res{ demuxerResource->ppuThreadPriority, demuxerResource->ppuThreadStackSize, demuxerResource->numOfSpus, demuxerResource->spuThreadPriority, + vm::bptr::make(demuxerResource->memAddr.addr() + sizeof(CellDmuxPamfHandle)), demuxerResource->memSize - sizeof(CellDmuxPamfHandle) }; + + const auto demux_done_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(dmuxPamfNotifyDemuxDone))); + const auto prog_end_code_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(dmuxPamfNotifyProgEndCode))); + const auto fatal_err_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(dmuxPamfNotifyFatalErr))); + + const error_code ret = DmuxPamfContext::open(ppu, res, demuxerResourceSpurs, { demux_done_func, _handle }, { prog_end_code_func, _handle }, { 
fatal_err_func, _handle }, _handle->demuxer); + + *handle = _handle; + + return ret; } -error_code _CellDmuxCoreOpClose(vm::ptr handle) +error_code DmuxPamfContext::close(ppu_thread& ppu) { - cellDmuxPamf.todo("_CellDmuxCoreOpClose(handle=*0x%x)", handle); + if (join_thread(ppu) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + ensure(idm::remove(hle_spu_thread_id)); + + if (!use_existing_spurs) + { + g_fxo->get().free(0x40000); + } + + if (lv2_syscall(ppu, cond) != CELL_OK + || lv2_syscall(ppu, mutex) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } return CELL_OK; } -error_code _CellDmuxCoreOpResetStream(vm::ptr handle) +error_code _CellDmuxCoreOpClose(ppu_thread& ppu, vm::ptr handle) { - cellDmuxPamf.todo("_CellDmuxCoreOpResetStream(handle=*0x%x)", handle); + // The PPU thread is going to use ppu_execute + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmuxPamf.notice("_CellDmuxCoreOpClose(handle=*0x%x)", handle); + + if (!handle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return handle->demuxer->close(ppu); +} + +error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) +{ + auto& ar = *ppu.optional_savestate_state; + const u8 savestate = ar.try_read().second; + ar.clear(); + + switch (savestate) + { + case 0: + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(0); + return {}; + } + + if (sequence_state != DmuxPamfSequenceState::running) + { + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + } + + [[fallthrough]]; + + case 1: + send_spu_command_and_wait(ppu, savestate); + + if (ppu.state & cpu_flag::again) + { + ar(1); + return {}; + } + + stream_reset_requested = true; + [[fallthrough]]; + + case 2: + if (const error_code ret = lv2_syscall(ppu, cond, static_cast(thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + { + lv2_syscall(ppu, mutex); + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(2); + return {}; + } + + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + + default: + fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); + } +} + +error_code _CellDmuxCoreOpResetStream(ppu_thread& ppu, vm::ptr handle) +{ + cellDmuxPamf.notice("_CellDmuxCoreOpResetStream(handle=*0x%x)", handle); + + if (!handle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return handle->demuxer->reset_stream(ppu); +} + +error_code DmuxPamfContext::create_thread(ppu_thread& ppu) +{ + const vm::var name = vm::make_str("HLE PAMF demuxer"); + const auto entry = g_fxo->get().func_addr(FIND_FUNC(dmuxPamfEntry)); + + if (ppu_execute<&sys_ppu_thread_create>(ppu, _this.ptr(&DmuxPamfContext::thread_id), entry, +_this.addr(), +resource.ppuThreadPriority, +resource.ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +name) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } return CELL_OK; } -error_code _CellDmuxCoreOpCreateThread(vm::ptr handle) +error_code _CellDmuxCoreOpCreateThread(ppu_thread& ppu, vm::ptr handle) { - cellDmuxPamf.todo("_CellDmuxCoreOpCreateThread(handle=*0x%x)", handle); + // Block savestates during ppu_execute + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - return CELL_OK; + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmuxPamf.notice("_CellDmuxCoreOpCreateThread(handle=*0x%x)", handle); + + if (!handle) + { + return 
CELL_DMUX_PAMF_ERROR_ARG; + } + + return handle->demuxer->create_thread(ppu); } -error_code _CellDmuxCoreOpJoinThread(vm::ptr handle) +error_code DmuxPamfContext::join_thread(ppu_thread& ppu) { - cellDmuxPamf.todo("_CellDmuxCoreOpJoinThread(handle=*0x%x)", handle); + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + std::ranges::fill_n(elementary_streams, enabled_es_num, vm::null); + + enabled_es_num = -1; + + send_spu_command_and_wait(ppu, false); + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + return lv2_syscall(ppu, static_cast(thread_id), +vm::var{}) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; +} + +error_code _CellDmuxCoreOpJoinThread(ppu_thread& ppu, vm::ptr handle) +{ + // The PPU thread is going to use ppu_execute + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmuxPamf.notice("_CellDmuxCoreOpJoinThread(handle=*0x%x)", handle); + + if (!handle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return handle->demuxer->join_thread(ppu); +} + +template +error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_address, u32 stream_size, b8 discontinuity, u32 user_data) +{ + auto& ar = *ppu.optional_savestate_state; + const bool waiting_for_spu_state = ar.try_read().second; + ar.clear(); + + if (!waiting_for_spu_state) + { + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(false); + return {}; + } + + this->user_data = user_data; + + if (!stream_info_queue.emplace(stream_address, stream_size, user_data, !discontinuity, raw_es)) + { + return lv2_syscall(ppu, mutex) == CELL_OK ? 
CELL_DMUX_PAMF_ERROR_BUSY : CELL_DMUX_PAMF_ERROR_FATAL; + } + } + + send_spu_command_and_wait(ppu, waiting_for_spu_state); + + if (ppu.state & cpu_flag::again) + { + ar(true); + return {}; + } + + sequence_state = DmuxPamfSequenceState::running; + + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; +} + +template +error_code _CellDmuxCoreOpSetStream(ppu_thread& ppu, vm::ptr handle, vm::cptr streamAddress, u32 streamSize, b8 discontinuity, u64 userData) +{ + cellDmuxPamf.trace("_CellDmuxCoreOpSetStream(handle=*0x%x, streamAddress=*0x%x, streamSize=0x%x, discontinuity=%d, userData=0x%llx)", raw_es, handle, streamAddress, streamSize, +discontinuity, userData); + + if (!streamAddress || streamSize == 0) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + ensure(!!handle); // Not checked on LLE + + return handle->demuxer->set_stream(ppu, streamAddress, streamSize, discontinuity, static_cast(userData)); +} + +error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_addr, u32 au_size) const +{ + auto& ar = *ppu.optional_savestate_state; + const u8 savestate = ar.try_read().second; + ar.clear(); + + switch (savestate) + { + case 0: + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(0); + return {}; + } + + [[fallthrough]]; + + case 1: + demuxer->send_spu_command_and_wait(ppu, savestate, au_addr, au_size, static_cast>(stream_id), static_cast>(private_stream_id)); + + if (ppu.state & cpu_flag::again) + { + ar(1); + return {}; + } + + demuxer->au_released_bitset |= 1ull << this_index; + [[fallthrough]]; + + case 2: + if (const error_code ret = lv2_syscall(ppu, demuxer->cond, static_cast(demuxer->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + { + lv2_syscall(ppu, demuxer->mutex); + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(2); + return {}; + } + + return 
lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + + default: + fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); + } +} + +error_code _CellDmuxCoreOpReleaseAu(ppu_thread& ppu, vm::ptr esHandle, vm::ptr auAddr, u32 auSize) +{ + cellDmuxPamf.trace("_CellDmuxCoreOpReleaseAu(esHandle=*0x%x, auAddr=*0x%x, auSize=0x%x)", esHandle, auAddr, auSize); + + if (!auAddr || auSize == 0) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + ensure(!!esHandle); // Not checked on LLE + + return esHandle->es->release_au(ppu, auAddr, auSize); +} + +template +error_code dmuxPamfGetEsAttr(u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info, CellDmuxPamfEsAttr& attr) +{ + if (dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID) + { + return CELL_DMUX_PAMF_ERROR_UNKNOWN_STREAM; + } + + if (dmuxPamfVerifyEsSpecificInfo(stream_id, private_stream_id, is_avc, es_specific_info) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + attr.auQueueMaxSize = dmuxPamfGetAuQueueMaxSize(stream_id, private_stream_id); + attr.memSize = dmuxPamfGetEsMemSize(stream_id, private_stream_id, is_avc, es_specific_info); + attr.specificInfoSize = dmuxPamfGetAuSpecificInfoSize(stream_id, private_stream_id, is_avc); return CELL_OK; } template -error_code _CellDmuxCoreOpSetStream(vm::ptr handle, vm::cptr streamAddress, u32 streamSize, b8 discontinuity, u64 userData) +static inline std::tuple get_stream_ids(vm::cptr esFilterId) { - cellDmuxPamf.todo("_CellDmuxCoreOpSetStream(handle=*0x%x, streamAddress=*0x%x, streamSize=0x%x, discontinuity=%d, userData=0x%llx)", raw_es, handle, streamAddress, streamSize, +discontinuity, userData); + if constexpr (raw_es) + { + const auto filter_id = vm::static_ptr_cast(esFilterId); + return { filter_id[2], filter_id[3], filter_id[8] >> 7 }; + } - return CELL_OK; -} - -error_code _CellDmuxCoreOpFreeMemory(vm::ptr esHandle, vm::ptr memAddr, 
u32 memSize) -{ - cellDmuxPamf.todo("_CellDmuxCoreOpFreeMemory(esHandle=*0x%x, memAddr=*0x%x, memSize=0x%x)", esHandle, memAddr, memSize); - - return CELL_OK; + const auto filter_id = vm::static_ptr_cast(esFilterId); + return { filter_id->filterIdMajor, filter_id->filterIdMinor, filter_id->supplementalInfo1 }; } template error_code _CellDmuxCoreOpQueryEsAttr(vm::cptr esFilterId, vm::cptr esSpecificInfo, vm::ptr attr) { - cellDmuxPamf.todo("_CellDmuxCoreOpQueryEsAttr(esFilterId=*0x%x, esSpecificInfo=*0x%x, attr=*0x%x)", raw_es, esFilterId, esSpecificInfo, attr); + cellDmuxPamf.notice("_CellDmuxCoreOpQueryEsAttr(esFilterId=*0x%x, esSpecificInfo=*0x%x, attr=*0x%x)", raw_es, esFilterId, esSpecificInfo, attr); + if (!esFilterId || !attr) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + const auto [stream_id, private_stream_id, is_avc] = get_stream_ids(esFilterId); + + CellDmuxPamfEsAttr es_attr; + + const error_code ret = dmuxPamfGetEsAttr(stream_id, private_stream_id, is_avc, esSpecificInfo, es_attr); + + *attr = es_attr; + attr->memSize += static_cast(sizeof(CellDmuxPamfEsHandle)); + + return ret; +} + +template +error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info, vm::ptr mem_addr, u32 mem_size, const DmuxCb& notify_au_found, + const DmuxCb& notify_flush_done, vm::bptr& es) +{ + auto& ar = *ppu.optional_savestate_state; + const bool waiting_for_spu_state = ar.try_read().second; + ar.clear(); + + if (mem_size < dmuxPamfGetEsMemSize(stream_id, private_stream_id, is_avc, es_specific_info)) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + const auto stream_type = dmuxPamfStreamIdToTypeChannel(stream_id, private_stream_id).first; + + if (!waiting_for_spu_state) + { + if (stream_type == DMUX_PAMF_STREAM_TYPE_INDEX_INVALID) + { + return CELL_DMUX_PAMF_ERROR_UNKNOWN_STREAM; + } + + if (dmuxPamfVerifyEsSpecificInfo(stream_id, private_stream_id, is_avc, es_specific_info) != CELL_OK) + { + return 
CELL_DMUX_PAMF_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(false); + return {}; + } + + this->is_raw_es = raw_es; + + if (enabled_es_num == max_enabled_es_num) + { + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_NO_MEMORY : CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (find_es(stream_id, private_stream_id)) + { + // Elementary stream is already enabled + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + } + } + + const be_t au_max_size = [&]() -> be_t + { + switch (stream_type) + { + case DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO: + if (is_avc) + { + if (!es_specific_info || vm::static_ptr_cast(es_specific_info)->level == CELL_DMUX_PAMF_AVC_LEVEL_4P2) + { + return 0xcc000u; + } + + switch (vm::static_ptr_cast(es_specific_info)->level) + { + case CELL_DMUX_PAMF_AVC_LEVEL_2P1: return 0x12900u; + case CELL_DMUX_PAMF_AVC_LEVEL_3P0: return 0x25f80u; + case CELL_DMUX_PAMF_AVC_LEVEL_3P1: return 0x54600u; + case CELL_DMUX_PAMF_AVC_LEVEL_3P2: return 0x78000u; + case CELL_DMUX_PAMF_AVC_LEVEL_4P1: return 0xc0000u; + default: fmt::throw_exception("Unreachable"); // es_specific_info was already checked for invalid values in dmuxPamfVerifyEsSpecificInfo() + } + } + + if (!es_specific_info || vm::static_ptr_cast(es_specific_info)->profileLevel > CELL_DMUX_PAMF_M2V_MP_ML) + { + return 0x12a800u; + } + + return 0x38000u; + + case DMUX_PAMF_STREAM_TYPE_INDEX_LPCM: return dmuxPamfGetLpcmAuSize(vm::static_ptr_cast(es_specific_info)); + case DMUX_PAMF_STREAM_TYPE_INDEX_AC3: return 0xf00u; + case DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX: return 0x1008u; + case DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA: return 0xa0000u; + default: fmt::throw_exception("Unreachable"); // stream_type was already checked + } + }(); + + const auto _es = vm::bptr::make(utils::align(mem_addr.addr(), 0x10)); + + const auto 
au_queue_buffer = vm::bptr::make(utils::align(_es.addr() + static_cast(sizeof(DmuxPamfElementaryStream)), 0x80)); + const be_t au_specific_info_size = dmuxPamfGetAuSpecificInfoSize(stream_id, private_stream_id, is_avc); + + send_spu_command_and_wait(ppu, waiting_for_spu_state, stream_id, private_stream_id, is_avc, au_queue_buffer, + dmuxPamfGetAuQueueBufferSize(stream_id, private_stream_id, is_avc, es_specific_info), au_max_size, au_specific_info_size, raw_es, next_es_id); + + if (ppu.state & cpu_flag::again) + { + ar(true); + return {}; + } + + u32 es_idx = umax; + while (elementary_streams[++es_idx]){} // There is guaranteed to be an empty slot, this was already checked above + + _es->_this = _es; + _es->this_size = mem_size; + _es->this_index = es_idx; + _es->demuxer = _this; + _es->notify_au_found = notify_au_found; + _es->notify_flush_done = notify_flush_done; + _es->stream_id = stream_id; + _es->private_stream_id = private_stream_id; + _es->is_avc = is_avc; + _es->au_queue_buffer = au_queue_buffer; + _es->au_max_size = au_max_size; + _es->au_specific_info_size = au_specific_info_size; + _es->reset_next_au = false; + _es->es_id = next_es_id++; + + elementary_streams[es_idx] = _es; + + enabled_es_num++; + + if (lv2_syscall(ppu, mutex) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + es = _es; return CELL_OK; } template -error_code _CellDmuxCoreOpEnableEs(vm::ptr handle, vm::cptr esFilterId, vm::cptr esResource, vm::cptr> notifyAuFound, - vm::cptr> notifyFlushDone, vm::cptr esSpecificInfo, vm::pptr esHandle) +error_code _CellDmuxCoreOpEnableEs(ppu_thread& ppu, vm::ptr handle, vm::cptr esFilterId, vm::cptr esResource, vm::cptr> notifyAuFound, + vm::cptr> notifyFlushDone, vm::cptr esSpecificInfo, vm::pptr esHandle) { - cellDmuxPamf.todo("_CellDmuxCoreOpEnableEs(handle=*0x%x, esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle)", + cellDmuxPamf.notice("_CellDmuxCoreOpEnableEs(handle=*0x%x, 
esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle=**0x%x)", raw_es, handle, esFilterId, esResource, notifyAuFound, notifyFlushDone, esSpecificInfo, esHandle); - return CELL_OK; + if (!handle || !esFilterId || !esResource || !esResource->memAddr || esResource->memSize == 0u || !notifyAuFound || !notifyAuFound->cbFunc || !notifyAuFound->cbArg || !notifyFlushDone || !notifyFlushDone->cbFunc || !notifyFlushDone->cbArg) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + ensure(!!esHandle && esResource->memAddr.aligned(0x10)); // Not checked on LLE + ensure(esResource->memSize >= sizeof(CellDmuxPamfEsHandle)); // Not checked on LLE + ensure(vm::check_addr(esResource->memAddr.addr(), vm::page_readable | vm::page_writable, esResource->memSize)); + + const auto es_handle = vm::static_ptr_cast(esResource->memAddr); + + es_handle->notify_au_found = *notifyAuFound; + es_handle->notify_flush_done = *notifyFlushDone; + + const auto au_found_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(dmuxPamfEsNotifyAuFound))); + const auto flush_done_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(dmuxPamfEsNotifyFlushDone))); + + const auto [stream_id, private_stream_id, is_avc] = get_stream_ids(esFilterId); + + const error_code ret = handle->demuxer->enable_es(ppu, stream_id, private_stream_id, is_avc, esSpecificInfo, vm::ptr::make(esResource->memAddr.addr() + sizeof(CellDmuxPamfEsHandle)), + esResource->memSize - sizeof(CellDmuxPamfEsHandle), { au_found_func, es_handle }, { flush_done_func, es_handle }, es_handle->es); + + *esHandle = es_handle; + + return ret; } -error_code _CellDmuxCoreOpDisableEs(vm::ptr esHandle) +error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) { - cellDmuxPamf.todo("_CellDmuxCoreOpDisableEs(esHandle=*0x%x)", esHandle); + const auto dmux = demuxer.get_ptr(); - return CELL_OK; + auto& ar = *ppu.optional_savestate_state; + const u8 savestate = ar.try_read().second; + ar.clear(); + + 
switch (savestate) + { + case 0: + if (lv2_syscall(ppu, dmux->mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(0); + return {}; + } + + if (!dmux->find_es(stream_id, private_stream_id)) + { + // Elementary stream is already disabled + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + } + + [[fallthrough]]; + + case 1: + dmux->send_spu_command_and_wait(ppu, savestate, static_cast>(stream_id), static_cast>(private_stream_id)); + + if (ppu.state & cpu_flag::again) + { + ar(1); + return {}; + } + + _this = vm::null; + this_size = 0; + demuxer = vm::null; + notify_au_found = {}; + au_queue_buffer = vm::null; + unk = 0; + au_max_size = 0; + + dmux->elementary_streams[this_index] = vm::null; + dmux->enabled_es_num--; + + dmux->au_released_bitset |= 1ull << this_index; + + this_index = 0; + [[fallthrough]]; + + case 2: + if (const error_code ret = lv2_syscall(ppu, dmux->cond, static_cast(dmux->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + { + lv2_syscall(ppu, dmux->mutex); + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(2); + return {}; + } + + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + + default: + fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); + } } -error_code _CellDmuxCoreOpFlushEs(vm::ptr esHandle) +error_code _CellDmuxCoreOpDisableEs(ppu_thread& ppu, vm::ptr esHandle) { - cellDmuxPamf.todo("_CellDmuxCoreOpFlushEs(esHandle=*0x%x)", esHandle); + cellDmuxPamf.notice("_CellDmuxCoreOpDisableEs(esHandle=*0x%x)", esHandle); - return CELL_OK; + if (!esHandle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return esHandle->es->disable_es(ppu); } -error_code _CellDmuxCoreOpResetEs(vm::ptr esHandle) +error_code DmuxPamfElementaryStream::flush_es(ppu_thread& ppu) const { - cellDmuxPamf.todo("_CellDmuxCoreOpResetEs(esHandle=*0x%x)", esHandle); + auto& ar = *ppu.optional_savestate_state; + const bool waiting_for_spu_state = ar.try_read().second; + ar.clear(); - return CELL_OK; + if (!waiting_for_spu_state) + { + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(false); + return {}; + } + } + + demuxer->send_spu_command_and_wait(ppu, waiting_for_spu_state, static_cast>(stream_id), static_cast>(private_stream_id)); + + if (ppu.state & cpu_flag::again) + { + ar(true); + return {}; + } + + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } -error_code _CellDmuxCoreOpResetStreamAndWaitDone(vm::ptr handle) +error_code _CellDmuxCoreOpFlushEs(ppu_thread& ppu, vm::ptr esHandle) { - cellDmuxPamf.todo("_CellDmuxCoreOpResetStreamAndWaitDone(handle=*0x%x)", handle); + cellDmuxPamf.notice("_CellDmuxCoreOpFlushEs(esHandle=*0x%x)", esHandle); - return CELL_OK; + if (!esHandle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return esHandle->es->flush_es(ppu); } +error_code DmuxPamfElementaryStream::reset_es(ppu_thread& ppu) const +{ + auto& ar = *ppu.optional_savestate_state; + const bool waiting_for_spu_state = ar.try_read().second; + ar.clear(); + + if (!waiting_for_spu_state) + { + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + ar(false); + return {}; + } + } + + demuxer->send_spu_command_and_wait(ppu, waiting_for_spu_state, static_cast>(stream_id), static_cast>(private_stream_id), vm::null); + + if (ppu.state & cpu_flag::again) + { + ar(true); + return {}; + } + + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; +} + +error_code _CellDmuxCoreOpResetEs(ppu_thread& ppu, vm::ptr esHandle) +{ + cellDmuxPamf.notice("_CellDmuxCoreOpResetEs(esHandle=*0x%x)", esHandle); + + if (!esHandle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return esHandle->es->reset_es(ppu); +} + +error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) +{ + // Both sys_cond_wait() and DmuxPamfContext::reset_stream() are already using ppu_thread::optional_savestate_state, so we can't save this function currently + std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock.owns_lock()) + { + ppu.state += cpu_flag::again; + return {}; + } + + if (reset_stream(ppu) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) + { + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + while (sequence_state != DmuxPamfSequenceState::dormant) + { + if (lv2_syscall(ppu, cond, 0) != CELL_OK) + { + lv2_syscall(ppu, mutex); + return CELL_DMUX_PAMF_ERROR_FATAL; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + } + + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; +} + +error_code _CellDmuxCoreOpResetStreamAndWaitDone(ppu_thread& ppu, vm::ptr handle) +{ + cellDmuxPamf.notice("_CellDmuxCoreOpResetStreamAndWaitDone(handle=*0x%x)", handle); + + if (!handle) + { + return CELL_DMUX_PAMF_ERROR_ARG; + } + + return handle->demuxer->reset_stream_and_wait_done(ppu); +} + +template static void init_gvar(const vm::gvar& var) { var->queryAttr.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpQueryAttr))); @@ -124,7 +2863,10 @@ static void init_gvar(const vm::gvar& var) var->resetStream.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpResetStream))); var->createThread.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpCreateThread))); var->joinThread.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpJoinThread))); - var->freeMemory.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpFreeMemory))); + var->setStream.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpSetStream))); + var->releaseAu.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpReleaseAu))); + var->queryEsAttr.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpQueryEsAttr))); + var->enableEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpEnableEs))); var->disableEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpDisableEs))); var->flushEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpFlushEs))); var->resetEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpResetEs))); @@ -133,21 +2875,8 @@ static void init_gvar(const vm::gvar& var) DECLARE(ppu_module_manager::cellDmuxPamf)("cellDmuxPamf", [] { - REG_VNID(cellDmuxPamf, 0x28b2b7b2, g_cell_dmux_core_ops_pamf).init = [] - { - g_cell_dmux_core_ops_pamf->setStream.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpSetStream))); - g_cell_dmux_core_ops_pamf->queryEsAttr.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpQueryEsAttr))); - g_cell_dmux_core_ops_pamf->enableEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpEnableEs))); - 
init_gvar(g_cell_dmux_core_ops_pamf); - }; - - REG_VNID(cellDmuxPamf, 0x9728a0e9, g_cell_dmux_core_ops_raw_es).init = [] - { - g_cell_dmux_core_ops_raw_es->setStream.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpSetStream))); - g_cell_dmux_core_ops_raw_es->queryEsAttr.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpQueryEsAttr))); - g_cell_dmux_core_ops_raw_es->enableEs.set(g_fxo->get().func_addr(FIND_FUNC(_CellDmuxCoreOpEnableEs))); - init_gvar(g_cell_dmux_core_ops_raw_es); - }; + REG_VNID(cellDmuxPamf, 0x28b2b7b2, g_cell_dmux_core_ops_pamf).init = []{ init_gvar(g_cell_dmux_core_ops_pamf); }; + REG_VNID(cellDmuxPamf, 0x9728a0e9, g_cell_dmux_core_ops_raw_es).init = []{ init_gvar(g_cell_dmux_core_ops_raw_es); }; REG_HIDDEN_FUNC(_CellDmuxCoreOpQueryAttr); REG_HIDDEN_FUNC(_CellDmuxCoreOpOpen); @@ -157,7 +2886,7 @@ DECLARE(ppu_module_manager::cellDmuxPamf)("cellDmuxPamf", [] REG_HIDDEN_FUNC(_CellDmuxCoreOpJoinThread); REG_HIDDEN_FUNC(_CellDmuxCoreOpSetStream); REG_HIDDEN_FUNC(_CellDmuxCoreOpSetStream); - REG_HIDDEN_FUNC(_CellDmuxCoreOpFreeMemory); + REG_HIDDEN_FUNC(_CellDmuxCoreOpReleaseAu); REG_HIDDEN_FUNC(_CellDmuxCoreOpQueryEsAttr); REG_HIDDEN_FUNC(_CellDmuxCoreOpQueryEsAttr); REG_HIDDEN_FUNC(_CellDmuxCoreOpEnableEs); @@ -166,4 +2895,12 @@ DECLARE(ppu_module_manager::cellDmuxPamf)("cellDmuxPamf", [] REG_HIDDEN_FUNC(_CellDmuxCoreOpFlushEs); REG_HIDDEN_FUNC(_CellDmuxCoreOpResetEs); REG_HIDDEN_FUNC(_CellDmuxCoreOpResetStreamAndWaitDone); + + REG_HIDDEN_FUNC(dmuxPamfNotifyDemuxDone); + REG_HIDDEN_FUNC(dmuxPamfNotifyProgEndCode); + REG_HIDDEN_FUNC(dmuxPamfNotifyFatalErr); + REG_HIDDEN_FUNC(dmuxPamfEsNotifyAuFound); + REG_HIDDEN_FUNC(dmuxPamfEsNotifyFlushDone); + + REG_HIDDEN_FUNC(dmuxPamfEntry); }); diff --git a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.h b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.h index 01983b724a..5b42103ca7 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.h +++ b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.h @@ -1,15 +1,1039 @@ #pragma once -struct 
CellDmuxPamfAttr +#include "Emu/savestate_utils.hpp" +#include "Utilities/Thread.h" +#include "cellPamf.h" +#include "cellDmux.h" +#include + +// Replacement for CellSpursQueue +template requires(std::is_trivial_v && max_num_of_entries > 0) +class alignas(0x80) dmux_pamf_hle_spurs_queue { - be_t maxEnabledEsNum; - be_t version; + T* buffer; + + alignas(atomic_t) std::array)> _size; // Stored in a byte array since the PPU context needs to be trivial + u32 front; + u32 back; + + template + bool _pop(T* lhs) + { + atomic_t& _size = *std::launder(reinterpret_cast*>(this->_size.data())); + + if (_size == 0) + { + return false; + } + + if (lhs) + { + *lhs = buffer[front]; + } + + if constexpr (!is_peek) + { + front = (front + 1) % max_num_of_entries; + _size--; + _size.notify_one(); + } + + return true; + } + +public: + void init(T (&buffer)[max_num_of_entries]) + { + this->buffer = buffer; + new (_size.data()) atomic_t(0); + front = 0; + back = 0; + } + + bool pop(T& lhs) { return _pop(&lhs); } + bool pop() { return _pop(nullptr); } + bool peek(T& lhs) const { return const_cast(this)->_pop(&lhs); } + bool emplace(auto&&... 
args) + { + atomic_t& _size = *std::launder(reinterpret_cast*>(this->_size.data())); + + if (_size >= max_num_of_entries) + { + return false; + } + + new (&buffer[back]) T(std::forward(args)...); + + back = (back + 1) % max_num_of_entries; + _size++; + _size.notify_one(); + + return true; + } + + [[nodiscard]] u32 size() const { return std::launder(reinterpret_cast*>(this->_size.data()))->observe(); } + + void wait() const + { + const atomic_t& _size = *std::launder(reinterpret_cast*>(this->_size.data())); + + while (_size == 0 && thread_ctrl::state() != thread_state::aborting) + { + thread_ctrl::wait_on(_size, 0); + } + } +}; + +enum class DmuxPamfCommandType : u32 +{ + enable_es = 0, + disable_es = 2, + set_stream = 4, + release_au = 6, + flush_es = 8, + close = 10, + reset_stream = 12, + reset_es = 14, + resume = 16, +}; + +struct alignas(0x80) DmuxPamfCommand +{ + be_t type; + + union + { + struct + { + be_t stream_id; + be_t private_stream_id; + be_t is_avc; + vm::bptr au_queue_buffer; + be_t au_queue_buffer_size; + be_t au_max_size; + be_t au_specific_info_size; + be_t is_raw_es; + be_t user_data; + } + enable_es; + + struct + { + be_t stream_id; + be_t private_stream_id; + } + disable_flush_es; + + struct + { + vm::ptr> au_addr; + be_t au_size; + be_t stream_id; + be_t private_stream_id; + } + release_au; + + struct + { + be_t stream_id; + be_t private_stream_id; + vm::ptr> au_addr; + } + reset_es; + }; + + DmuxPamfCommand() = default; + + DmuxPamfCommand(be_t&& type) + : type(type) + { + } + + DmuxPamfCommand(be_t&& type, const be_t& stream_id, const be_t& private_stream_id) + : type(type), disable_flush_es{ stream_id, private_stream_id } + { + } + + DmuxPamfCommand(be_t&& type, const be_t& stream_id, const be_t& private_stream_id, const vm::ptr>& au_addr) + : type(type), reset_es{ stream_id, private_stream_id, au_addr } + { + } + + DmuxPamfCommand(be_t&& type, const vm::ptr>& au_addr, const be_t& au_size, const be_t& stream_id, const be_t& 
private_stream_id) + : type(type), release_au{ au_addr, au_size, stream_id, private_stream_id } + { + } + + DmuxPamfCommand(be_t&& type, const be_t& stream_id, const be_t& private_stream_id, const be_t& is_avc, const vm::bptr& au_queue_buffer, + const be_t& au_queue_buffer_size, const be_t& au_max_size, const be_t& au_specific_info_size, const be_t& is_raw_es, const be_t& user_data) + : type(type), enable_es{ stream_id, private_stream_id, is_avc, au_queue_buffer, au_queue_buffer_size, au_max_size, au_specific_info_size, is_raw_es, user_data } + { + } +}; + +CHECK_SIZE_ALIGN(DmuxPamfCommand, 0x80, 0x80); + +enum class DmuxPamfEventType : u32 +{ + au_found, + demux_done, + fatal_error, + close, + flush_done, + prog_end_code, +}; + +struct alignas(0x80) DmuxPamfEvent +{ + be_t type; + + union + { + u8 pad[0x78]; + + struct + { + be_t stream_id; + be_t private_stream_id; + vm::ptr> au_addr; + CellCodecTimeStamp pts; + CellCodecTimeStamp dts; + be_t unk; + u8 reserved[4]; + be_t au_size; + be_t stream_header_size; + std::array stream_header_buf; + be_t user_data; + be_t is_rap; + } + au_found; + + struct + { + be_t stream_id; + be_t private_stream_id; + be_t user_data; + } + flush_done; + }; + + be_t event_queue_was_too_full; + + DmuxPamfEvent() = default; + + DmuxPamfEvent(be_t&& type, const be_t& event_queue_was_too_full) + : type(type), event_queue_was_too_full(event_queue_was_too_full) + { + } + + DmuxPamfEvent(be_t&& type, const be_t& stream_id, const be_t& private_stream_id, const be_t& user_data, const be_t& event_queue_was_too_full) + : type(type), flush_done{ stream_id, private_stream_id, user_data }, event_queue_was_too_full(event_queue_was_too_full) + { + } + + DmuxPamfEvent(be_t&& type, const be_t& stream_id, const be_t& private_stream_id, const vm::ptr>& au_addr, const CellCodecTimeStamp& pts, const CellCodecTimeStamp& dts, const be_t& unk, + const be_t& au_size, const be_t& au_specific_info_size, const std::array& au_specific_info, const be_t& user_data, 
const be_t& is_rap, const be_t& event_queue_was_too_full) + : type(type) + , au_found{ stream_id, private_stream_id, au_addr, pts, dts, static_cast>(unk), {}, au_size, au_specific_info_size, au_specific_info, user_data, is_rap } + , event_queue_was_too_full(event_queue_was_too_full) + { + } +}; + +CHECK_SIZE_ALIGN(DmuxPamfEvent, 0x80, 0x80); + +struct alignas(0x80) DmuxPamfStreamInfo +{ + vm::bcptr stream_addr; + be_t stream_size; + be_t user_data; + be_t continuity; + be_t is_raw_es; +}; + +CHECK_SIZE_ALIGN(DmuxPamfStreamInfo, 0x80, 0x80); + +enum DmuxPamfStreamTypeIndex +{ + DMUX_PAMF_STREAM_TYPE_INDEX_INVALID = -1, + DMUX_PAMF_STREAM_TYPE_INDEX_VIDEO, + DMUX_PAMF_STREAM_TYPE_INDEX_LPCM, + DMUX_PAMF_STREAM_TYPE_INDEX_AC3, + DMUX_PAMF_STREAM_TYPE_INDEX_ATRACX, + DMUX_PAMF_STREAM_TYPE_INDEX_USER_DATA, +}; + + +// SPU thread + +class dmux_pamf_base +{ + // Event handlers for the demuxer. These correspond to the events that the SPU thread sends to the PPU thread on LLE (except for au_queue_full(): the SPU thread just sets a global bool, + // but it is never notified to the PPU thread or the user). 
+ + virtual bool on_au_found(u8 stream_id, u8 private_stream_id, u32 user_data, std::span au, u64 pts, u64 dts, bool rap, u8 au_specific_info_size, std::array au_specific_info_buf) = 0; + virtual bool on_demux_done() = 0; + virtual void on_fatal_error() = 0; + virtual bool on_flush_done(u8 stream_id, u8 private_stream_id, u32 user_data) = 0; + virtual bool on_prog_end() = 0; + virtual void on_au_queue_full() = 0; + +public: + virtual ~dmux_pamf_base() = default; + + bool enable_es(u32 stream_id, u32 private_stream_id, bool is_avc, std::span au_queue_buffer, u32 au_max_size, bool raw_es, u32 user_data); + bool disable_es(u32 stream_id, u32 private_stream_id); + bool release_au(u32 stream_id, u32 private_stream_id, u32 au_size) const; + bool flush_es(u32 stream_id, u32 private_stream_id); + void set_stream(std::span stream, bool continuity); + void reset_stream(); + bool reset_es(u32 stream_id, u32 private_stream_id, u8* au_addr); + bool process_next_pack(); + +protected: + void save_base(utils::serial& ar); + [[nodiscard]] bool has_work() const { return !!stream || !demux_done_notified; } + [[nodiscard]] u32 get_enabled_es_count() const; + +private: + static constexpr u16 PACK_SIZE = 0x800; + static constexpr s8 PACK_STUFFING_LENGTH_OFFSET = 0xd; + static constexpr s8 PES_PACKET_LENGTH_OFFSET = 0x4; + static constexpr s8 PES_HEADER_DATA_LENGTH_OFFSET = 0x8; + static constexpr s8 PTS_DTS_FLAG_OFFSET = 0x7; + static constexpr u8 PACKET_START_CODE_PREFIX = 1; + + static constexpr be_t M2V_PIC_START = 0x100; + static constexpr be_t AVC_AU_DELIMITER = 0x109; + static constexpr be_t M2V_SEQUENCE_HEADER = 0x1b3; + static constexpr be_t M2V_SEQUENCE_END = 0x1b7; + static constexpr be_t PACK_START = 0x1ba; + static constexpr be_t SYSTEM_HEADER = 0x1bb; + static constexpr be_t PRIVATE_STREAM_1 = 0x1bd; + static constexpr be_t PRIVATE_STREAM_2 = 0x1bf; + static constexpr be_t PROG_END = 0x1b9; + static constexpr be_t VIDEO_STREAM_BASE = 0x1e0; // The lower 4 bits indicate the 
channel + + // Partial access unit that will be written to the output queue + struct access_unit_chunk + { + std::vector cached_data; // Up to three bytes of data from the previous PES packet (copied into this vector, since it might not be in memory anymore) + std::span data; // Data of the current PES packet + }; + + // Output queue for access units + // The queue doesn't keep track of where access units are in the buffer (only which parts are used and which are free), this has to be done extenally + class output_queue + { + public: + explicit output_queue(std::span buffer) : buffer(buffer) {} + + explicit output_queue(utils::serial& ar) + : buffer{vm::_ptr(ar.pop()), ar.pop()} + , back(vm::_ptr(ar.pop())) + , front(vm::_ptr(ar.pop())) + , wrap_pos(vm::_ptr(ar.pop())) + { + } + + void save(utils::serial& ar) const { ar(vm::get_addr(buffer.data()), static_cast(buffer.size()), vm::get_addr(back), vm::get_addr(front), vm::get_addr(wrap_pos)); } + + // The queue itself doesn't keep track of the location of each access unit, so the pop and access operations need the size or address of the access unit to remove/return + void pop_back(u32 au_size); + void pop_back(u8* au_addr); + void pop_front(u32 au_size); + [[nodiscard]] const u8* peek_back(u32 au_size) const { return back - au_size; } + + void clear() { wrap_pos = front = back = buffer.data(); } + + void push_unchecked(const access_unit_chunk& au_chunk); + bool push(const access_unit_chunk& au_chunk, const std::function& on_fatal_error); + + [[nodiscard]] bool prepare_next_au(u32 au_max_size); + + [[nodiscard]] usz get_free_size() const { return wrap_pos != buffer.data() ? 
front - back : std::to_address(buffer.end()) - back; } + + private: + const std::span buffer; + + // Since access units have a variable size, uses pointers instead of indices + u8* back = buffer.data(); + const u8* front = buffer.data(); + const u8* wrap_pos = buffer.data(); // The address where the back pointer wrapped around to the beginning of the queue + }; + + // Base class for elementary streams and subclasses for each stream type + // Responsible for processing the data section of PES packets and splitting it into access units with the stream parsers of each subclass + class elementary_stream + { + public: + elementary_stream(u8 channel, u32 au_max_size, dmux_pamf_base& ctx, u32 user_data, u8 au_specific_info_size, std::span au_queue_buffer) + : channel(channel) + , au_max_size(au_max_size == umax || au_max_size > au_queue_buffer.size() ? 0x800 : au_max_size) + , ctx(ctx) + , au_specific_info_size(au_specific_info_size) + , user_data(user_data) + , au_queue(au_queue_buffer) + { + // The cache sizes will never exceed three bytes + cache.reserve(3); + au_chunk.cached_data.reserve(3); + } + + elementary_stream(utils::serial& ar, u8 channel, dmux_pamf_base& ctx, u8 au_specific_info_size) + : channel(channel) + , au_max_size(ar.pop()) + , ctx(ctx) + , au_specific_info_size(au_specific_info_size) + , user_data(ar.pop()) + , au_queue(ar) + { + save(ar); + } + + virtual ~elementary_stream() = default; + void save(utils::serial& ar); + + static bool is_enabled(const std::unique_ptr& es) { return !!es; } + + [[nodiscard]] virtual std::pair get_stream_id() const = 0; + + void set_pes_packet_data(std::span pes_packet_data) { ensure(!this->pes_packet_data); this->pes_packet_data = this->stream_chunk = pes_packet_data; } + void set_pts(u64 pts) { this->pts = pts; } + void set_dts(u64 dts) { this->dts = dts; } + void set_rap() { rap = true; } + + // Parses the proprietary header of private streams. 
Returns the size of the header or umax if the stream is invalid + virtual u32 parse_stream_header(std::span elementary_stream, s8 pts_dts_flag) = 0; + + // Processes the current PES packet. Returns true if it has been entirely consumed + bool process_pes_packet_data(); + + void release_au(u32 au_size) { au_queue.pop_front(au_size); } + void flush_es(); + void reset_es(u8* au_addr); + void discard_access_unit(); + + protected: + const u8 channel : 4; + const u32 au_max_size; // Maximum possible size of an access unit + u32 au_size_unk = 0; // For user data streams, used to store the size of the current access unit. For other private streams, used as a bool for some reason + alignas(0x10) std::array au_specific_info_buf{}; // For LPCM streams, stores the first 0x10 bytes of the current PES packet data, contains info like the number of channels + + // The access unit that is currently being cut out + struct access_unit + { + ENABLE_BITWISE_SERIALIZATION + + enum class state : u8 + { + none, // An access unit is not currently being cut out + incomplete, // An access unit is currently being cut out + commenced, // The current PES packet contains the beginning of an access unit + complete, // The current PES packet contains the end of an access unit + size_mismatch, // The distance between sync words and size indicated in the access unit's info header does not match + m2v_sequence, // Special case for M2V, access unit commenced, but the next start code does not complete the access unit + } + state = state::none; + + bool rap = false; + bool timestamps_rap_set = false; + + // Since the delimiters of compressed audio streams are allowed to appear anywhere in the stream (instead of just the beginning of an access unit), we need to parse the size of the access unit from the stream + u8 size_info_offset = 0; + u16 parsed_size = 0; + + u32 accumulated_size = 0; // Incremented after every access unit chunk cut out from the stream + + u64 pts = umax; + u64 dts = umax; + + 
alignas(0x10) std::array au_specific_info_buf{}; + } + current_au; + + access_unit_chunk au_chunk; // A partial access unit that will be written to the access unit queue. Set by the stream parsers + std::vector cache; // The last three bytes of the current PES packet need to be saved, since they could contain part of an access unit delimiter + + // Returns the stream header size of audio streams. The only difference between LPCM and compressed streams is the extra_header_size_unk_mask + template + u32 parse_audio_stream_header(std::span pes_packet_data); + + private: + dmux_pamf_base& ctx; // For access to event handlers + + enum class state : u8 + { + initial, + pushing_au_queue, + notifying_au_found, + preparing_for_next_au + } + state = state::initial; + + // Size of the "CellDmuxPamfAuSpecificInfo..." struct for the type of this stream ("reserved" fields are not counted, so for all stream types other than LPCM this will be 0) + // This does NOT correspond to the amount of data in au_specific_info_buf, the info in the buffer gets unpacked by the PPU thread + const u8 au_specific_info_size; + + const u32 user_data; + + // Data section of the current PES packet. Needs to be remembered separately from the span we're working with below + std::optional> pes_packet_data; + + std::span stream_chunk; // The current section of the PES packet data to be processed + u64 pts = umax; // Presentation time stamp of the current PES packet + u64 dts = umax; // Decoding time stamp of the current PES packet + bool rap = false; // Random access point indicator + + output_queue au_queue; + + // Extracts access units from the stream by searching for the access unit delimiter and setting au_chunk accordingly. 
Returns the number of bytes that were parsed + virtual u32 parse_stream(std::span stream) = 0; + + void reset() + { + state = state::initial; + pes_packet_data.reset(); + au_size_unk = 0; + pts = + dts = umax; + rap = false; + au_chunk.data = {}; + au_chunk.cached_data.clear(); + current_au = {}; + } + + void set_au_timestamps_rap() + { + current_au.pts = pts; + current_au.dts = dts; + current_au.rap = rap; + pts = + dts = umax; + rap = false; + current_au.timestamps_rap_set = true; + } + }; + + template + class video_stream final : public elementary_stream + { + public: + video_stream(u8 channel, u32 au_max_size, dmux_pamf_base& ctx, u32 user_data, std::span au_queue_buffer) : elementary_stream(channel, au_max_size, ctx, user_data, 0, au_queue_buffer) {} + video_stream(utils::serial& ar, u8 channel, dmux_pamf_base& ctx) : elementary_stream(ar, channel, ctx, 0) {} + + private: + u32 parse_stream(std::span stream) override; + u32 parse_stream_header([[maybe_unused]] std::span pes_packet_data, [[maybe_unused]] s8 pts_dts_flag) override { return 0; } + [[nodiscard]] std::pair get_stream_id() const override { return { 0xe0 | channel, 0 }; } + }; + + class lpcm_stream final : public elementary_stream + { + public: + lpcm_stream(u8 channel, u32 au_max_size, dmux_pamf_base& ctx, u32 user_data, std::span au_queue_buffer) : elementary_stream(channel, au_max_size, ctx, user_data, 3, au_queue_buffer) {} + lpcm_stream(utils::serial& ar, u8 channel, dmux_pamf_base& ctx) : elementary_stream(ar, channel, ctx, 3) {} + + private: + u32 parse_stream(std::span stream) override; + u32 parse_stream_header(std::span pes_packet_data, [[maybe_unused]] s8 pts_dts_flag) override; + [[nodiscard]] std::pair get_stream_id() const override { return { 0xbd, 0x40 | channel }; } + }; + + template + class audio_stream final : public elementary_stream + { + public: + audio_stream(u8 channel, u32 au_max_size, dmux_pamf_base& ctx, u32 user_data, std::span au_queue_buffer) : elementary_stream(channel, 
au_max_size, ctx, user_data, 0, au_queue_buffer) {} + audio_stream(utils::serial& ar, u8 channel, dmux_pamf_base& ctx) : elementary_stream(ar, channel, ctx, 0) {} + + private: + static constexpr be_t SYNC_WORD = ac3 ? 0x0b77 : 0x0fd0; + static constexpr u8 ATRACX_ATS_HEADER_SIZE = 8; + static constexpr u16 AC3_FRMSIZE_TABLE[3][38] = + { + { 0x40, 0x40, 0x50, 0x50, 0x60, 0x60, 0x70, 0x70, 0x80, 0x80, 0xa0, 0xa0, 0xc0, 0xc0, 0xe0, 0xe0, 0x100, 0x100, 0x140, 0x140, 0x180, 0x180, 0x1c0, 0x1c0, 0x200, 0x200, 0x280, 0x280, 0x300, 0x300, 0x380, 0x380, 0x400, 0x400, 0x480, 0x480, 0x500, 0x500 }, + { 0x45, 0x46, 0x57, 0x58, 0x68, 0x69, 0x79, 0x7a, 0x8b, 0x8c, 0xae, 0xaf, 0xd0, 0xd1, 0xf3, 0xf4, 0x116, 0x117, 0x15c, 0x15d, 0x1a1, 0x1a2, 0x1e7, 0x1e8, 0x22d, 0x22e, 0x2b8, 0x2b9, 0x343, 0x344, 0x3cf, 0x3d0, 0x45a, 0x45b, 0x4e5, 0x4e6, 0x571, 0x572 }, + { 0x60, 0x60, 0x78, 0x78, 0x90, 0x90, 0xa8, 0xa8, 0xc0, 0xc0, 0xf0, 0xf0, 0x120, 0x120, 0x150, 0x150, 0x180, 0x180, 0x1e0, 0x1e0, 0x240, 0x240, 0x2a0, 0x2a0, 0x300, 0x300, 0x3c0, 0x3c0, 0x480, 0x480, 0x540, 0x540, 0x600, 0x600, 0x6c0, 0x6c0, 0x780, 0x780 } + }; + + u32 parse_stream(std::span stream) override; + u32 parse_stream_header(std::span pes_packet_data, [[maybe_unused]] s8 pts_dts_flag) override { return parse_audio_stream_header<0xffff>(pes_packet_data); } + [[nodiscard]] std::pair get_stream_id() const override { return { 0xbd, (ac3 ? 
0x30 : 0x00) | channel }; } + }; + + class user_data_stream final : public elementary_stream + { + public: + user_data_stream(u8 channel, u32 au_max_size, dmux_pamf_base& ctx, u32 user_data, std::span au_queue_buffer) : elementary_stream(channel, au_max_size, ctx, user_data, 0, au_queue_buffer) {} + user_data_stream(utils::serial& ar, u8 channel, dmux_pamf_base& ctx) : elementary_stream(ar, channel, ctx, 0) {} + + private: + u32 parse_stream(std::span stream) override; + u32 parse_stream_header(std::span pes_packet_data, s8 pts_dts_flag) override; + [[nodiscard]] std::pair get_stream_id() const override { return { 0xbd, 0x20 | channel }; } + }; + + + enum class state : u8 + { + initial, + elementary_stream, + prog_end + } + state = state::initial; + + bool demux_done_notified = true; // User was successfully notified that the stream has been consumed + + u8 pack_es_type_idx = umax; // Elementary stream type in the current pack + u8 pack_es_channel = 0; // Elementary stream channel in the current pack + + bool raw_es = false; // Indicates that the input stream is a raw elementary stream instead of a multiplexed MPEG program stream. 
If set to true, MPEG-PS related parsing will be skipped + + std::optional> stream; // The stream to be demultiplexed, provided by the user + + std::unique_ptr elementary_streams[5][0x10]; // One for each possible type and channel +}; + +// Implementation of the SPU thread +class dmux_pamf_spu_context : dmux_pamf_base +{ +public: + static constexpr u32 id_base = 0; + static constexpr u32 id_step = 1; + static constexpr u32 id_count = 0x400; + SAVESTATE_INIT_POS(std::numeric_limits::max()); // Doesn't depend on or is a dependency of anything + + dmux_pamf_spu_context(vm::ptr> cmd_queue, vm::ptr, 1>> cmd_result_queue, + vm::ptr> stream_info_queue, vm::ptr> event_queue) + : cmd_queue(cmd_queue), cmd_result_queue(cmd_result_queue), stream_info_queue(stream_info_queue), event_queue(event_queue) + { + } + + explicit dmux_pamf_spu_context(utils::serial& ar) + : cmd_queue(ar.pop>>()) + , cmd_result_queue(vm::ptr, 1>>::make(cmd_queue.addr() + sizeof(dmux_pamf_hle_spurs_queue))) + , stream_info_queue(vm::ptr>::make(cmd_result_queue.addr() + sizeof(dmux_pamf_hle_spurs_queue, 1>))) + , event_queue(vm::ptr>::make(stream_info_queue.addr() + sizeof(dmux_pamf_hle_spurs_queue))) + , new_stream(ar.pop()) + { + save_base(ar); + max_enqueued_events += 2 * get_enabled_es_count(); + } + + void save(utils::serial& ar); + + void operator()(); // cellSpursMain() + static constexpr auto thread_name = "HLE PAMF demuxer SPU thread"sv; + +private: + // These are globals in the SPU thread + const vm::ptr> cmd_queue; + const vm::ptr, 1>> cmd_result_queue; + const vm::ptr> stream_info_queue; + const vm::ptr> event_queue; + bool wait_for_au_queue = false; + bool wait_for_event_queue = false; + bool event_queue_was_too_full = false; // Sent to the PPU thread + u8 max_enqueued_events = 4; // 4 + 2 * number of enabled elementary streams + + // This is a local variable in cellSpursMain(), needs to be saved for savestates + bool new_stream = false; + + bool get_next_cmd(DmuxPamfCommand& lhs, bool 
new_stream) const; + bool send_event(auto&&... args) const; + + // The events are sent to the PPU thread via the event_queue + bool on_au_found(u8 stream_id, u8 private_stream_id, u32 user_data, std::span au, u64 pts, u64 dts, bool rap, u8 au_specific_info_size, std::array au_specific_info_buf) override + { + return !((wait_for_event_queue = !send_event(DmuxPamfEventType::au_found, stream_id, private_stream_id, vm::get_addr(au.data()), std::bit_cast(static_cast>(pts)), + std::bit_cast(static_cast>(dts)), 0, static_cast(au.size()), au_specific_info_size, au_specific_info_buf, user_data, rap))); + } + bool on_demux_done() override { return !((wait_for_event_queue = !send_event(DmuxPamfEventType::demux_done))); } + void on_fatal_error() override { send_event(DmuxPamfEventType::fatal_error); } + bool on_flush_done(u8 stream_id, u8 private_stream_id, u32 user_data) override { return send_event(DmuxPamfEventType::flush_done, stream_id, private_stream_id, user_data); } // The "flush done" event does not set wait_for_event_queue if the queue is full + bool on_prog_end() override { return !((wait_for_event_queue = !send_event(DmuxPamfEventType::prog_end_code))); } + void on_au_queue_full() override { wait_for_au_queue = true; } +}; + +using dmux_pamf_spu_thread = named_thread; + + +// PPU thread + +// For some reason, cellDmuxPamf doesn't use regular error code values and also has a second set of error codes that's only used internally +enum CellDmuxPamfError +{ + CELL_DMUX_PAMF_ERROR_BUSY = 1, + CELL_DMUX_PAMF_ERROR_ARG = 2, + CELL_DMUX_PAMF_ERROR_UNKNOWN_STREAM = 3, + CELL_DMUX_PAMF_ERROR_NO_MEMORY = 5, + CELL_DMUX_PAMF_ERROR_FATAL = 6, +}; + +enum CellDmuxPamfM2vLevel +{ + CELL_DMUX_PAMF_M2V_MP_LL = 0, + CELL_DMUX_PAMF_M2V_MP_ML, + CELL_DMUX_PAMF_M2V_MP_H14, + CELL_DMUX_PAMF_M2V_MP_HL, +}; + +enum CellDmuxPamfAvcLevel +{ + CELL_DMUX_PAMF_AVC_LEVEL_2P1 = 21, + CELL_DMUX_PAMF_AVC_LEVEL_3P0 = 30, + CELL_DMUX_PAMF_AVC_LEVEL_3P1 = 31, + CELL_DMUX_PAMF_AVC_LEVEL_3P2 = 32, + 
CELL_DMUX_PAMF_AVC_LEVEL_4P1 = 41, + CELL_DMUX_PAMF_AVC_LEVEL_4P2 = 42, +}; + +struct CellDmuxPamfAuSpecificInfoM2v +{ + be_t reserved1; +}; + +struct CellDmuxPamfAuSpecificInfoAvc +{ + be_t reserved1; +}; + +struct CellDmuxPamfAuSpecificInfoLpcm +{ + u8 channelAssignmentInfo; + u8 samplingFreqInfo; + u8 bitsPerSample; +}; + +struct CellDmuxPamfAuSpecificInfoAc3 +{ + be_t reserved1; +}; + +struct CellDmuxPamfAuSpecificInfoAtrac3plus +{ + be_t reserved1; +}; + +struct CellDmuxPamfAuSpecificInfoUserData +{ + be_t reserved1; +}; + +struct CellDmuxPamfEsSpecificInfoM2v +{ + be_t profileLevel; +}; + +struct CellDmuxPamfEsSpecificInfoAvc +{ + be_t level; +}; + +struct CellDmuxPamfEsSpecificInfoLpcm +{ + be_t samplingFreq; + be_t numOfChannels; + be_t bitsPerSample; +}; + +struct CellDmuxPamfEsSpecificInfoAc3 +{ + be_t reserved1; +}; + +struct CellDmuxPamfEsSpecificInfoAtrac3plus +{ + be_t reserved1; +}; + +struct CellDmuxPamfEsSpecificInfoUserData +{ + be_t reserved1; +}; + +enum CellDmuxPamfSamplingFrequency +{ + CELL_DMUX_PAMF_FS_48K = 48000, +}; + +enum CellDmuxPamfBitsPerSample +{ + CELL_DMUX_PAMF_BITS_PER_SAMPLE_16 = 16, + CELL_DMUX_PAMF_BITS_PER_SAMPLE_24 = 24, +}; + +enum CellDmuxPamfLpcmChannelAssignmentInfo +{ + CELL_DMUX_PAMF_LPCM_CH_M1 = 1, + CELL_DMUX_PAMF_LPCM_CH_LR = 3, + CELL_DMUX_PAMF_LPCM_CH_LRCLSRSLFE = 9, + CELL_DMUX_PAMF_LPCM_CH_LRCLSCS1CS2RSLFE = 11, +}; + +enum CellDmuxPamfLpcmFs +{ + CELL_DMUX_PAMF_LPCM_FS_48K = 1, +}; + +enum CellDmuxPamfLpcmBitsPerSamples +{ + CELL_DMUX_PAMF_LPCM_BITS_PER_SAMPLE_16 = 1, + CELL_DMUX_PAMF_LPCM_BITS_PER_SAMPLE_24 = 3, +}; + +constexpr u8 DMUX_PAMF_LPCM_FRAMES_PER_SEC = 200; + +struct CellDmuxPamfSpecificInfo +{ + be_t thisSize; + b8 programEndCodeCb; +}; + +struct CellDmuxPamfResource +{ + be_t ppuThreadPriority; + be_t ppuThreadStackSize; + be_t numOfSpus; + be_t spuThreadPriority; + vm::bptr memAddr; be_t memSize; }; -struct CellDmuxPamfEsAttr +struct DmuxPamfAuInfo { - be_t auQueueMaxSize; - be_t memSize; - be_t 
specificInfoSize; + vm::bptr addr; + be_t size; + CellCodecTimeStamp pts; + CellCodecTimeStamp dts; + be_t user_data; + vm::bptr specific_info; + be_t specific_info_size; + b8 is_rap; }; + +CHECK_SIZE(DmuxPamfAuInfo, 0x30); + +constexpr u32 DMUX_PAMF_VERSION = 0x280000; +constexpr s32 DMUX_PAMF_MAX_ENABLED_ES_NUM = 0x40; + +// HLE exclusive, for savestates +enum class dmux_pamf_state : u8 +{ + initial, + waiting_for_au_released, + waiting_for_au_released_error, + waiting_for_event, + starting_demux_done, + starting_demux_done_mutex_lock_error, + starting_demux_done_mutex_unlock_error, + starting_demux_done_checking_stream_reset, + starting_demux_done_checking_stream_reset_error, + setting_au_reset, + setting_au_reset_error, + processing_event, + au_found_waiting_for_spu, + unsetting_au_reset, + demux_done_notifying, + demux_done_mutex_lock, + demux_done_cond_signal, + resuming_demux_mutex_lock, + resuming_demux_waiting_for_spu, + sending_fatal_err +}; + +enum class DmuxPamfSequenceState : u32 +{ + dormant, + resetting, + running +}; + +struct DmuxPamfElementaryStream; + +class DmuxPamfContext +{ + // HLE exclusive + // These are local variables in the PPU thread function, they're here for savestates + DmuxPamfEvent event; + u64 au_queue_full_bitset; + b8 stream_reset_started; + b8 stream_reset_in_progress; + + u32 hle_spu_thread_id; + dmux_pamf_state savestate; + + [[maybe_unused]] u8 spurs[0xf6b]; // CellSpurs, 0x1000 bytes on LLE + [[maybe_unused]] vm::bptr spurs_addr; // CellSpurs* + [[maybe_unused]] b8 use_existing_spurs; + + [[maybe_unused]] alignas(0x80) u8 spurs_taskset[0x1900]; // CellSpursTaskset + [[maybe_unused]] be_t spurs_task_id; // CellSpursTaskId + vm::bptr spurs_context_addr; + + [[maybe_unused]] u8 reserved1[0x10]; + + vm::bptr _this; + be_t this_size; + be_t version; + + DmuxCb notify_demux_done; + DmuxCb notify_prog_end_code; + DmuxCb notify_fatal_err; + + CellDmuxPamfResource resource; + + be_t thread_id; // sys_ppu_thread_t + + be_t unk; // 
Unused + + be_t ppu_thread_stack_size; + + be_t au_released_bitset; // Each bit corresponds to an elementary stream, if a bit is set then cellDmuxReleaseAu() was called for that elementary stream + + b8 stream_reset_requested; + + be_t sequence_state; + + be_t max_enabled_es_num; + be_t enabled_es_num; + vm::bptr elementary_streams[DMUX_PAMF_MAX_ENABLED_ES_NUM]; + + be_t mutex; // sys_mutex_t + be_t cond; // sys_cond_t + + vm::bptr> cmd_queue_addr_; // Same as cmd_queue_addr, unused + vm::bptr cmd_queue_buffer_addr_; // Same as cmd_queue_buffer_addr, unused + + vm::bptr> cmd_queue_addr; // CellSpursQueue* + vm::bptr, 1>> cmd_result_queue_addr; // CellSpursQueue* + vm::bptr> stream_info_queue_addr; // CellSpursQueue* + vm::bptr> event_queue_addr; // CellSpursQueue* + + vm::bptr cmd_queue_buffer_addr; + vm::bptr[1]> cmd_result_queue_buffer_addr; + vm::bptr event_queue_buffer_addr; + vm::bptr stream_info_queue_buffer_addr; + + vm::bptr> cmd_queue_addr__; // Same as cmd_queue_addr, unused + + be_t user_data; + + b8 is_raw_es; + + be_t next_es_id; + + char spurs_taskset_name[24]; + + [[maybe_unused]] u8 reserved2[928]; // Unused + + dmux_pamf_hle_spurs_queue cmd_queue; // CellSpursQueue + dmux_pamf_hle_spurs_queue, 1> cmd_result_queue; // CellSpursQueue + dmux_pamf_hle_spurs_queue stream_info_queue; // CellSpursQueue + dmux_pamf_hle_spurs_queue event_queue; // CellSpursQueue + + DmuxPamfCommand cmd_queue_buffer[1]; + alignas(0x80) be_t cmd_result_queue_buffer[1]; + DmuxPamfStreamInfo stream_info_queue_buffer[1]; + DmuxPamfEvent event_queue_buffer[4 + 2 * DMUX_PAMF_MAX_ENABLED_ES_NUM]; + + alignas(0x80) u8 spurs_context[0x36400]; + + + template + void send_spu_command_and_wait(ppu_thread& ppu, bool waiting_for_spu_state, auto&&... 
cmd_params); + + error_code wait_au_released_or_stream_reset(ppu_thread& ppu, u64 au_queue_full_bitset, b8& stream_reset_started, dmux_pamf_state& savestate); + + template + error_code set_au_reset(ppu_thread& ppu); + + template + static error_code callback(ppu_thread& ppu, DmuxCb cb, auto&&... args); + + friend struct DmuxPamfElementaryStream; + +public: + void run_spu_thread(); + + DmuxPamfElementaryStream* find_es(u16 stream_id, u16 private_stream_id); + + void exec(ppu_thread& ppu); + + static error_code open(ppu_thread& ppu, const CellDmuxPamfResource& res, vm::cptr res_spurs, const DmuxCb& notify_dmux_done, + const DmuxCb& notify_prog_end_code, const DmuxCb& notify_fatal_err, vm::bptr& handle); + error_code create_thread(ppu_thread& ppu); + error_code close(ppu_thread& ppu); + error_code reset_stream(ppu_thread& ppu); + error_code join_thread(ppu_thread& ppu); + + template + error_code set_stream(ppu_thread& ppu, vm::cptr stream_address, u32 stream_size, b8 discontinuity, u32 user_data); + + template + error_code enable_es(ppu_thread& ppu, u16 stream_id, u16 private_stream_id, bool is_avc, vm::cptr es_specific_info, vm::ptr mem_addr, u32 mem_size, const DmuxCb& notify_au_found, + const DmuxCb& notify_flush_done, vm::bptr& es); + + error_code reset_stream_and_wait_done(ppu_thread& ppu); +}; + +static_assert(std::is_standard_layout_v && std::is_trivial_v); +CHECK_SIZE_ALIGN(DmuxPamfContext, 0x3d880, 0x80); + +struct CellDmuxPamfHandle +{ + vm::bptr demuxer; + + DmuxCb notify_demux_done; + DmuxCb notify_prog_end_code; + DmuxCb notify_fatal_err; +}; + +CHECK_SIZE(CellDmuxPamfHandle, 0x1c); + +struct DmuxPamfElementaryStream +{ + vm::bptr _this; + be_t this_size; + u8 this_index; + + vm::bptr demuxer; + + DmuxCb notify_au_found; + DmuxCb notify_flush_done; + + be_t stream_id; + be_t private_stream_id; + b8 is_avc; + + vm::bptr au_queue_buffer; + be_t unk; // Likely au_queue_buffer_size, unused + be_t au_max_size; + u8 au_specific_info[0x10]; + be_t 
au_specific_info_size; + + b8 reset_next_au; + + be_t es_id; + + u8 reserved[72]; + + error_code release_au(ppu_thread& ppu, vm::ptr au_addr, u32 au_size) const; + error_code disable_es(ppu_thread& ppu); + error_code flush_es(ppu_thread& ppu) const; + error_code reset_es(ppu_thread& ppu) const; +}; + +static_assert(std::is_standard_layout_v && std::is_trivial_v); +CHECK_SIZE_ALIGN(DmuxPamfElementaryStream, 0x98, 4); + +struct CellDmuxPamfEsHandle +{ + vm::bptr es; + + DmuxCb notify_au_found; + DmuxCb notify_flush_done; +}; + +CHECK_SIZE(CellDmuxPamfEsHandle, 0x14); diff --git a/rpcs3/Emu/Cell/Modules/cellGame.cpp b/rpcs3/Emu/Cell/Modules/cellGame.cpp index 371aa2a7b3..d7073a4e47 100644 --- a/rpcs3/Emu/Cell/Modules/cellGame.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGame.cpp @@ -492,8 +492,8 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr dirName strcpy_trunc(get->getParam.titleLang[i], psf::get_string(psf, fmt::format("TITLE_%02d", i))); } - cellGame.warning("cellHddGameCheck(): Data exists:\nATTRIBUTE: 0x%x, RESOLUTION: 0x%x, RESOLUTION: 0x%x, SOUND_FORMAT: 0x%x, dataVersion: %s" - , get->getParam.attribute, get->getParam.resolution, get->getParam.soundFormat, get->getParam.soundFormat, std::span(reinterpret_cast(get->getParam.dataVersion), 6)); + cellGame.warning("cellHddGameCheck(): Data exists:\nATTRIBUTE: 0x%x, RESOLUTION: 0x%x, SOUND_FORMAT: 0x%x, dataVersion: %s" + , get->getParam.attribute, get->getParam.resolution, get->getParam.soundFormat, std::span(reinterpret_cast(get->getParam.dataVersion), 6)); } // TODO ? 
@@ -520,10 +520,11 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr dirName return CELL_GAMEDATA_ERROR_PARAM; } - if (!fs::create_path(vfs::get(usrdir))) - { - return {CELL_GAME_ERROR_ACCESS_ERROR, usrdir}; - } + // Nuked until correctly reversed engineered + //if (!fs::create_path(vfs::get(usrdir))) + //{ + // return {CELL_GAME_ERROR_ACCESS_ERROR, usrdir}; + //} } // Nuked until correctly reversed engineered @@ -580,7 +581,7 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr dirName break; default: - cellGame.error("cellHddGameCheck(): callback returned unknown error (code=0x%x). Error message: %s", result->invalidMsg); + cellGame.error("cellHddGameCheck(): callback returned unknown error (code=0x%x). Error message: %s", result->result, result->invalidMsg); error_msg = get_localized_string(localized_string_id::CELL_HDD_GAME_CHECK_INVALID, "%s", result->invalidMsg); break; } @@ -1199,7 +1200,7 @@ error_code cellGameDataCheckCreate2(ppu_thread& ppu, u32 version, vm::cptr break; default: - cellGame.error("cellGameDataCheckCreate2(): callback returned unknown error (code=0x%x). Error message: %s", cbResult->invalidMsg); + cellGame.error("cellGameDataCheckCreate2(): callback returned unknown error (code=0x%x). 
Error message: %s", cbResult->result, cbResult->invalidMsg); error_msg = get_localized_string(localized_string_id::CELL_GAMEDATA_CHECK_INVALID, "%s", cbResult->invalidMsg); break; } @@ -1747,7 +1748,7 @@ error_code cellGameThemeInstall(vm::cptr usrdirPath, vm::cptr fileNa { u32 magic{}; - if (src_path.ends_with(".p3t") || !theme.read(magic) || magic != "P3TF"_u32) + if (!fmt::to_lower(src_path).ends_with(".p3t") || !theme.read(magic) || magic != "P3TF"_u32) { return CELL_GAME_ERROR_INVALID_THEME_FILE; } @@ -1819,7 +1820,7 @@ error_code cellGameThemeInstallFromBuffer(ppu_thread& ppu, u32 fileSize, u32 buf const u32 read_size = std::min(bufSize, fileSize - file_offset); cellGame.notice("cellGameThemeInstallFromBuffer: writing %d bytes at pos %d", read_size, file_offset); - if (theme.write(reinterpret_cast(buf.get_ptr()) + file_offset, read_size) != read_size) + if (theme.write(reinterpret_cast(buf.get_ptr()), read_size) != read_size) { cellGame.error("cellGameThemeInstallFromBuffer: failed to write to destination file '%s' (error=%s)", dst_path, fs::g_tls_error); diff --git a/rpcs3/Emu/Cell/Modules/cellGem.cpp b/rpcs3/Emu/Cell/Modules/cellGem.cpp index 3c7b299af7..f9f5ea4100 100644 --- a/rpcs3/Emu/Cell/Modules/cellGem.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp @@ -770,8 +770,8 @@ namespace gem if constexpr (use_gain) { dst0[0] = static_cast(std::clamp(r * gain_r, 0.0f, 255.0f)); - dst0[1] = static_cast(std::clamp(b * gain_b, 0.0f, 255.0f)); - dst0[2] = static_cast(std::clamp(g * gain_g, 0.0f, 255.0f)); + dst0[1] = static_cast(std::clamp(g * gain_g, 0.0f, 255.0f)); + dst0[2] = static_cast(std::clamp(b * gain_b, 0.0f, 255.0f)); } else { @@ -822,8 +822,8 @@ namespace gem if constexpr (use_gain) { dst0[0] = static_cast(std::clamp(r * gain_r, 0.0f, 255.0f)); - dst0[1] = static_cast(std::clamp(b * gain_b, 0.0f, 255.0f)); - dst0[2] = static_cast(std::clamp(g * gain_g, 0.0f, 255.0f)); + dst0[1] = static_cast(std::clamp(g * gain_g, 0.0f, 255.0f)); + dst0[2] = 
static_cast(std::clamp(b * gain_b, 0.0f, 255.0f)); } else { @@ -845,6 +845,53 @@ namespace gem debayer_raw8_impl(src, dst, alpha, gain_r, gain_g, gain_b); } + template + static inline void debayer_raw8_downscale_impl(const u8* src, u8* dst, u8 alpha, f32 gain_r, f32 gain_g, f32 gain_b) + { + constexpr u32 in_pitch = 640; + constexpr u32 out_pitch = 320 * 4; + + // Simple debayer + for (s32 y = 0; y < 240; y++) + { + const u8* src0 = src + y * 2 * in_pitch; + const u8* src1 = src0 + in_pitch; + + u8* dst0 = dst + y * out_pitch; + + for (s32 x = 0; x < 320; x++, dst0 += 4, src0 += 2, src1 += 2) + { + const u8 b = src0[0]; + const u8 g0 = src0[1]; + const u8 g1 = src1[0]; + const u8 r = src1[1]; + const u8 g = (g0 + g1) >> 1; + + if constexpr (use_gain) + { + dst0[0] = static_cast(std::clamp(r * gain_r, 0.0f, 255.0f)); + dst0[1] = static_cast(std::clamp(g * gain_g, 0.0f, 255.0f)); + dst0[2] = static_cast(std::clamp(b * gain_b, 0.0f, 255.0f)); + } + else + { + dst0[0] = r; + dst0[1] = g; + dst0[2] = b; + } + dst0[3] = alpha; + } + } + } + + static void debayer_raw8_downscale(const u8* src, u8* dst, u8 alpha, f32 gain_r, f32 gain_g, f32 gain_b) + { + if (gain_r != 1.0f || gain_g != 1.0f || gain_b != 1.0f) + debayer_raw8_downscale_impl(src, dst, alpha, gain_r, gain_g, gain_b); + else + debayer_raw8_downscale_impl(src, dst, alpha, gain_r, gain_g, gain_b); + } + bool convert_image_format(CellCameraFormat input_format, const CellGemVideoConvertAttribute& vc, const std::vector& video_data_in, u32 width, u32 height, u8* video_data_out, u32 video_data_out_size, u8* buffer_memory, @@ -881,9 +928,9 @@ namespace gem const u8* src_data = video_data_in.data(); const u8 alpha = vc.alpha; - const f32 gain_r = vc.gain * vc.blue_gain; + const f32 gain_r = vc.gain * vc.red_gain; const f32 gain_g = vc.gain * vc.green_gain; - const f32 gain_b = vc.gain * vc.red_gain; + const f32 gain_b = vc.gain * vc.blue_gain; // Only RAW8 should be relevant for cellGem unless I'm mistaken if 
(input_format == CELL_CAMERA_RAW8) @@ -1183,34 +1230,7 @@ namespace gem { case CELL_CAMERA_RAW8: { - const u32 in_pitch = width; - const u32 out_pitch = width * 4 / 2; - - for (u32 y = 0; y < height - 1; y += 2) - { - const u8* src0 = src_data + y * in_pitch; - const u8* src1 = src0 + in_pitch; - - u8* dst0 = video_data_out + (y / 2) * out_pitch; - u8* dst1 = dst0 + out_pitch; - - for (u32 x = 0; x < width - 1; x += 2, src0 += 2, src1 += 2, dst0 += 4, dst1 += 4) - { - const u8 b = src0[0]; - const u8 g0 = src0[1]; - const u8 g1 = src1[0]; - const u8 r = src1[1]; - - const u8 top[4] = { r, g0, b, alpha }; - const u8 bottom[4] = { r, g1, b, alpha }; - - // Top-Left - std::memcpy(dst0, top, 4); - - // Bottom-Left Pixel - std::memcpy(dst1, bottom, 4); - } - } + debayer_raw8_downscale(src_data, video_data_out, alpha, gain_r, gain_g, gain_b); break; } case CELL_CAMERA_RGBA: @@ -1609,13 +1629,8 @@ public: return false; } - if (!m_camera_info.bytesize) - { - cellGem.error("gem_tracker: unexpected image size: %d", m_camera_info.bytesize); - return false; - } - m_tracker.set_image_data(m_camera_info.buffer.get_ptr(), m_camera_info.bytesize, m_camera_info.width, m_camera_info.height, m_camera_info.format); + m_framenumber++; // using framenumber instead of timestamp since the timestamp could be identical return true; } @@ -1648,6 +1663,7 @@ public: } auto& gem = g_fxo->get(); + u64 last_framenumber = 0; while (thread_ctrl::state() != thread_state::aborting) { @@ -1663,6 +1679,13 @@ public: } } + if (std::exchange(last_framenumber, m_framenumber.load()) == last_framenumber) + { + cellGem.warning("Tracker woke up without new frame. 
Skipping processing (framenumber=%d)", last_framenumber); + tracker_done(); + continue; + } + m_busy.release(true); // Update PS Move LED colors @@ -1751,9 +1774,16 @@ public: shared_mutex mutex; + gem_tracker& operator=(thread_state) noexcept + { + wake_up_tracker(); + return *this; + } + private: atomic_t m_wake_up_tracker = 0; atomic_t m_tracker_done = 0; + atomic_t m_framenumber = 0; atomic_t m_busy = false; ps_move_tracker m_tracker{}; CellCameraInfoEx m_camera_info{}; @@ -1873,21 +1903,10 @@ static inline void pos_to_gem_state(u32 gem_num, gem_config::gem_controller& con gem_state->pos[2] = controller.distance_mm; gem_state->pos[3] = 0.f; - // TODO: calculate handle position based on our world coordinate and the angles - gem_state->handle_pos[0] = camera_x; - gem_state->handle_pos[1] = camera_y; - gem_state->handle_pos[2] = controller.distance_mm + 10.0f; - gem_state->handle_pos[3] = 0.f; - // Calculate orientation - if (g_cfg.io.move == move_handler::real || (g_cfg.io.move == move_handler::fake && move_data.orientation_enabled)) - { - gem_state->quat[0] = move_data.quaternion.x(); - gem_state->quat[1] = move_data.quaternion.y(); - gem_state->quat[2] = move_data.quaternion.z(); - gem_state->quat[3] = move_data.quaternion.w(); - } - else + ps_move_data::vect<4> quat = move_data.quaternion; + + if (g_cfg.io.move != move_handler::real && !(g_cfg.io.move == move_handler::fake && move_data.orientation_enabled)) { const f32 max_angle_per_side_h = g_cfg.io.fake_move_rotation_cone_h / 2.0f; const f32 max_angle_per_side_v = g_cfg.io.fake_move_rotation_cone_v / 2.0f; @@ -1901,17 +1920,27 @@ static inline void pos_to_gem_state(u32 gem_num, gem_config::gem_controller& con const f32 cy = std::cos(yaw * 0.5f); const f32 sy = std::sin(yaw * 0.5f); - const f32 q_x = sr * cp * cy - cr * sp * sy; - const f32 q_y = cr * sp * cy + sr * cp * sy; - const f32 q_z = cr * cp * sy - sr * sp * cy; - const f32 q_w = cr * cp * cy + sr * sp * sy; - - gem_state->quat[0] = q_x; - 
gem_state->quat[1] = q_y; - gem_state->quat[2] = q_z; - gem_state->quat[3] = q_w; + quat.x() = sr * cp * cy - cr * sp * sy; + quat.y() = cr * sp * cy + sr * cp * sy; + quat.z() = cr * cp * sy - sr * sp * cy; + quat.w() = cr * cp * cy + sr * sp * sy; } + gem_state->quat[0] = quat.x(); + gem_state->quat[1] = quat.y(); + gem_state->quat[2] = quat.z(); + gem_state->quat[3] = quat.w(); + + // Calculate handle position based on our world coordinate and the current orientation + constexpr ps_move_data::vect<3> offset_local_mm({0.f, 0.f, -45.f}); // handle is ~45 mm below sphere + const ps_move_data::vect<3> offset_world = ps_move_data::rotate_vector(quat, offset_local_mm); + + gem_state->handle_pos[0] = gem_state->pos[0] - offset_world.x(); // Flip x offset + gem_state->handle_pos[1] = gem_state->pos[1] - offset_world.y(); // Flip y offset + gem_state->handle_pos[2] = gem_state->pos[2] + offset_world.z(); + gem_state->handle_pos[3] = 0.f; + + // Calculate velocity if constexpr (!ps_move_data::use_imu_for_velocity) { move_data.update_velocity(shared_data.frame_timestamp_us, gem_state->pos); @@ -1920,6 +1949,10 @@ static inline void pos_to_gem_state(u32 gem_num, gem_config::gem_controller& con { gem_state->vel[i] = move_data.vel_world[i]; gem_state->accel[i] = move_data.accel_world[i]; + + // TODO: maybe this also needs to be adjusted depending on the orientation + gem_state->handle_vel[i] = gem_state->vel[i]; + gem_state->handle_accel[i] = gem_state->accel[i]; } } @@ -3612,7 +3645,7 @@ error_code cellGemReadExternalPortDeviceInfo(u32 gem_num, vm::ptr ext_id, v if (!pad->move_data.external_device_read_requested) { *ext_id = controller.ext_id = pad->move_data.external_device_id; - std::memcpy(pad->move_data.external_device_read.data(), ext_info.get_ptr(), CELL_GEM_EXTERNAL_PORT_OUTPUT_SIZE); + std::memcpy(ext_info.get_ptr(), pad->move_data.external_device_read.data(), CELL_GEM_EXTERNAL_PORT_DEVICE_INFO_SIZE); break; } } @@ -3876,13 +3909,15 @@ error_code 
cellGemUpdateStart(vm::cptr camera_frame, u64 timestamp) gem.camera_frame = camera_frame.addr(); - if (!tracker.set_image(gem.camera_frame)) + const bool image_set = tracker.set_image(gem.camera_frame); + + tracker.wake_up_tracker(); + + if (!image_set) { return not_an_error(CELL_GEM_NO_VIDEO); } - tracker.wake_up_tracker(); - return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp index 97375c4e6d..83b001cc52 100644 --- a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp +++ b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp @@ -192,7 +192,46 @@ error_code cellNetCtlDelHandler(s32 hid) error_code cellNetCtlGetInfo(s32 code, vm::ptr info) { - cellNetCtl.warning("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); + bool log_it_once = false; + + switch (code) + { + case CELL_NET_CTL_INFO_ETHER_ADDR: + case CELL_NET_CTL_INFO_DEVICE: + case CELL_NET_CTL_INFO_MTU: + case CELL_NET_CTL_INFO_LINK_TYPE: + case CELL_NET_CTL_INFO_IP_CONFIG: + case CELL_NET_CTL_INFO_IP_ADDRESS: + case CELL_NET_CTL_INFO_NETMASK: + case CELL_NET_CTL_INFO_DEFAULT_ROUTE: + case CELL_NET_CTL_INFO_HTTP_PROXY_CONFIG: + case CELL_NET_CTL_INFO_UPNP_CONFIG: + { + log_it_once = true; + break; + } + default: + { + break; + } + } + + bool log_it = true; + + if (log_it_once && vm::check_addr(info.addr())) + { + struct logged_t + { + std::array, 256> logged_code{}; + }; + + if (g_fxo->get().logged_code[::narrow(code)].exchange(true)) + { + log_it = false; + } + } + + (log_it ? 
cellNetCtl.warning : cellNetCtl.trace)("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); auto& nph = g_fxo->get>(); diff --git a/rpcs3/Emu/Cell/Modules/cellPamf.cpp b/rpcs3/Emu/Cell/Modules/cellPamf.cpp index d63e7bb3e2..4bb383c2d2 100644 --- a/rpcs3/Emu/Cell/Modules/cellPamf.cpp +++ b/rpcs3/Emu/Cell/Modules/cellPamf.cpp @@ -5,14 +5,6 @@ #include #include "cellPamf.h" -const std::function SQUEUE_ALWAYS_EXIT = []() { return true; }; -const std::function SQUEUE_NEVER_EXIT = []() { return false; }; - -bool squeue_test_exit() -{ - return Emu.IsStopped(); -} - LOG_CHANNEL(cellPamf); template<> diff --git a/rpcs3/Emu/Cell/Modules/cellPamf.h b/rpcs3/Emu/Cell/Modules/cellPamf.h index e42acf60f4..14608f9100 100644 --- a/rpcs3/Emu/Cell/Modules/cellPamf.h +++ b/rpcs3/Emu/Cell/Modules/cellPamf.h @@ -1,5 +1,6 @@ #pragma once +#include "Emu/Cell/ErrorCodes.h" #include "Emu/Memory/vm_ptr.h" // Error Codes @@ -594,345 +595,3 @@ struct CellPamfReader CHECK_SIZE(CellPamfReader, 128); error_code cellPamfReaderInitialize(vm::ptr pSelf, vm::cptr pAddr, u64 fileSize, u32 attribute); - -#include -#include - -extern const std::function SQUEUE_ALWAYS_EXIT; -extern const std::function SQUEUE_NEVER_EXIT; - -bool squeue_test_exit(); - -// TODO: eliminate this boolshit -template -class squeue_t -{ - struct squeue_sync_var_t - { - struct - { - u32 position : 31; - u32 pop_lock : 1; - }; - struct - { - u32 count : 31; - u32 push_lock : 1; - }; - }; - - atomic_t m_sync; - - mutable std::mutex m_rcv_mutex; - mutable std::mutex m_wcv_mutex; - mutable std::condition_variable m_rcv; - mutable std::condition_variable m_wcv; - - T m_data[sq_size]; - - enum squeue_sync_var_result : u32 - { - SQSVR_OK = 0, - SQSVR_LOCKED = 1, - SQSVR_FAILED = 2, - }; - -public: - squeue_t() - : m_sync(squeue_sync_var_t{}) - { - } - - static u32 get_max_size() - { - return sq_size; - } - - bool is_full() const - { - return m_sync.load().count == sq_size; - } - - bool push(const T& data, 
const std::function& test_exit) - { - u32 pos = 0; - - while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.push_lock) - { - return SQSVR_LOCKED; - } - if (sync.count == sq_size) - { - return SQSVR_FAILED; - } - - sync.push_lock = 1; - pos = sync.position + sync.count; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock wcv_lock(m_wcv_mutex); - m_wcv.wait_for(wcv_lock, std::chrono::milliseconds(1)); - } - - m_data[pos >= sq_size ? pos - sq_size : pos] = data; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.push_lock); - sync.push_lock = 0; - sync.count++; - }); - - m_rcv.notify_one(); - m_wcv.notify_one(); - return true; - } - - bool push(const T& data, const volatile bool* do_exit) - { - return push(data, [do_exit]() { return do_exit && *do_exit; }); - } - - bool push(const T& data) - { - return push(data, SQUEUE_NEVER_EXIT); - } - - bool try_push(const T& data) - { - return push(data, SQUEUE_ALWAYS_EXIT); - } - - bool pop(T& data, const std::function& test_exit) - { - u32 pos = 0; - - while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (!sync.count) - { - return SQSVR_FAILED; - } - if (sync.pop_lock) - { - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - pos = sync.position; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - data = m_data[pos]; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - sync.pop_lock = 0; - 
sync.position++; - sync.count--; - if (sync.position == sq_size) - { - sync.position = 0; - } - }); - - m_rcv.notify_one(); - m_wcv.notify_one(); - return true; - } - - bool pop(T& data, const volatile bool* do_exit) - { - return pop(data, [do_exit]() { return do_exit && *do_exit; }); - } - - bool pop(T& data) - { - return pop(data, SQUEUE_NEVER_EXIT); - } - - bool try_pop(T& data) - { - return pop(data, SQUEUE_ALWAYS_EXIT); - } - - bool peek(T& data, u32 start_pos, const std::function& test_exit) - { - ensure(start_pos < sq_size); - u32 pos = 0; - - while (u32 res = m_sync.atomic_op([&pos, start_pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.count <= start_pos) - { - return SQSVR_FAILED; - } - if (sync.pop_lock) - { - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - pos = sync.position + start_pos; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - data = m_data[pos >= sq_size ? 
pos - sq_size : pos]; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - sync.pop_lock = 0; - }); - - m_rcv.notify_one(); - return true; - } - - bool peek(T& data, u32 start_pos, const volatile bool* do_exit) - { - return peek(data, start_pos, [do_exit]() { return do_exit && *do_exit; }); - } - - bool peek(T& data, u32 start_pos = 0) - { - return peek(data, start_pos, SQUEUE_NEVER_EXIT); - } - - bool try_peek(T& data, u32 start_pos = 0) - { - return peek(data, start_pos, SQUEUE_ALWAYS_EXIT); - } - - class squeue_data_t - { - T* const m_data; - const u32 m_pos; - const u32 m_count; - - squeue_data_t(T* data, u32 pos, u32 count) - : m_data(data) - , m_pos(pos) - , m_count(count) - { - } - - public: - T& operator [] (u32 index) - { - ensure(index < m_count); - index += m_pos; - index = index < sq_size ? index : index - sq_size; - return m_data[index]; - } - }; - - void process(void(*proc)(squeue_data_t data)) - { - u32 pos, count; - - while (m_sync.atomic_op([&pos, &count](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.pop_lock || sync.push_lock) - { - return SQSVR_LOCKED; - } - - pos = sync.position; - count = sync.count; - sync.pop_lock = 1; - sync.push_lock = 1; - return SQSVR_OK; - })) - { - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - proc(squeue_data_t(m_data, pos, count)); - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - ensure(!!sync.push_lock); - sync.pop_lock = 0; - sync.push_lock = 0; - }); - - m_wcv.notify_one(); - m_rcv.notify_one(); - } - - void clear() - { - while (m_sync.atomic_op([](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.pop_lock || sync.push_lock) - 
{ - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - sync.push_lock = 1; - return SQSVR_OK; - })) - { - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - m_sync.exchange({}); - m_wcv.notify_one(); - m_rcv.notify_one(); - } -}; diff --git a/rpcs3/Emu/Cell/Modules/cellSaveData.cpp b/rpcs3/Emu/Cell/Modules/cellSaveData.cpp index a4d160fdcb..7878e86642 100644 --- a/rpcs3/Emu/Cell/Modules/cellSaveData.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSaveData.cpp @@ -876,39 +876,42 @@ static NEVER_INLINE error_code savedata_op(ppu_thread& ppu, u32 operation, u32 v // Sort the entries { - const u32 order = setList->sortOrder; const u32 type = setList->sortType; - std::sort(save_entries.begin(), save_entries.end(), [order, type](const SaveDataEntry& entry1, const SaveDataEntry& entry2) -> bool + auto comp = [type](const SaveDataEntry& entry1, const SaveDataEntry& entry2) -> bool { const bool mtime_lower = entry1.mtime < entry2.mtime; const bool mtime_equal = entry1.mtime == entry2.mtime; const bool subtitle_lower = entry1.subtitle < entry2.subtitle; const bool subtitle_equal = entry1.subtitle == entry2.subtitle; - const bool revert_order = order == CELL_SAVEDATA_SORTORDER_DESCENT; if (type == CELL_SAVEDATA_SORTTYPE_MODIFIEDTIME) { if (mtime_equal) { - return subtitle_lower != revert_order; + return subtitle_lower; } - return mtime_lower != revert_order; + return mtime_lower; } else if (type == CELL_SAVEDATA_SORTTYPE_SUBTITLE) { if (subtitle_equal) { - return mtime_lower != revert_order; + return mtime_lower; } - return subtitle_lower != revert_order; + return subtitle_lower; } ensure(false); return true; - }); + }; + + if (setList->sortOrder == CELL_SAVEDATA_SORTORDER_ASCENT) + std::sort(save_entries.begin(), save_entries.end(), comp); + else + std::sort(save_entries.rbegin(), save_entries.rend(), comp); } // Fill the listGet->dirList array diff --git a/rpcs3/Emu/Cell/Modules/cellVdec.cpp b/rpcs3/Emu/Cell/Modules/cellVdec.cpp 
index 5cf5e64a51..1850416ba3 100644 --- a/rpcs3/Emu/Cell/Modules/cellVdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVdec.cpp @@ -1385,8 +1385,8 @@ error_code cellVdecGetPictureExt(ppu_thread& ppu, u32 handle, vm::cptrsws = sws_getCachedContext(vdec->sws, w, h, in_f, w, h, out_f, SWS_POINT, nullptr, nullptr, nullptr); - u8* in_data[4] = { frame->data[0], frame->data[1], frame->data[2], alpha_plane.get() }; - int in_line[4] = { frame->linesize[0], frame->linesize[1], frame->linesize[2], w * 1 }; + const u8* in_data[4] = { frame->data[0], frame->data[1], frame->data[2], alpha_plane.get() }; + const int in_line[4] = { frame->linesize[0], frame->linesize[1], frame->linesize[2], w * 1 }; u8* out_data[4] = { outBuff.get_ptr() }; int out_line[4] = { w * 4 }; // RGBA32 or ARGB32 @@ -1462,7 +1462,12 @@ error_code cellVdecGetPicItem(ppu_thread& ppu, u32 handle, vm::pptr picInfo; + union + { + CellVdecAvcInfo avcInfo; + CellVdecDivxInfo divxInfo; + CellVdecMpeg2Info mpeg2Info; + } picInfo; }; AVFrame* frame{}; diff --git a/rpcs3/Emu/Cell/Modules/sceNp.cpp b/rpcs3/Emu/Cell/Modules/sceNp.cpp index 2a2075f0ca..e82491eac1 100644 --- a/rpcs3/Emu/Cell/Modules/sceNp.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNp.cpp @@ -871,7 +871,7 @@ error_code sceNpDrmGetTimelimit(vm::cptr path, vm::ptr time_remain) } // Convert time to milliseconds - s64 msec = *sec * 1000ll + *nsec / 1000ll; + s64 msec = *sec * 1000ll + *nsec / 1'000'000ll; // Return the remaining time in microseconds if (npd.activate_time != 0 && msec < npd.activate_time) @@ -1199,7 +1199,7 @@ error_code _sceNpBasicSendMessage(vm::cptr to, vm::cptr data, u32 .msgFeatures = {}, .data = std::vector(static_cast(data.get_ptr()), static_cast(data.get_ptr()) + size)}; std::set npids; - npids.insert(std::string(to->handle.data)); + npids.insert(np::npid_to_string(*to)); nph.send_message(msg_data, npids); @@ -1228,7 +1228,7 @@ error_code sceNpBasicSendMessageGui(ppu_thread& ppu, vm::cptrmsgId, msg->mainType, msg->subType, msg->msgFeatures, 
msg->count, msg->npids); for (u32 i = 0; i < msg->count && msg->npids; i++) { - sceNp.trace("sceNpBasicSendMessageGui: NpId[%d] = %s", i, static_cast(&msg->npids[i].handle.data[0])); + sceNp.trace("sceNpBasicSendMessageGui: NpId[%d] = %s", i, np::npid_to_string(msg->npids[i])); } sceNp.notice("sceNpBasicSendMessageGui: subject: %s", msg->subject); sceNp.notice("sceNpBasicSendMessageGui: body: %s", msg->body); @@ -1398,7 +1398,7 @@ error_code sceNpBasicSendMessageGui(ppu_thread& ppu, vm::cptrcount; i++) { - npids.insert(std::string(msg->npids[i].handle.data)); + npids.insert(np::npid_to_string(msg->npids[i])); } } @@ -4242,19 +4242,16 @@ error_code sceNpManagerGetTicket(vm::ptr buffer, vm::ptr bufferSize) } const auto& ticket = nph.get_ticket(); - *bufferSize = static_cast(ticket.size()); if (!buffer) { + *bufferSize = static_cast(ticket.size()); return CELL_OK; } - if (*bufferSize < ticket.size()) - { - return SCE_NP_ERROR_INVALID_ARGUMENT; - } - - memcpy(buffer.get_ptr(), ticket.data(), ticket.size()); + const u32 size_read = std::min(::size32(ticket), static_cast(*bufferSize)); + std::memcpy(buffer.get_ptr(), ticket.data(), size_read); + *bufferSize = size_read; return CELL_OK; } @@ -5676,7 +5673,7 @@ error_code scenp_score_record_score(s32 transId, SceNpScoreBoardId boardId, SceN else { data = &gameInfo->nativeData[0]; - data_size = 64; + data_size = sizeof(gameInfo->nativeData); } nph.record_score(trans_ctx, boardId, score, scoreComment, data, data_size, tmpRank, async); @@ -7144,7 +7141,7 @@ error_code sceNpUtilCanonicalizeNpIdForPsp(vm::ptr npId) error_code sceNpUtilCmpNpId(vm::ptr id1, vm::ptr id2) { - sceNp.trace("sceNpUtilCmpNpId(id1=*0x%x(%s), id2=*0x%x(%s))", id1, id1 ? id1->handle.data : "", id2, id2 ? id2->handle.data : ""); + sceNp.trace("sceNpUtilCmpNpId(id1=*0x%x(%s), id2=*0x%x(%s))", id1, id1 ? np::npid_to_string(*id1) : std::string(), id2, id2 ? 
np::npid_to_string(*id2) : std::string()); if (!id1 || !id2) { diff --git a/rpcs3/Emu/Cell/Modules/sceNp.h b/rpcs3/Emu/Cell/Modules/sceNp.h index 88dd2d816b..e6b8bff945 100644 --- a/rpcs3/Emu/Cell/Modules/sceNp.h +++ b/rpcs3/Emu/Cell/Modules/sceNp.h @@ -1267,6 +1267,11 @@ struct SceNpOnlineId { char data[SCE_NET_NP_ONLINEID_MAX_LENGTH + 1]; // char term; char dummy[3]; + + bool operator<(const SceNpOnlineId& other) const + { + return memcmp(data, other.data, sizeof(data)) < 0; + } }; // NP ID structure @@ -1283,6 +1288,11 @@ struct SceNpId }; u8 reserved[8]; + + bool operator<(const SceNpId& other) const + { + return handle < other.handle; + } }; CHECK_SIZE_ALIGN(SceNpId, 0x24, 1); @@ -1397,9 +1407,9 @@ struct SceNpBasicMessageDetails // Presence details of an user struct SceNpBasicPresenceDetails { - s8 title[SCE_NP_BASIC_PRESENCE_TITLE_SIZE_MAX]; - s8 status[SCE_NP_BASIC_PRESENCE_STATUS_SIZE_MAX]; - s8 comment[SCE_NP_BASIC_PRESENCE_COMMENT_SIZE_MAX]; + char title[SCE_NP_BASIC_PRESENCE_TITLE_SIZE_MAX]; + char status[SCE_NP_BASIC_PRESENCE_STATUS_SIZE_MAX]; + char comment[SCE_NP_BASIC_PRESENCE_COMMENT_SIZE_MAX]; u8 data[SCE_NP_BASIC_MAX_PRESENCE_SIZE]; be_t size; be_t state; @@ -1410,9 +1420,9 @@ struct SceNpBasicPresenceDetails2 { be_t struct_size; be_t state; - s8 title[SCE_NP_BASIC_PRESENCE_TITLE_SIZE_MAX]; - s8 status[SCE_NP_BASIC_PRESENCE_EXTENDED_STATUS_SIZE_MAX]; - s8 comment[SCE_NP_BASIC_PRESENCE_COMMENT_SIZE_MAX]; + char title[SCE_NP_BASIC_PRESENCE_TITLE_SIZE_MAX]; + char status[SCE_NP_BASIC_PRESENCE_EXTENDED_STATUS_SIZE_MAX]; + char comment[SCE_NP_BASIC_PRESENCE_COMMENT_SIZE_MAX]; u8 data[SCE_NP_BASIC_MAX_PRESENCE_SIZE]; be_t size; }; @@ -1420,9 +1430,9 @@ struct SceNpBasicPresenceDetails2 // Country/region code struct SceNpCountryCode { - s8 data[2]; - s8 term; - s8 padding[1]; + char data[2]; + char term; + char padding[1]; }; // Date information @@ -1451,8 +1461,8 @@ struct SceNpScoreGameInfo // Ranking comment structure struct SceNpScoreComment { - s8 
data[SCE_NP_SCORE_COMMENT_MAXLEN]; - s8 term[1]; + char data[SCE_NP_SCORE_COMMENT_MAXLEN]; + char term[1]; }; // Ranking information structure @@ -1524,15 +1534,15 @@ struct SceNpScoreNpIdPcId // Basic clan information to be used in raking struct SceNpScoreClanBasicInfo { - s8 clanName[SCE_NP_CLANS_CLAN_NAME_MAX_LENGTH + 1]; - s8 clanTag[SCE_NP_CLANS_CLAN_TAG_MAX_LENGTH + 1]; + char clanName[SCE_NP_CLANS_CLAN_NAME_MAX_LENGTH + 1]; + char clanTag[SCE_NP_CLANS_CLAN_TAG_MAX_LENGTH + 1]; u8 reserved[10]; }; // Clan member information handled in ranking struct SceNpScoreClansMemberDescription { - s8 description[SCE_NP_CLANS_CLAN_DESCRIPTION_MAX_LENGTH + 1]; + char description[SCE_NP_CLANS_CLAN_DESCRIPTION_MAX_LENGTH + 1]; }; // Clan ranking information @@ -1689,12 +1699,22 @@ struct SceNpLobbyId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpLobbyId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpRoomId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpRoomId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpMatchingAttr diff --git a/rpcs3/Emu/Cell/Modules/sceNp2.cpp b/rpcs3/Emu/Cell/Modules/sceNp2.cpp index c9816b60f5..7809676078 100644 --- a/rpcs3/Emu/Cell/Modules/sceNp2.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNp2.cpp @@ -1135,7 +1135,7 @@ error_code sceNpMatching2ContextStartAsync(SceNpMatching2ContextId ctxId, u32 ti { sysutil_register_cb([=, context_callback = ctx->context_callback, context_callback_param = ctx->context_callback_param](ppu_thread& cb_ppu) -> s32 { - context_callback(cb_ppu, ctxId, SCE_NP_MATCHING2_CONTEXT_EVENT_Start, SCE_NP_MATCHING2_EVENT_CAUSE_CONTEXT_ACTION, 0, ctx->context_callback_param); + context_callback(cb_ppu, ctxId, SCE_NP_MATCHING2_CONTEXT_EVENT_Start, SCE_NP_MATCHING2_EVENT_CAUSE_CONTEXT_ACTION, 0, context_callback_param); return 0; }); } @@ -1760,7 +1760,7 @@ error_code sceNpMatching2ContextStop(SceNpMatching2ContextId ctxId) const auto ctx 
= get_match2_context(ctxId); if (!ctx) - return SCE_NP_MATCHING2_ERROR_INVALID_CONTEXT_ID; + return SCE_NP_MATCHING2_ERROR_CONTEXT_NOT_FOUND; if (!ctx->started.compare_and_swap_test(1, 0)) return SCE_NP_MATCHING2_ERROR_CONTEXT_NOT_STARTED; diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp index 803d174549..866db860ec 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp @@ -1026,14 +1026,14 @@ error_code sceNpTrophyUnlockTrophy(ppu_thread& ppu, u32 context, u32 handle, s32 auto& trophy_manager = g_fxo->get(); - reader_lock lock(trophy_manager.mtx); + std::scoped_lock lock(trophy_manager.mtx); if (!trophy_manager.is_initialized) { return SCE_NP_TROPHY_ERROR_NOT_INITIALIZED; } - const auto [ctxt, error] = trophy_manager.get_context_ex(context, handle); + const auto [ctxt, error] = trophy_manager.get_context_ex(context, handle, true); if (error) { @@ -1184,9 +1184,9 @@ error_code sceNpTrophyGetTrophyUnlockState(u32 context, u32 handle, vm::ptrGetTrophyUnlockState(id)) - flags->flag_bits[id / 32] |= 1 << (id % 32); + flags->flag_bits[id / 32] |= 1u << (id % 32); else - flags->flag_bits[id / 32] &= ~(1 << (id % 32)); + flags->flag_bits[id / 32] &= ~(1u << (id % 32)); } return CELL_OK; diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index 56d4398d09..783ed9e477 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -2535,7 +2535,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con // SLDI mnemonic reg_state_t rs = get_reg(op.rs); - if (!rs.shift_left(op.sh32, reg_tag_allocator)) + if (!rs.shift_left(sh, reg_tag_allocator)) { unmap_reg(op.ra); } diff --git a/rpcs3/Emu/Cell/PPUDisAsm.cpp b/rpcs3/Emu/Cell/PPUDisAsm.cpp index 484688ee12..ebfdffb5bc 100644 --- a/rpcs3/Emu/Cell/PPUDisAsm.cpp +++ b/rpcs3/Emu/Cell/PPUDisAsm.cpp @@ -330,7 +330,7 @@ void comment_constant(std::string& last_opcode, u64 value, bool 
print_float = fa // Comment constant formation fmt::append(last_opcode, " #0x%xh", value); - if (print_float && ((value >> 31) <= 1u || (value >> 31) == 0x1'ffff'ffffu)) + if (print_float && ((value >> 31) <= 1u || (value >> 31) == 0x1'ffff'ffffu) && (value > 0x3fffff && (value << 32 >> 32) < 0xffc00000)) { const f32 float_val = std::bit_cast(static_cast(value)); diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index b298539519..e95a1542d8 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1004,7 +1004,7 @@ static import_result_t ppu_load_imports(const ppu_module& _module, std: // Check address // TODO: The address of use should be extracted from analyser instead - if (fstub && fstub >= _module.segs[0].addr && fstub <= _module.segs[0].addr + _module.segs[0].size) + if (fstub && fstub >= _module.segs[0].addr && fstub < _module.segs[0].addr + _module.segs[0].size) { nid_to_use_addr.emplace(fnid, fstub); } @@ -1895,7 +1895,7 @@ shared_ptr ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c } else { - ppu_loader.error("Library %s: PRX library info not found"); + ppu_loader.error("Library: PRX library info not found"); } prx->start.set(prx->specials[0xbc9a0086]); @@ -3192,7 +3192,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf) for (const auto& s : elf.shdrs) { - if (s.sh_type != sec_type::sht_progbits) + if (s.sh_type == sec_type::sht_progbits) { memsize = utils::align(memsize + vm::cast(s.sh_size), 128); } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index a9bef5e640..f5d91cc519 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1286,7 +1286,7 @@ extern bool ppu_patch(u32 addr, u32 value) { if (addr % 4) { - ppu_log.fatal("Patch failed at 0x%x: unanligned memory address.", addr); + ppu_log.fatal("Patch failed at 0x%x: unaligned memory address.", addr); return false; } @@ -1364,9 +1364,7 @@ void ppu_thread::dump_regs(std::string& ret, std::any& 
custom_data) const u32 preferred_cr_field_index = 7; }; - dump_registers_data_t* func_data = nullptr; - - func_data = std::any_cast(&custom_data); + dump_registers_data_t* func_data = std::any_cast(&custom_data); if (!func_data) { @@ -2039,9 +2037,9 @@ std::vector> ppu_thread::dump_callstack_list() const return call_stack_list; } -std::string ppu_thread::dump_misc() const +void ppu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); if (ack_suspend) { @@ -2096,7 +2094,6 @@ std::string ppu_thread::dump_misc() const { ret += '\n'; } - return ret; } void ppu_thread::dump_all(std::string& ret) const @@ -3867,12 +3864,12 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector offs; + for (u32 j = 0; j < hdr.count; j++) { mself_record rec{}; - std::set offs; - if (mself.read(rec) && rec.get_pos(mself.size())) { if (rec.size <= 0x20) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 97c705aed5..cf5b91c487 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -145,7 +145,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; virtual std::string dump_callstack() const override; virtual std::vector> dump_callstack_list() const override; - virtual std::string dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void dump_all(std::string&) const override; virtual void cpu_task() override final; virtual void cpu_sleep() override; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 70d34aa775..0205715328 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -340,7 +340,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto ftype = FunctionType::get(get_type(), { get_type(), // Exec base - m_ir->getPtrTy(), // PPU context + 
get_type(), // PPU context get_type(), // Segment address (for PRX) get_type(), // Memory base get_type(), // r0 @@ -386,7 +386,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto addr_array = new GlobalVariable(*m_module, addr_array_type, false, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, vec_addrs)); // Create an array of function pointers - const auto func_table_type = ArrayType::get(m_ir->getPtrTy(), functions.size()); + const auto func_table_type = ArrayType::get(get_type(), functions.size()); const auto init_func_table = ConstantArray::get(func_table_type, functions); const auto func_table = new GlobalVariable(*m_module, func_table_type, false, GlobalVariable::PrivateLinkage, init_func_table); @@ -413,7 +413,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto func_pc = ZExt(m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst), get_type()); ptr_inst = dyn_cast(m_ir->CreateGEP(func_table->getValueType(), func_table, {m_ir->getInt64(0), index_value})); - assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); + assert(ptr_inst->getResultElementType() == get_type()); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc; @@ -550,11 +550,12 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) else if (_target >= caddr && _target <= cend) { u32 target_last = static_cast(_target); - std::unordered_set passed_targets{target_last}; // Try to follow unconditional branches as long as there is no infinite loop - while (target_last != _target) + // !! Triggers compilation issues in Asura's Wrath in other parts of the code + // !! 
See https://github.com/RPCS3/rpcs3/issues/18287 + while (false) { const ppu_opcode_t op{*ensure(m_info.get_ptr(target_last))}; const ppu_itype::type itype = g_ppu_itype.decode(op.opcode); @@ -621,7 +622,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) const auto pos = m_ir->CreateShl(indirect, 1); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); const auto val = m_ir->CreateLoad(get_type(), ptr); - callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, m_ir->getPtrTy())); + callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, get_type())); // Load new segment address const auto seg_base_ptr = m_ir->CreatePtrAdd(m_exec, m_ir->getInt64(vm::g_exec_addr_seg_offset)); @@ -1304,7 +1305,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) if (!m_use_fma && data == v128{}) { set_vr(op.vd, vec_handle_result(a * c + fsplat(0.f))); - ppu_log.notice("LLVM: VMADDFP with -0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); + ppu_log.notice("LLVM: VMADDFP with +0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); return; } } @@ -3680,9 +3681,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op) const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); const auto data = pshufb(get_vr(op.vs), build(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112) + vsplat(trunc(value(addr) & 0xf))); const auto mask = bitcast(splat(0xffff) << trunc(value(addr) & 0xf)); - const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull))); - const auto align = splat(16); - eval(llvm_calli{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}}); + m_ir->CreateMaskedStore(data.eval(m_ir), GetMemory(m_ir->CreateAnd(addr, ~0xfull)), llvm::Align(16), mask.eval(m_ir)); } void PPUTranslator::STDBRX(ppu_opcode_t op) @@ -3710,9 +3709,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op) const auto addr = op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); const auto data = pshufb(get_vr(op.vs), build(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat(trunc(value(addr) & 0xf))); const auto mask = bitcast(trunc(splat(0xffff) << (value(addr) & 0xf) >> 16)); - const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull))); - const auto align = splat(16); - eval(llvm_calli{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}}); + m_ir->CreateMaskedStore(data.eval(m_ir), GetMemory(m_ir->CreateAnd(addr, ~0xfull)), llvm::Align(16), mask.eval(m_ir)); } void PPUTranslator::STFSUX(ppu_opcode_t op) @@ -5417,7 +5414,7 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo) void PPUTranslator::build_interpreter() { #define BUILD_VEC_INST(i) { \ - m_function = llvm::cast(m_module->getOrInsertFunction("op_" #i, get_type(), m_ir->getPtrTy()).getCallee()); \ + m_function = llvm::cast(m_module->getOrInsertFunction("op_" #i, get_type(), get_type()).getCallee()); \ std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \ std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \ IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \ diff --git a/rpcs3/Emu/Cell/SPUAnalyser.h b/rpcs3/Emu/Cell/SPUAnalyser.h index 103c655a9e..1598551c7d 100644 --- a/rpcs3/Emu/Cell/SPUAnalyser.h +++ b/rpcs3/Emu/Cell/SPUAnalyser.h @@ -13,6 +13,7 @@ struct spu_itype static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR + static constexpr struct pure_tag{} pure{}; // Instructions that always produce the same values as long as arguments are equal enum class type : unsigned char { @@ -51,22 +52,22 @@ struct spu_itype RDCH, RCHCNT, - BR, // branch_tag first + BR, // branch_tag first, zregmod_tag (2) first BRA, BRNZ, BRZ, BRHNZ, BRHZ, - 
BRSL, - BRASL, IRET, BI, BISLED, - BISL, BIZ, BINZ, BIHZ, - BIHNZ, // branch_tag last + BIHNZ, // zregmod_tag (2) last + BRSL, + BRASL, + BISL, // branch_tag last ILH, // constant_tag_first ILHU, @@ -158,6 +159,15 @@ struct spu_itype CUFLT, FRDS, // xfloat_tag last + CFLTS, + CFLTU, + FCEQ, + FCMEQ, + FCGT, + FCMGT, // floating_tag last + FSCRWR, + FSCRRD, + DFA, DFS, DFM, @@ -167,20 +177,11 @@ struct spu_itype DFNMA, FESD, - CFLTS, - CFLTU, - FCEQ, - FCMEQ, - FCGT, - FCMGT, - FSCRWR, - FSCRRD, - DFCEQ, DFCMEQ, DFCGT, DFCMGT, - DFTSV, // floating_tag last + DFTSV, SHLH, // shiftrot_tag first SHLHI, @@ -245,13 +246,13 @@ struct spu_itype // Test for branch instruction friend constexpr bool operator &(type value, branch_tag) { - return value >= BR && value <= BIHNZ; + return value >= BR && value <= BISL; } - // Test for floating point instruction + // Test for floating point instruction (32-bit float) friend constexpr bool operator &(type value, floating_tag) { - return value >= FMA && value <= DFTSV; + return value >= FMA && value <= FCMGT; } // Test for 4-op instruction @@ -299,10 +300,18 @@ struct spu_itype // Test for non register-modifying instruction friend constexpr bool operator &(type value, zregmod_tag) { - return value >= HEQ && value <= STQR; + return (value >= HEQ && value <= STQR) || (value >= BR && value <= BIHNZ); + } + + // Test for instructions which always produce the same values as long as arguments and immediate values are equal + friend constexpr bool operator &(type value, pure_tag) + { + return (value >= ILH && value <= CLGTI); } }; +using spu_itype_t = spu_itype::type; + struct spu_iflag { enum @@ -528,6 +537,8 @@ struct spu_iflag } }; +using spu_iflag_t = spu_iflag::flag; + #define NAME(x) static constexpr const char& x = *#x struct spu_iname diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 4d04b13666..6fa68a2d4b 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ 
b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -52,6 +52,36 @@ struct span_less template inline constexpr span_less s_span_less{}; +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](spu_recompiler_base::compare_direction arg) + { + switch (arg) + { + case spu_recompiler_base::CMP_SLESS: return "SLT"; + case spu_recompiler_base::CMP_SGREATER: return "SGT"; + case spu_recompiler_base::CMP_EQUAL: return "IEQ"; + case spu_recompiler_base::CMP_LLESS: return "ULT"; + case spu_recompiler_base::CMP_LGREATER: return "UGT"; + case spu_recompiler_base::CMP_SGREATER_EQUAL: return "SGE"; + case spu_recompiler_base::CMP_SLOWER_EQUAL: return "SLE"; + case spu_recompiler_base::CMP_NOT_EQUAL: return "INE"; + case spu_recompiler_base::CMP_LGREATER_EQUAL: return "UGE"; + case spu_recompiler_base::CMP_LLOWER_EQUAL: return "ULE"; + case spu_recompiler_base::CMP_UNKNOWN: + case spu_recompiler_base::CMP_NOT_EQUAL2: + case spu_recompiler_base::CMP_EQUAL2: + default: + { + break; + } + } + + return unknown; + }); +} + // Move 4 args for calling native function from a GHC calling convention function #if defined(ARCH_X64) static u8* move_args_ghc_to_native(u8* raw) @@ -1176,108 +1206,6 @@ void spu_cache::initialize(bool build_existing_cache) if ((g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm) && !func_list.empty()) { spu_log.success("SPU Runtime: Built %u functions.", func_list.size()); - - if (g_cfg.core.spu_debug) - { - std::string dump; - dump.reserve(10'000'000); - - std::map, spu_program*, span_less> sorted; - - for (auto&& f : func_list) - { - // Interpret as a byte string - std::span data = {reinterpret_cast(f.data.data()), f.data.size() * sizeof(u32)}; - - sorted[data] = &f; - } - - std::unordered_set depth_n; - - u32 n_max = 0; - - for (auto&& [bytes, f] : sorted) - { - { - sha1_context ctx; - u8 output[20]; - - sha1_starts(&ctx); - sha1_update(&ctx, bytes.data(), bytes.size()); - 
sha1_finish(&ctx, output); - fmt::append(dump, "\n\t[%s] ", fmt::base57(output)); - } - - u32 depth_m = 0; - - for (auto&& [data, f2] : sorted) - { - u32 depth = 0; - - if (f2 == f) - { - continue; - } - - for (u32 i = 0; i < bytes.size(); i++) - { - if (i < data.size() && data[i] == bytes[i]) - { - depth++; - } - else - { - break; - } - } - - depth_n.emplace(depth); - depth_m = std::max(depth, depth_m); - } - - fmt::append(dump, "c=%06d,d=%06d ", depth_n.size(), depth_m); - - bool sk = false; - - for (u32 i = 0; i < std::min(bytes.size(), std::max(256, depth_m)); i++) - { - if (depth_m == i) - { - dump += '|'; - sk = true; - } - - fmt::append(dump, "%02x", bytes[i]); - - if (i % 4 == 3) - { - if (sk) - { - sk = false; - } - else - { - dump += ' '; - } - - dump += ' '; - } - } - - fmt::append(dump, "\n\t%49s", ""); - - for (u32 i = 0; i < std::min(f->data.size(), std::max(64, utils::aligned_div(depth_m, 4))); i++) - { - fmt::append(dump, "%-10s", g_spu_iname.decode(std::bit_cast>(f->data[i]))); - } - - n_max = std::max(n_max, ::size32(depth_n)); - - depth_n.clear(); - } - - spu_log.notice("SPU Cache Dump (max_c=%d): %s", n_max, dump); - } } // Initialize global cache instance @@ -3029,7 +2957,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { // Stop on special instructions (TODO) - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -3050,7 +2978,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.error("[0x%x] Invalid interrupt flags (DE)", pos); } - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -3083,7 +3011,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool is_no_return = false; - if (pos_next >= lsa && pos_next < limit) + if (sl && pos_next >= lsa && pos_next < limit) { const u32 data_next = ls[pos_next / 4]; const auto type_next = 
g_spu_itype.decode(data_next); @@ -3104,7 +3032,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.ra >= 4 && op_next.rb < 10); } @@ -3346,6 +3274,12 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos); } + if (!(af & vf::is_const)) + { + // Possible unknown target + m_targets[pos].emplace_back(SPU_LS_SIZE); + } + if (type == spu_itype::BI || sl || is_no_return) { if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return) @@ -3410,7 +3344,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); } @@ -3705,6 +3639,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s default: { + if (type & spu_itype::zregmod) + { + break; + } + // Unconst const u32 op_rt = type & spu_itype::_quadrop ? 
+op.rt4 : +op.rt; m_regmod[pos / 4] = op_rt; @@ -3931,17 +3870,26 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + bool removed = false; + for (auto it2 = it->second.begin(); it2 != it->second.end();) { if (*it2 < lsa || *it2 >= limit) { it2 = it->second.erase(it2); + removed = true; continue; } it2++; } + if (removed) + { + it->second.emplace_back(SPU_LS_SIZE); + } + + std::sort(it->second.begin(), it->second.end()); it++; } @@ -3992,7 +3940,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto type = g_spu_itype.decode(op.opcode); - u8 reg_save = 255; + u8 reg_save = s_reg_max; if (type == spu_itype::STQD && op.ra == s_reg_sp && !block.reg_mod[op.rt] && !block.reg_use[op.rt]) { @@ -4012,7 +3960,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Register reg use only if it happens before reg mod if (!block.reg_mod[reg]) { - block.reg_use.set(reg); + if (type & spu_itype::floating) + { + block.reg_maybe_float.set(reg); + } + + if (type == spu_itype::SHUFB && reg == op.rc) + { + block.reg_maybe_shuffle_mask.set(reg); + } + + block.reg_use[reg]++; if (reg_save != reg && block.reg_save_dom[reg]) { @@ -4029,7 +3987,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size}) { if (!block.reg_mod[reg]) - block.reg_use.set(reg); + block.reg_use[reg]++; } } @@ -4083,7 +4041,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (i == s_reg_lr || (i >= 2 && i < s_reg_80) || i > s_reg_127) { if (!block.reg_mod[i]) - block.reg_use.set(i); + block.reg_use[i]++; if (!is_tail) { @@ -4960,19 +4918,24 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return map; }; - struct putllc16_statistics_t + struct stats_t { atomic_t all = 0; atomic_t single = 0; - atomic_t nowrite = 0; std::array, 128> breaking_reason{}; }; - struct 
rchcnt_statistics_t + struct putllc16_statistics_t : stats_t + { + atomic_t nowrite = 0; + }; + + struct rchcnt_statistics_t : stats_t + { + }; + + struct reduced_statistics_t : stats_t { - atomic_t all = 0; - atomic_t single = 0; - std::array, 128> breaking_reason{}; }; // Pattern structures @@ -5084,6 +5047,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // RDCH/RCHCNT Loop analysis tracker rchcnt_loop_t rchcnt_loop{}; + reduced_loop_t reduced_loop{}; + block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept : pc(_pc) , parent_iterator_index(_parent_iterator_index) @@ -5096,6 +5061,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s std::map atomic16_all; // RdAtomicStat location -> atomic loop optimization state std::map rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state + std::map reduced_loop_all; std::map getllar_starts; // True for failed loops std::map run_on_block; std::map logged_block; @@ -5104,6 +5070,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s atomic16_t dummy16{}; rchcnt_loop_t dummy_loop{}; + reduced_loop_t dummy_rloop{}; bool likely_putllc_loop = false; bool had_putllc_evaluation = false; @@ -5150,6 +5117,194 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 iterator_id_alloc = 0; + auto get_block_targets = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).targets; + } + + return {}; + }; + + auto get_block_preds = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).preds; + } + + return {}; + }; + + const auto initiate_patterns = [&](block_reg_state_iterator& block_state_it, u32 bpc, bool is_multi_block) + { + // Initiate patterns (that are initiated on block start) + const auto& bb_body = ::at32(m_bbs, bpc); + + bool invalid = 
bb_body.size <= 2; + bool valid = true; + + u32 expected_sup_conds = 0; + u32 first_pred_of_loop = SPU_LS_SIZE; + + for (u32 pred : get_block_preds(bpc)) + { + if (is_multi_block ? pred >= bpc : pred == bpc) + { + first_pred_of_loop = std::min(pred, first_pred_of_loop); + } + } + + valid = first_pred_of_loop != SPU_LS_SIZE; + + const auto& bb_connect = ::at32(m_bbs, valid ? first_pred_of_loop : bpc); + + invalid = invalid || !valid; + valid = false; + + // Check loop connector block (must jump to block-next or to loop-start) + u32 targets_count = 0; + + for (u32 target : get_block_targets(first_pred_of_loop)) + { + valid = true; + targets_count++; + + if (first_pred_of_loop == bpc) + { + continue; + } + + if (target != bpc) + { + if (target != first_pred_of_loop + bb_connect.size * 4) + { + invalid = true; + } + } + } + + if (targets_count > 2) + { + invalid = true; + } + + const bool is_two_block_loop = targets_count == 1; + + invalid = invalid || !valid; + valid = false; + + // Check loop body block (must jump to last-block or another location) + + for (u32 block_pc = bpc; !invalid;) + { + targets_count = 0; + + const u32 cond_next = block_pc + ::at32(m_bbs, block_pc).size * 4; + valid = false; + + bool is_end = false; + + for (u32 target : get_block_targets(block_pc)) + { + targets_count++; + + if (target == cond_next) + { + // Conditional branch + valid = true; + } + + if (target <= block_pc && target > bpc) + { + // Branch backwards + invalid = true; + } + + if (target == bpc) + { + is_end = true; + } + } + + // if (bpc != block_pc) + // { + // for (u32 pred : get_block_preds(block_pc)) + // { + // if (pred < bpc || pred > first_pred_of_loop + ::at32(m_bbs, first_pred_of_loop).size * 4) + // { + // invalid = true; + // break; + // } + // } + // } + + if (targets_count > 2) + { + invalid = true; + break; + } + + if (cond_next == first_pred_of_loop && is_two_block_loop) + { + valid = true; + break; + } + + if (!valid) + { + break; + } + + if (bpc == 
first_pred_of_loop || is_end) + { + break; + } + + if (targets_count == 2) + { + expected_sup_conds++; + } + + block_pc = cond_next; + } + + invalid = invalid || !valid; + + if (bb_body.size > 2 && !invalid) + { + // Early filtering of false positives + const spu_opcode_t op{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4 + bb_body.size - 2))}; + const spu_opcode_t op2{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4))}; + + switch (g_spu_itype.decode(op.opcode)) + { + case spu_itype::RDCH: invalid = op.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + + switch (g_spu_itype.decode(op2.opcode)) + { + case spu_itype::RDCH: invalid = invalid || op2.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + } + + if (valid && !invalid && !reduced_loop_all.count(bpc) && expected_sup_conds == 0) + { + const auto reduced_loop = &block_state_it.reduced_loop; + reduced_loop->discard(); + reduced_loop->active = true; + reduced_loop->loop_pc = bpc; + reduced_loop->loop_end = first_pred_of_loop; + reduced_loop->expected_sup_conds = expected_sup_conds; + reduced_loop->is_two_block_loop = is_two_block_loop; + } + }; + for (u32 wf = 0, wi = 0, wa = entry_point, bpc = wa; wf <= 1;) { const bool is_form_block = wf == 0; @@ -5218,6 +5373,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby; const auto atomic16 = is_pattern_match ? &::at32(reg_state_it, wi).atomic16 : &dummy16; const auto rchcnt_loop = is_pattern_match ? 
&::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop; + const auto reduced_loop = &::at32(reg_state_it, wi).reduced_loop; const u32 pos = wa; @@ -5341,10 +5497,71 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } }; + const auto break_reduced_loop_pattern = [&](u32 cause, reduced_loop_t previous) + { + if (previous.active && previous.loop_pc != SPU_LS_SIZE && reduced_loop_all.count(previous.loop_pc) == 0) + { + g_fxo->get().breaking_reason[cause]++; + + if (!spu_log.notice) + { + return; + } + + previous.active = false; + previous.failed = true; + + reduced_loop_all[previous.loop_pc] = previous; + + std::string break_error = fmt::format("Reduced loop pattern breakage [%x cause=%u] (read_pc=0x%x)", pos, cause, previous.loop_pc); + + const auto values = sort_breakig_reasons(g_fxo->get().breaking_reason); + + std::string tracing = "Top Breaking Reasons:"; + + usz i = 0; + usz fail_count = 0; + bool switched_to_minimal = false; + + for (auto it = values.begin(); it != values.end(); i++, it++) + { + fail_count += it->second; + + if (i >= 12) + { + continue; + } + + if (i < 8 && it->second > 1) + { + fmt::append(tracing, " [cause=%u, n=%d]", it->first, it->second); + } + else + { + if (!std::exchange(switched_to_minimal, true)) + { + fmt::append(tracing, "; More:"); + } + + fmt::append(tracing, " %u", it->first); + } + } + + fmt::append(tracing, " of %d failures", fail_count); + spu_log.notice("%s\n%s", break_error, tracing); + + std::string block_dump; + this->dump(result, block_dump, previous.loop_pc, previous.loop_end + 1); + + spu_log.notice("SPU Block Dump:\n%s", block_dump); + } + }; + const auto break_all_patterns = [&](u32 cause) { break_putllc16(cause, atomic16->discard()); break_channel_pattern(cause, rchcnt_loop->discard()); + break_reduced_loop_pattern(cause, reduced_loop->discard()); }; const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2) @@ -5406,16 +5623,6 @@ spu_program spu_recompiler_base::analyse(const 
be_t* ls, u32 entry_point, s u32 stackframe_pc = SPU_LS_SIZE; usz entry_index = umax; - auto get_block_targets = [&](u32 pc) -> std::span - { - if (m_block_info[pc / 4] && m_bbs.count(pc)) - { - return m_bbs.at(pc).targets; - } - - return {}; - }; - u32 target_pc = SPU_LS_SIZE; bool insert_entry = false; bool is_code_backdoor = false; @@ -5605,7 +5812,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } - const u32 previous_pc = m_bbs.at(reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; + const u32 previous_pc = ::at32(m_bbs, reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; bool may_return = previous_pc + 4 != entry_point + result.data.size() * 4 && (m_ret_info[(previous_pc / 4) + 1] || m_entry_info[previous_pc / 4]); @@ -5634,6 +5841,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Backup analyser information const auto atomic16_info = reg_state_it[stackframe_it].atomic16; const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop; + const auto reduced_loop_info = reg_state_it[stackframe_it].reduced_loop; // Clean from the back possible because it does not affect old indices // Technically should always do a full cleanup at the moment @@ -5659,6 +5867,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.trace("Emplacing: block_id=%d, pc=0x%x, target_it=%d/%d, new_pc=0x%x (has_it=%d)", reg_state_it[stackframe_it].iterator_id, stackframe_pc, entry_index + 1, target_size, target_pc, atomic16_info.active); auto& next = reg_state_it.emplace_back(target_pc, stackframe_it, 0); + initiate_patterns(next, target_pc, true); + if (!is_code_backdoor) { // Restore analyser information (if not an entry) @@ -5666,6 +5876,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (previous_pc != rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target) next.rchcnt_loop 
= rchcnt_loop_info; + + if (previous_pc + 4 == target_pc && reduced_loop_info.loop_pc != reduced_loop_info.loop_end && reduced_loop_info.active && target_pc <= reduced_loop_info.loop_end) + next.reduced_loop = reduced_loop_info; } else { @@ -5701,7 +5914,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!infos.empty()) { - reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++;; + reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++; + + initiate_patterns(reg_state_it.back(), ::at32(infos, entry_point)->pc, true); } } } @@ -5710,6 +5925,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { wa = ::at32(reg_state_it, wi).pc; bpc = wa; + + initiate_patterns(::at32(reg_state_it, wi), bpc, false); } }; @@ -5834,7 +6051,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!is_form_block) { // Call for external code - break_all_patterns(25); + break_putllc16(25, atomic16->discard()); + break_channel_pattern(25, rchcnt_loop->discard()); } } @@ -5859,6 +6077,147 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto op = spu_opcode_t{data}; const auto type = g_spu_itype.decode(data); + if (reduced_loop->active && !(type & spu_itype::zregmod)) + { + const u32 op_rt = type & spu_itype::_quadrop ? 
+op.rt4 : +op.rt; + + u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; + + if (::at32(m_use_ra, pos / 4)) + { + ra = op.ra; + } + + if (::at32(m_use_rb, pos / 4)) + { + rb = op.rb; + } + + if (::at32(m_use_rc, pos / 4)) + { + rc = op.rc; + } + + bool is_move_register_op = false; + + switch (type) + { + case spu_itype::SHLQBYI: + { + is_move_register_op = op.i7 == 0; + break; + } + // Technically only ORI is needed but I am taking into account possible third-party SPU compilers or hand-written assembly + case spu_itype::ORI: + case spu_itype::ORHI: + case spu_itype::ORBI: + case spu_itype::AI: + case spu_itype::AHI: + case spu_itype::XORI: + case spu_itype::XORHI: + case spu_itype::XORBI: + { + is_move_register_op = op.si10 == 0; + break; + } + case spu_itype::ANDI: + case spu_itype::ANDHI: + case spu_itype::ANDBI: + { + is_move_register_op = op.si10 == -1; + break; + } + default: + { + break; + } + } + + auto org = reduced_loop->get_reg(op_rt); + + u32 reg_first = s_reg_max; + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max && reg != reg_first) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && arg->modified >= 1) + { + reg_first = reg; + + if (reg_first != s_reg_max && !is_move_register_op) + { + // Multiple origins + org.add_instruction_modifier(spu_itype::UNK, op.opcode); + break; + } + } + } + } + + if (reg_first == s_reg_max) + { + org = {}; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + } + else if (reg_first == rb) + { + std::swap(ra, rb); + } + else if (reg_first == rc) + { + std::swap(ra, rc); + } + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && arg->regs.count() != 0) + { + if (reg_first == reg) + { + org = *arg; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + + continue; + } + + org.join_with_this(*arg); + } + else + { + org.add_register_origin(reg); + } + } + } + + 
if (type & spu_itype::memory || type == spu_itype::RDCH || type == spu_itype::RCHCNT) + { + // Register external origin + org.add_register_origin(s_reg_max); + } + + *ensure(reduced_loop->find_reg(op_rt)) = org; + } + + if (reduced_loop->active && ((type & spu_itype::memory) || type == spu_itype::STOP || type == spu_itype::STOPD)) + { + reduced_loop->is_constant_expression = false; + } + // For debugging if (false && likely_putllc_loop && is_pattern_match) { @@ -5945,12 +6304,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } - if (type == spu_itype::SYNC) - { - // Remember - sync = true; - } - + break_reduced_loop_pattern(19, reduced_loop->discard()); break; } @@ -5958,10 +6312,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::BI: case spu_itype::BISL: case spu_itype::BISLED: - case spu_itype::BIZ: - case spu_itype::BINZ: - case spu_itype::BIHZ: - case spu_itype::BIHNZ: { if (op.e || op.d) { @@ -5977,8 +6327,86 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + case spu_itype::BR: case spu_itype::BRA: { + if (reduced_loop->active) + { + if (!reduced_loop->is_two_block_loop || !reduced_loop->has_cond_state) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + reg_maybe_float |= it->second.reg_maybe_float; 
+ reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (reg_use[i] && reg_mod[i]) + { + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) + { + reduced_loop->gpr_not_nans.set(i); + } + } + } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + break; } @@ -5988,7 +6416,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const u32 next_pc = spu_branch_target(pos, 1); const u32 target = spu_branch_target(pos, op.i16); - if (rchcnt_loop->active) + while (rchcnt_loop->active) { const reg_state_t& rt = vregs[op.rt]; @@ -6004,16 +6432,698 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s rchcnt_loop->conditioned = true; rchcnt_loop->branch_pc = pos; rchcnt_loop->branch_target = rchcnt_loop->product_test_negate != (type == spu_itype::BRZ) ? 
target : next_pc; + } + + break; + } + + [[fallthrough]]; + } + case spu_itype::BRHZ: + case spu_itype::BRHNZ: + + case spu_itype::BIZ: + case spu_itype::BINZ: + case spu_itype::BIHZ: + case spu_itype::BIHNZ: + { + if (type == spu_itype::spu_itype::BIZ || type == spu_itype::BINZ || type == spu_itype::BIHZ || type == spu_itype::BIHNZ) + { + if (op.e || op.d) + { + break_all_patterns(27); break; } } - break; - } - case spu_itype::BR: - case spu_itype::BRHZ: - case spu_itype::BRHNZ: - { + const bool is_u16_jump = type == spu_itype::BRHZ || type == spu_itype::BRHNZ || type == spu_itype::BIHZ || type == spu_itype::BIHNZ; + const bool is_jump_zero = (type == spu_itype::BRZ || type == spu_itype::BRHZ || type == spu_itype::BIZ || type == spu_itype::BIHZ) ^ reduced_loop->is_two_block_loop; + + while (reduced_loop->active) + { + if (reduced_loop->expected_sup_conds) + { + break_reduced_loop_pattern(50, reduced_loop->discard()); + break; + } + + const u32 op_rt = op.rt; + + const auto reg = reduced_loop->find_reg(op_rt); + + if (!reg/* || reg->modified == 0*/) // See special case regarding branch with direct comparison with 0 + { + break_reduced_loop_pattern(1, reduced_loop->discard()); + break; + } + + bool should_have_argument_dictator = false; + bool cond_val_incr_before_cond = false; + bool ends_with_comparison = false; + + bool pattern_ok1 = true; + + switch (reg->mod1_type) + { + case spu_itype::A: + case spu_itype::AI: + case spu_itype::AHI: + { + cond_val_incr_before_cond = true; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case 
spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + if (reg->modified == 0) + { + // Special case: target may be sourced from another register which would be the loop dictator + break; + } + + pattern_ok1 = false; + break; + } + } + + if (!pattern_ok1) + { + break_reduced_loop_pattern(9, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + switch (reg->mod2_type) + { + case spu_itype::A: + case spu_itype::AI: + case spu_itype::AHI: + { + if (cond_val_incr_before_cond) + { + // AI twice + break_reduced_loop_pattern(8, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + cond_val_incr_before_cond = false; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + pattern_ok1 = true; + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + pattern_ok1 = false; + break; + } + } + } + + if (!pattern_ok1) + { + break_reduced_loop_pattern(10, reduced_loop->discard()); + break; + } + + bool found_loop_dictator = false; + bool found_loop_argument_for_dictator = false; + u32 null_regs_found = 0; + + for 
(u32 i = 0; i < reg->regs.size() && reduced_loop->active; i++) + { + if (::at32(reg->regs, i)) + { + if (0) if (i == op_rt || reg->modified == 0) + { + // Special case: direct comparison with zero for 32-bits (the only supported form by SPU) + + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + break_reduced_loop_pattern(22, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_mask = u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_size = u32{umax}; + + auto comp_reg = i == op_rt ? reg : reduced_loop->find_reg(i); + + if (!comp_reg || !comp_reg->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(4, reduced_loop->discard()); + break; + } + + ensure(reg->modified == 1 || i != op_rt); + + reduced_loop->cond_val_incr = static_cast(comp_reg->IMM); + reduced_loop->cond_val_incr_before_cond = reg->modified == 1; + reduced_loop->cond_val_register_idx = i; + reduced_loop->cond_val_compare = CMP_NOT_EQUAL; + reduced_loop->cond_val_is_immediate = true; + + found_loop_dictator = true; + break; + } + + auto reg_org = reduced_loop->find_reg(i); + u32 reg_index = i; + + if (reg_org && !cond_val_incr_before_cond && reg_org->modified == 0 && reg_org->regs.count() - 1u <= 1u && !::at32(reg_org->regs, i)) + { + for (u32 j = 0; j <= s_reg_127; j++) + { + if (::at32(reg_org->regs, j)) + { + if (const auto reg_found = reduced_loop->find_reg(j)) + { + if (reg_found->modified) + { + reg_org = reg_found; + reg_index = j; + break; + } + } + } + } + } + + if (!reg_org || reg_org->is_null(reg_index)) + { + // if (found_loop_dictator && !reduced_loop->cond_val_incr_is_immediate) + // { + // ensure(reduced_loop->cond_val_incr < s_reg_max); + + // } + // if (!should_have_argument_dictator) + // { + // break_reduced_loop_pattern(11, reduced_loop->discard()); + // break; + // } + + // if (found_loop_argument_for_dictator) + // { + // 
break_reduced_loop_pattern(6, reduced_loop->discard()); + // break; + // } + + // found_loop_argument_for_dictator = true; + // reduced_loop->cond_val_is_immediate = false; + + // if (found_loop_dictator) + // { + // ensure(i == reduced_loop->cond_val_register_argument_idx); + // } + // else + // { + // reduced_loop->cond_val_register_argument_idx = i; + // } + + // if (found_loop_dictator && reg->regs.count() == 2) + // { + // break; + // } + + null_regs_found++; + continue; + } + + if (found_loop_dictator) + { + break_reduced_loop_pattern(13, reduced_loop->discard()); + break; + } + + found_loop_dictator = true; + + if (!reg_org->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(7, reduced_loop->discard()); + break; + } + + if (reg_index != i && ::at32(reg->regs, reg_index)) + { + // Unimplemented + break_reduced_loop_pattern(30, reduced_loop->discard()); + break; + } + + if (reg_org->mod1_type == spu_itype::AI || reg_org->mod1_type == spu_itype::AHI) + { + reduced_loop->cond_val_incr_is_immediate = true; + reduced_loop->cond_val_incr = static_cast(reg_org->IMM); + } + else if (reg_org->mod1_type == spu_itype::A) + { + reduced_loop->cond_val_incr_is_immediate = false; + + const u32 op_ra = spu_opcode_t{reg_org->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg_org->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(25, reduced_loop->discard()); + break; + } + + const u32 incr_arg_reg = reg_index == op_ra ? 
op_rb : op_ra; + + if (!reduced_loop->is_reg_null(incr_arg_reg)) + { + break_reduced_loop_pattern(26, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr = incr_arg_reg; + } + else + { + break_reduced_loop_pattern(28, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr_before_cond = cond_val_incr_before_cond; + + u64 cmp_mask = 0; + compare_direction cmp_direction{}; + + if (!ends_with_comparison) + { + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + cmp_mask = is_u16_jump ? u16{umax} : u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_is_immediate = true; + cmp_direction = CMP_NOT_EQUAL; + } + else if (!should_have_argument_dictator) + { + reduced_loop->cond_val_min = reg->IMM; + reduced_loop->cond_val_is_immediate = true; + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + cmp_direction = CMP_LGREATER; + break; + } + default: + { + break_reduced_loop_pattern(21, reduced_loop->discard()); + } + } + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CGTI: + case spu_itype::CLGTI: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTHI: + case spu_itype::CEQHI: + case spu_itype::CGTHI: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQBI: + case spu_itype::CGTBI: + case spu_itype::CLGTBI: + { + cmp_mask = u8{umax}; + break; + } + default: break_reduced_loop_pattern(21, reduced_loop->discard()); + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ 
CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + } + else + { + const u32 op_ra = spu_opcode_t{reg->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + cmp_direction = CMP_LGREATER; + break; + } + default: ensure(false); + } + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CGT: + case spu_itype::CLGT: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTH: + case spu_itype::CEQH: + case spu_itype::CGTH: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQB: + case spu_itype::CGTB: + case spu_itype::CLGTB: + { + cmp_mask = u8{umax}; + break; + } + default: ensure(false); + } + + if (op_ra != i) + { + // Compare is on the oppsoite direction + // This variation exists only via register mode (due to lack of SPU opcodes) + cmp_direction = compare_direction{cmp_direction ^ CMP_TURNAROUND_FLAG}; + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + + // The loop dictator 
is the register that is not the argument + const u32 loop_arg_reg = reg_index == op_ra ? op_rb : op_ra; + reduced_loop->cond_val_is_immediate = false; + + if (found_loop_argument_for_dictator) + { + ensure(loop_arg_reg == reduced_loop->cond_val_register_argument_idx); + } + else + { + reduced_loop->cond_val_register_argument_idx = loop_arg_reg; + } + + if (!reduced_loop->is_reg_null(loop_arg_reg)) + { + break_reduced_loop_pattern(27, reduced_loop->discard()); + break; + } + + found_loop_argument_for_dictator = true; + } + + if (cmp_direction == CMP_EQUAL) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + + if (cmp_mask == u16{umax} && !is_u16_jump) + { + break_reduced_loop_pattern(14, reduced_loop->discard()); + break; + } + + if (cmp_mask == u8{umax}) + { + bool instructions_ok = false; + + if (is_u16_jump) + { + // If ANDI(0xff) is used, although unlikely, it fine as well for 16-bits + instructions_ok = FN(x == spu_itype::XSBH || x == spu_itype::ANDI)(!cond_val_incr_before_cond ? reg->mod2_type : reg->mod3_type); + } + else + { + instructions_ok = FN(x == spu_itype::ANDI)(!cond_val_incr_before_cond ? 
reg->mod2_type : reg->mod3_type); + } + + if (!instructions_ok) + { + break_reduced_loop_pattern(15, reduced_loop->discard()); + break; + } + } + + reduced_loop->cond_val_compare = cmp_direction; + reduced_loop->cond_val_mask = cmp_mask; + reduced_loop->cond_val_register_idx = reg_index; + + // if (!should_have_argument_dictator && reg->regs.count() == 1) + // { + // break; + // } + + // if (found_loop_argument_for_dictator && reg->regs.count() == 2) + // { + // break; + // } + } + } + + if (!found_loop_dictator) + { + break_reduced_loop_pattern(16, reduced_loop->discard()); + } + + if (should_have_argument_dictator && !found_loop_argument_for_dictator) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + + if (reduced_loop->active) + { + ensure(reduced_loop->cond_val_register_idx != umax); + + if (reduced_loop->is_two_block_loop) + { + reduced_loop->has_cond_state = true; + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + reg_maybe_float |= it->second.reg_maybe_float; + reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (reg_use[i] && reg_mod[i]) + { + reduced_loop->is_constant_expression = false; + 
reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) + { + reduced_loop->gpr_not_nans.set(i); + } + } + } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + + break; + } + break; } @@ -6026,17 +7136,49 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::HLGTI: case spu_itype::LNOP: case spu_itype::NOP: - case spu_itype::MTSPR: case spu_itype::FSCRWR: { // Do nothing break; } - + + case spu_itype::MTSPR: + { + break_all_patterns(99); + break; + } + case spu_itype::WRCH: { break_channel_pattern(56, rchcnt_loop->discard()); + if (reduced_loop->active) + { + switch (op.ra) + { + case MFC_EAL: + case MFC_LSA: + case MFC_TagID: + case MFC_Size: + case MFC_EAH: + case SPU_WrDec: + case SPU_WrSRR0: + case SPU_WrEventAck: + case SPU_Set_Bkmk_Tag: + case SPU_PM_Start_Ev: + case SPU_PM_Stop_Ev: + case MFC_WrTagMask: + { + break; + } + default: + { + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + } + } + switch (op.ra) { case MFC_EAL: @@ -6299,6 +7441,14 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const bool is_read = type == spu_itype::RDCH; bool invalidate = true; + if (!is_read || op.ra != SPU_RdDec) + { + if (reduced_loop->active) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + } + const auto it = rchcnt_loop_all.find(pos); if (it != rchcnt_loop_all.end()) @@ -7208,17 +8358,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; - if (m_use_ra.test(pos / 4)) + if (::at32(m_use_ra, pos / 4)) { ra = op.ra; } - if (m_use_rb.test(pos / 4)) + if (::at32(m_use_rb, pos / 4)) { rb = op.rb; } - if (type & spu_itype::_quadrop && 
m_use_rc.test(pos / 4)) + if (::at32(m_use_rc, pos / 4)) { rc = op.rc; } @@ -7266,6 +8416,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { for (u32 next_target : ::at32(m_targets, pos)) { + if (next_target == SPU_LS_SIZE) + { + continue; + } + add_block(next_target); } @@ -7450,6 +8605,74 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (const auto& [loop_pc, pattern] : reduced_loop_all) + { + if (!pattern.active || pattern.loop_pc == SPU_LS_SIZE) + { + continue; + } + + if (inst_attr attr = m_inst_attrs[(loop_pc - entry_point) / 4]; attr == inst_attr::none) + { + add_pattern(inst_attr::reduced_loop, loop_pc - result.entry_point, 0, std::make_shared(pattern)); + + std::string regs = "{"; + + for (const auto& [reg_num, reg] : pattern.regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u", reg_num); + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (::at32(pattern.loop_writes, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-w", i); + } + + if (::at32(pattern.loop_args, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-r", i); + } + + if (::at32(pattern.loop_may_update, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-m", i); + } + } + + regs += " }"; + + spu_log.success("Reduced Loop Pattern Detected! (REGS: %s, DICT: r%d, ARG: %s, Incr: %s (%s), CMP/Size: %s/%u, loop_pc=0x%x, 0x%x-%s)", regs, pattern.cond_val_register_idx + , pattern.cond_val_is_immediate ? fmt::format("0x%x", pattern.cond_val_min) : fmt::format("r%d", pattern.cond_val_register_argument_idx) + , pattern.cond_val_incr_is_immediate ? fmt::format("%d", static_cast(pattern.cond_val_incr)) : fmt::format("r%d", pattern.cond_val_incr), pattern.cond_val_incr_before_cond ? 
"BEFORE" : "AFTER" + , pattern.cond_val_compare, std::popcount(pattern.cond_val_mask), loop_pc, entry_point, func_hash); + } + } + if (likely_putllc_loop && !had_putllc_evaluation) { spu_log.notice("Likely missed PUTLLC16 patterns. (entry=0x%x)", entry_point); @@ -7460,7 +8683,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } - if (!m_patterns.empty()) + if (!m_patterns.empty() && g_cfg.core.spu_debug) { std::string out_dump; dump(result, out_dump); @@ -7483,11 +8706,12 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return result; } -void spu_recompiler_base::dump(const spu_program& result, std::string& out) +void spu_recompiler_base::dump(const spu_program& result, std::string& out, u32 block_min, u32 block_max) { SPUDisAsm dis_asm(cpu_disasm_mode::dump, reinterpret_cast(result.data.data()), result.lower_bound); std::string hash; + be_t hash_start{}; if (!result.data.empty()) { @@ -7498,19 +8722,28 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out) sha1_update(&ctx, reinterpret_cast(result.data.data()), result.data.size() * 4); sha1_finish(&ctx, output); fmt::append(hash, "%s", fmt::base57(output)); + std::memcpy(&hash_start, output, sizeof(hash_start)); } else { hash = "N/A"; } - fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + if (block_min == 0) + { + fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + } for (auto& bb : m_bbs) { + if (bb.first < block_min || bb.first >= block_max) + { + continue; + } + if (m_block_info[bb.first / 4]) { - fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? 
"Chunk" : "Entry") : "Block"); + fmt::append(out, "A: [0x%05x] %s [%s]\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block", spu_block_hash{(hash_start & -65536) + bb.first / 4}); fmt::append(out, "\t F: 0x%05x\n", bb.second.func); @@ -8530,9 +9763,9 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info) +void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr) { - m_patterns[start] = pattern_info{info}; + m_patterns[start] = pattern_info{info, info_ptr}; m_inst_attrs[start / 4] = attr; } diff --git a/rpcs3/Emu/Cell/SPUDisAsm.h b/rpcs3/Emu/Cell/SPUDisAsm.h index 0d5862025b..5b1f097393 100644 --- a/rpcs3/Emu/Cell/SPUDisAsm.h +++ b/rpcs3/Emu/Cell/SPUDisAsm.h @@ -903,8 +903,14 @@ public: if (auto [is_const, value] = try_get_const_equal_value_array(+op.ra); is_const) { + if (value % 0x200 != 0) + { + // si10 is overwritten - likely an analysis mistake + return; + } + // Comment constant formation - comment_constant(last_opcode, value | static_cast(op.si10)); + comment_constant(last_opcode, value | static_cast(op.si10), false); } } void ORHI(spu_opcode_t op) @@ -941,8 +947,14 @@ public: if (auto [is_const, value] = try_get_const_equal_value_array(op.ra); is_const) { + if (value % 0x200 != 0) + { + // si10 is overwritten - likely an analysis mistake + return; + } + // Comment constant formation - comment_constant(last_opcode, value + static_cast(op.si10)); + comment_constant(last_opcode, value + static_cast(op.si10), false); } } void AHI(spu_opcode_t op) @@ -963,8 +975,14 @@ public: if (auto [is_const, value] = try_get_const_equal_value_array(op.ra); is_const) { + if (value % 0x200 != 0) + { + // si10 is overwritten - likely an analysis mistake + return; + } + // Comment constant formation - comment_constant(last_opcode, value ^ static_cast(op.si10)); + 
comment_constant(last_opcode, value ^ static_cast(op.si10), false); } } void XORHI(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index eb44289320..b6d0791ab9 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -132,6 +132,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator llvm::MDNode* m_md_unlikely; llvm::MDNode* m_md_likely; + llvm::MDNode* m_md_spu_memory_domain; + llvm::MDNode* m_md_spu_context_domain; struct block_info { @@ -139,7 +141,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator spu_recompiler_base::block_info* bb{}; // Current block's entry block - llvm::BasicBlock* block; + llvm::BasicBlock* block{}; // Final block (for PHI nodes, set after completion) llvm::BasicBlock* block_end{}; @@ -150,11 +152,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Current register values std::array reg{}; + // Optimization: restoring register state for registers that would be rewritten in other blocks + std::array reg_save_and_restore{}; + // PHI nodes created for this block (if any) std::array phi{}; // Store instructions std::array store{}; + bool block_wide_reg_store_elimination = false; // Store reordering/elimination protection std::array store_context_last_id = fill_array(0); // Protects against illegal forward ordering @@ -189,10 +195,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator }; // Current block - block_info* m_block; + block_info* m_block = nullptr; // Current function or chunk - function_info* m_finfo; + function_info* m_finfo = nullptr; + + // Reduced Loop Pattern information (if available) + reduced_loop_t* m_reduced_loop_info = nullptr; // All blocks in the current function chunk std::unordered_map> m_blocks; @@ -364,7 +373,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { if (i 
!= s_reg_lr && i != s_reg_sp && (i < s_reg_80 || i > s_reg_127)) { - m_block->reg[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i)); + m_block->reg[i] = get_reg_fixed(i, get_reg_type(i)); } } @@ -549,6 +558,40 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return _ptr(m_thread, ::offset32(offset_args...)); } + template + T* spu_mem_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + + return inst; + } + + template + T* spu_context_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + + return inst; + } + // Return default register type llvm::Type* get_reg_type(u32 index) { @@ -709,8 +752,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!reg) { + if (m_block && m_block->block_wide_reg_store_elimination) + { + fmt::throw_exception("Unexpected load: [%s] at 0x%x (gpr=r%d)", m_hash, m_pos, index); + } + // Load register value if necessary reg = m_finfo && m_finfo->load[index] ? 
m_finfo->load[index] : m_ir->CreateLoad(get_reg_type(index), init_reg_fixed(index)); + spu_context_attr(reg); } if (reg->getType() == get_type()) @@ -920,6 +969,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (m_block) { + if (m_block->block_wide_reg_store_elimination) + { + // Don't save registers for the current block iteration + // Affected optimizations: + // 1. Single-block reduced loop + return; + } + // Keep the store's location in history of gpr preservaions m_block->store_context_last_id[index] = m_block->store_context_ctr[index]; m_block->store_context_first_id[index] = std::min(m_block->store_context_first_id[index], m_block->store_context_ctr[index]); @@ -935,7 +992,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } // Write register to the context - _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_reg_type(index)), addr); + _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : bitcast(value, get_reg_type(index)), addr); + + spu_context_attr(_store); } template @@ -1046,7 +1105,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Update PC for current or explicitly specified instruction address void update_pc(u32 target = -1) { - m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? 
target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc)))->setVolatile(true); } // Call cpu_thread::check_state if necessary and return or continue (full check) @@ -1055,7 +1114,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto pstate = spu_ptr(&spu_thread::state); const auto _body = llvm::BasicBlock::Create(m_context, "", m_function); const auto check = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(get_type(), pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely); + m_ir->CreateCondBr(m_ir->CreateICmpEQ(spu_context_attr(m_ir->CreateLoad(get_type(), pstate, true)), m_ir->getInt32(0)), _body, check, m_md_likely); m_ir->SetInsertPoint(check); update_pc(addr); @@ -1066,14 +1125,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateCall(m_test_state, {m_thread}); if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateBr(_body); @@ -1509,6 +1568,16 @@ public: m_md_likely = llvm::MDTuple::get(m_context, {md_name, md_high, md_low}); m_md_unlikely = llvm::MDTuple::get(m_context, {md_name, md_low, md_high}); + const auto domain = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_mem")}); + const auto scope = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_mem_scope"), domain}); + + m_md_spu_memory_domain = llvm::MDNode::get(m_context, scope); + + const auto domain2 = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_ctx")}); + const 
auto scope2 = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_ctx_scope"), domain2}); + + m_md_spu_context_domain = llvm::MDNode::get(m_context, scope2); + // Initialize transform passes clear_transforms(); #ifdef ARCH_ARM64 @@ -1678,7 +1747,7 @@ public: // Emit state check const auto pstate = spu_ptr(&spu_thread::state); - m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type(), pstate), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); + m_ir->CreateCondBr(m_ir->CreateICmpNE(spu_context_attr(m_ir->CreateLoad(get_type(), pstate)), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); // Emit code check u32 check_iterations = 0; @@ -2059,6 +2128,43 @@ public: bool need_check = false; m_block->bb = &bb; + // [1gJ45f2-0x00a40]: 16.4982% (113258) + // [ZsQTud1-0x0924c]: 6.1202% (42014) + // [ZsQTud1-0x08e54]: 5.6610% (38862) + // [0000000-0x3fffc]: 4.3764% (30043) + // [Zh4tpJM-0x00bcc]: 3.7908% (26023) + // [CFt8hXu-0x063b8]: 3.6177% (24835) + // [8YJCUjv-0x0ad18]: 3.2417% (22254) + // [Try3XHn-0x0f018]: 2.3721% (16284) + // [s6ti9iu-0x07678]: 1.8464% (12675) + // [oyxkAPv-0x0c22c]: 1.7776% (12203) + // [Q0jLqH4-0x00324]: 1.6015% (10994) + static const std::array, 4> to_nop + { + { } + }; + + bool found_block = false; + + for (auto& [hash, pos] : to_nop) + { + if (m_hash.find(hash) <= 2 && baddr == pos) + { + found_block = true; + break; + } + } + + if (found_block) + { + for (u32 i = 0; i < 100; i++) + { + auto value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::last_getllar_lsa)); + auto mod_val = m_ir->CreateFDiv(value, llvm::ConstantFP::get(value->getType(), 1.1 + i)); + m_ir->CreateStore(value, spu_ptr(&spu_thread::last_getllar_lsa)); + } + } + if (!bb.preds.empty()) { // Initialize registers and build PHI nodes if necessary @@ -2160,12 +2266,540 @@ public: } } + if (bb.preds.size() >= 2) + { + if (g_cfg.core.spu_prof || g_cfg.core.spu_debug) + { + m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536) | (baddr 
>> 2)), spu_ptr(&spu_thread::block_hash)); + } + } + // State check at the beginning of the chunk if (need_check || (bi == 0 && g_cfg.core.spu_block_size != spu_block_size_type::safe)) { check_state(baddr); } + const bool is_reduced_loop = m_inst_attrs[(baddr - start) / 4] == inst_attr::reduced_loop; + m_reduced_loop_info = is_reduced_loop ? std::static_pointer_cast(ensure(m_patterns.at(baddr - start).info_ptr)).get() : nullptr; + + BasicBlock* block_optimization_phi_parent = nullptr; + const auto block_optimization_inner = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-it-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_exit_early = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-exit-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_next = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b2-0x%x", m_pos), m_function) : nullptr; + + std::array reduced_loop_phi_nodes{}; + std::array reduced_loop_init_regs{}; + + // Reserve additional iteration for rare case where GPR may not be rewritten after the iteration + // So that it would have to be rewritten by future code + // This avoids using additional PHI connectors + const u32 reserve_iterations = m_reduced_loop_info && m_reduced_loop_info->loop_may_update.count() != 0 ? 
3 : 2; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg_save_and_restore[i] = m_block->reg[i]; + } + } + + auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time) + { + llvm::ICmpInst::Predicate compare{}; + + switch (m_reduced_loop_info->cond_val_compare) + { + case CMP_SLESS: compare = ICmpInst::ICMP_SLT; break; + case CMP_SGREATER: compare = ICmpInst::ICMP_SGT; break; + case CMP_EQUAL: compare = ICmpInst::ICMP_EQ; break; + case CMP_LLESS: compare = ICmpInst::ICMP_ULT; break; + case CMP_LGREATER: compare = ICmpInst::ICMP_UGT; break; + case CMP_SGREATER_EQUAL: compare = ICmpInst::ICMP_SGE; break; + case CMP_SLOWER_EQUAL: compare = ICmpInst::ICMP_SLE; break; + case CMP_NOT_EQUAL: compare = ICmpInst::ICMP_NE; break; + case CMP_LGREATER_EQUAL: compare = ICmpInst::ICMP_UGE; break; + case CMP_LLOWER_EQUAL: compare = ICmpInst::ICMP_ULE; break; + { + break; + } + case CMP_UNKNOWN: + case CMP_NOT_EQUAL2: + case CMP_EQUAL2: + default: + { + ensure(false); + break; + } + } + + llvm::Value* loop_dictator_before_adjustment{}; + llvm::Value* loop_dictator_after_adjustment{}; + + spu_opcode_t reg_target{}; + reg_target.rt = static_cast(m_reduced_loop_info->cond_val_register_idx); + + if (reg_target.rt != m_reduced_loop_info->cond_val_register_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", m_reduced_loop_info->cond_val_register_idx); + } + + if (!m_block->reg[reg_target.rt]) + { + m_block->reg[reg_target.rt] = reduced_loop_init_regs[reg_target.rt]; + } + + switch (m_reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_dictator_before_adjustment = 
get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + default: + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", m_reduced_loop_info->cond_val_mask); + } + } + + const u32 type_bits = std::popcount(m_reduced_loop_info->cond_val_mask); + + llvm::Value* cond_val_incr = nullptr; + + if (m_reduced_loop_info->cond_val_incr_is_immediate) + { + cond_val_incr = m_ir->getIntN(type_bits, m_reduced_loop_info->cond_val_incr & m_reduced_loop_info->cond_val_mask); + } + else + { + spu_opcode_t reg_incr{}; + reg_incr.rt = static_cast(m_reduced_loop_info->cond_val_incr); + + if (reg_incr.rt != m_reduced_loop_info->cond_val_incr) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_incr); + } + switch (m_reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + } + } + + if (m_reduced_loop_info->cond_val_incr_before_cond && !m_reduced_loop_info->cond_val_incr_before_cond_taken_in_account) + { + loop_dictator_after_adjustment = m_ir->CreateAdd(loop_dictator_before_adjustment, cond_val_incr); + } + else + { + loop_dictator_after_adjustment = loop_dictator_before_adjustment; + } + + llvm::Value* loop_argument = nullptr; + + if (m_reduced_loop_info->cond_val_is_immediate) + { + loop_argument = m_ir->CreateTrunc(m_ir->getInt64(m_reduced_loop_info->cond_val_min & m_reduced_loop_info->cond_val_mask), 
loop_dictator_before_adjustment->getType()); + } + else + { + spu_opcode_t reg_target2{}; + reg_target2.rt = static_cast(m_reduced_loop_info->cond_val_register_argument_idx); + + if (reg_target2.rt != m_reduced_loop_info->cond_val_register_argument_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_register_argument_idx); + } + + switch (m_reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + } + } + + llvm::Value* condition = nullptr; + + if (reserve_iterations == 1) + { + condition = m_ir->CreateICmp(compare, loop_dictator_after_adjustment, loop_argument); + } + // else if ((m_reduced_loop_info->cond_val_compare == CMP_LGREATER || (m_reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && m_reduced_loop_info->cond_val_is_immediate && m_reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0) + // { + // const auto cond_val_incr_multiplied = m_ir->CreateMul(cond_val_incr, reserve_iterations - 1); + // condition = m_ir->CreateICmp(compare, select(m_ir->CreateICmpUGE(cond_val_incr_multiplied, loop_dictator_after_adjustment), m_ir->CreateAdd(loop_dictator_after_adjustment, cond_val_incr_multiplied), m_ir->getIntN(type_bits, 0)), loop_argument); + // } + else + { + //debugtrap(); + + llvm::Value* prev_it = loop_dictator_after_adjustment; + + for (u32 i = 0; i < reserve_iterations; i++) + { + if (i) + { + prev_it = m_ir->CreateAdd(prev_it, cond_val_incr); + } + + const auto also_cond = m_ir->CreateICmp(compare, prev_it, loop_argument); + condition = 
condition ? m_ir->CreateAnd(condition, also_cond) : also_cond; + } + } + + if (!is_second_time) + { + for (u32 i = 0, count = 0, prev_i = umax;; i++) + { + const bool is_last = !(count <= 20 && i < s_reg_max); + + if (is_last || m_reduced_loop_info->is_gpr_not_NaN_hint(i)) + { + count++; + + if (prev_i == umax) + { + if (!is_last) + { + prev_i = i; + continue; + } + + break; + } + + auto access_gpr = [&](u32 index) + { + spu_opcode_t op_arg{}; + op_arg.ra = index; + return get_vr(op_arg.ra); + }; + + // OR LSB to convert infinity to NaN + llvm::Value* arg1 = bitcast(access_gpr(prev_i) | splat(1)).eval(m_ir); + llvm::Value* arg2 = is_last ? arg1 : bitcast(access_gpr(i) | splat(1)).eval(m_ir); + + llvm::Value* acc = m_ir->CreateSExt(m_ir->CreateFCmpUNO(arg1, arg2), get_type()); + + // Pattern for PTEST + acc = m_ir->CreateBitCast(acc, get_type()); + + llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0}); + + for (u64 i = 1; i < 2; i++) + { + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i)); + } + + // Compare result with zero + const auto cond_nans = m_ir->CreateICmpEQ(elem, m_ir->getInt64(0)); + condition = m_ir->CreateAnd(cond_nans, condition); + prev_i = umax; + } + + if (is_last) + { + break; + } + } + + // TODO: Optimize so constant evaluated cases will not be checked + const bool is_cond_need_runtime_verify = compare == ICmpInst::ICMP_NE && (!m_reduced_loop_info->cond_val_is_immediate || m_reduced_loop_info->cond_val_incr % 2 == 0); + + if (is_cond_need_runtime_verify) + { + // Verify that it is actually possible to finish the loop and it is not an infinite loop + + // First: create a mask of the bits that definitely do not change between iterations (0 results in umax which is accurate here) + const auto no_change_bits = m_ir->CreateAnd(m_ir->CreateNot(cond_val_incr), m_ir->CreateSub(cond_val_incr, m_ir->getIntN(type_bits, 1))); + + // Compare that when the mask applied to both the result and the original value is the same + const auto 
cond_verify = m_ir->CreateICmpEQ(m_ir->CreateAnd(loop_dictator_after_adjustment, no_change_bits), m_ir->CreateAnd(loop_argument, no_change_bits)); + + // Amend condition + condition = m_ir->CreateAnd(cond_verify, condition); + } + } + + m_ir->CreateCondBr(condition, optimization_block, block_optimization_next); + }; + + if (is_reduced_loop) + { + for (u32 i = 0; i < s_reg_max; i++) + { + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); + + if (i < m_reduced_loop_info->loop_dicts.size() && (m_reduced_loop_info->loop_dicts.test(i) || m_reduced_loop_info->loop_writes.test(i))) + { + // Connect registers which are used and then modified by the block + auto value = m_block->reg[i]; + + if (!value || value->getType() != type) + { + value = get_reg_fixed(i, type); + } + + reduced_loop_init_regs[i] = value; + } + else if (i < m_reduced_loop_info->loop_dicts.size() && m_reduced_loop_info->loop_args.test(i)) + { + // Load registers used as arguments of the loop + if (!m_block->reg[i]) + { + m_block->reg[i] = get_reg_fixed(i, type); + } + } + } + + const auto prev_insert_block = m_ir->GetInsertBlock(); + + block_optimization_phi_parent = prev_insert_block; + + make_reduced_loop_condition(block_optimization_inner, false); + m_ir->SetInsertPoint(block_optimization_inner); + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto init_val = reduced_loop_init_regs[i]) + { + const auto _phi = m_ir->CreatePHI(init_val->getType(), 2, fmt::format("reduced_0x%05x_r%u", baddr, i)); + _phi->addIncoming(init_val, prev_insert_block); + + reduced_loop_phi_nodes[i] = _phi; + m_block->reg[i] = _phi; + } + } + + m_block->block_wide_reg_store_elimination = true; + } + + // Instructions emitting optimizations: Loop iteration is not the last + m_pos = baddr; + + // Masked opcode -> register modification times + std::map>> masked_times; + std::array reg_states{}; + u32 s_reg_state{1}; + + for (u32 iteration_emit = 
0; is_reduced_loop; m_pos += 4) + { + if (m_pos != baddr && m_block_info[m_pos / 4] && m_reduced_loop_info->loop_end < m_pos) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(1) too early at 0x%x", m_pos); + } + + if (!(m_pos >= start && m_pos < end)) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(2) too early at 0x%x", m_pos); + } + + if (m_ir->GetInsertBlock()->getTerminator()) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(3) too early at 0x%x", m_pos); + } + + const u32 op = std::bit_cast>(func.data[(m_pos - start) / 4]); + const auto itype = g_spu_itype.decode(op); + + if (itype & spu_itype::branch) + { + bool branches_back = false; + + for (u32 dest : op_branch_targets(m_pos, spu_opcode_t{op})) + { + branches_back = branches_back || dest == baddr; + } + + if (!branches_back) + { + continue; + } + + iteration_emit++; + + if (iteration_emit < 2) + { + // Reset mpos (with fixup) + m_pos = baddr - 4; + continue; + } + + // Optimization block body + const auto block_inner = m_ir->GetInsertBlock(); + + std::array block_reg_results{}; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto phi = reduced_loop_phi_nodes[i]) + { + const auto type = phi->getType() == get_type() ? 
get_type() : get_reg_type(i); + block_reg_results[i] = ensure(get_reg_fixed(i, type)); + phi->addIncoming(block_reg_results[i], block_inner); + } + } + + ensure(!!m_block->reg[m_reduced_loop_info->cond_val_register_idx]); + make_reduced_loop_condition(block_optimization_inner, true); + m_ir->SetInsertPoint(block_optimization_next); + m_block->block_wide_reg_store_elimination = false; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (const auto loop_value = block_reg_results[i]) + { + const auto phi = m_ir->CreatePHI(loop_value->getType(), 2, fmt::format("redres_0x%05x_r%u", baddr, i)); + + phi->addIncoming(loop_value, block_inner); + phi->addIncoming(reduced_loop_init_regs[i], block_optimization_phi_parent); + m_block->reg[i] = phi; + } + } + + + break; + } + + if (!op) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: [%s] Unexpected fallthrough to 0x%x (chunk=0x%x, entry=0x%x)", m_hash, m_pos, m_entry, m_function_queue[0]); + } + + const auto [reg_rt, reg_access, masked_op] = op_register_targets(m_pos, spu_opcode_t{op}); + + bool erased = false; + + const auto inst_times = std::array{reg_states[reg_access[0]], reg_states[reg_access[1]], reg_states[reg_access[2]]}; + + // Try to reuse the result of the previous iteration (if argument registers have not been modified) + if (reg_rt < 128 && masked_times.count(masked_op) && masked_times[masked_op].first && m_inst_attrs[(m_pos - start) / 4] == inst_attr::none) + { + auto times = masked_times[masked_op].second; + + bool is_ok = true; + for (u32 regi = 0; regi < 3; regi++) + { + if (reg_access[regi] < 128 && times[regi] != inst_times[regi]) + { + is_ok = false; + } + } + + if (is_ok) + { + m_block->reg[reg_rt] = masked_times[masked_op].first; + erased = true; + } + } + + if (reg_rt < 128) + { + reg_states[reg_rt] = s_reg_state++; + } + + if (erased) + { + continue; + } + + m_next_op = 0; + + masked_times[masked_op] = {}; + + switch (m_inst_attrs[(m_pos - start) / 4]) + { + case inst_attr::putllc0: + { + 
putllc0_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::putllc16: + { + putllc16_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::omit: + { + // TODO + continue; + } + default: break; + } + + // Execute recompiler function (TODO) + (this->*decode(op))({op}); + + if (reg_rt < 128 && itype & spu_itype::pure && reg_rt != reg_access[0] && reg_rt != reg_access[1] && reg_rt != reg_access[2]) + { + masked_times[masked_op] = {ensure(m_block->reg[reg_rt]), inst_times}; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg[i] = m_block->reg_save_and_restore[i]; + } + } + + m_reduced_loop_info = nullptr; + // Emit instructions for (m_pos = baddr; m_pos >= start && m_pos < end && !m_ir->GetInsertBlock()->getTerminator(); m_pos += 4) { @@ -2632,6 +3266,8 @@ public: m_ir->SetInsertPoint(ins); auto si = llvm::cast(m_ir->Insert(bs->clone())); + spu_context_attr(si); + if (b2->store[i] == nullptr) { // Protect against backwards ordering now @@ -2697,7 +3333,7 @@ public: continue; m_ir->SetInsertPoint(ins); - m_ir->Insert(bs->clone()); + m_ir->Insert(spu_context_attr(bs->clone())); } bs->eraseFromParent(); @@ -2802,12 +3438,9 @@ public: std::string& llvm_log = function_log; raw_string_ostream out(llvm_log); - if (g_cfg.core.spu_debug) - { - fmt::append(llvm_log, "LLVM IR at 0x%x:\n", func.entry_point); - out << *_module; // print IR - out << "\n\n"; - } + fmt::append(llvm_log, "LLVM IR at 0x%x:\n", func.entry_point); + out << *_module; // print IR + out << "\n\n"; if (verifyModule(*_module, &out)) { @@ -2950,7 +3583,7 @@ public: // Create interpreter table const auto if_type = get_ftype(); - m_function_table = new GlobalVariable(*m_module, ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); + m_function_table = new GlobalVariable(*m_module, ArrayType::get(get_type(), 1ull 
<< m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); init_luts(); @@ -2994,7 +3627,7 @@ public: m_ir->CreateStore(m_ir->CreateCall(get_intrinsic(Intrinsic::read_register), {rsp_name}), native_sp); // Decode (shift) and load function pointer - const auto first = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn))); + const auto first = m_ir->CreateLoad(get_type(), m_ir->CreateGEP(get_type(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn))); const auto call0 = m_ir->CreateCall(if_type, first, {m_lsptr, m_thread, m_interp_pc, m_interp_op, m_interp_table, m_interp_7f0, m_interp_regs}); call0->setCallingConv(CallingConv::GHC); m_ir->CreateRetVoid(); @@ -3138,7 +3771,7 @@ public: const auto next_pc = itype & spu_itype::branch ? m_interp_pc : m_interp_pc_next; const auto be32_op = m_ir->CreateLoad(get_type(), _ptr(m_lsptr, m_ir->CreateZExt(next_pc, get_type()))); const auto next_op = m_ir->CreateCall(get_intrinsic(Intrinsic::bswap), {be32_op}); - const auto next_if = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn))); + const auto next_if = m_ir->CreateLoad(get_type(), m_ir->CreateGEP(get_type(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn))); llvm::cast(next_if)->setVolatile(true); if (!(itype & spu_itype::branch)) @@ -3263,7 +3896,7 @@ public: } } - m_function_table->setInitializer(ConstantArray::get(ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), iptrs)); + m_function_table->setInitializer(ConstantArray::get(ArrayType::get(get_type(), 1ull << m_interp_magn), iptrs)); m_function_table = nullptr; for (auto& f : *_module) @@ -3274,12 +3907,9 @@ public: std::string llvm_log; raw_string_ostream out(llvm_log); - if (g_cfg.core.spu_debug) - { - fmt::append(llvm_log, "LLVM IR (interpreter):\n"); - out << *_module; // print IR - out << "\n\n"; - } + 
fmt::append(llvm_log, "LLVM IR (interpreter):\n"); + out << *_module; // print IR + out << "\n\n"; if (verifyModule(*_module, &out)) { @@ -3724,16 +4354,6 @@ public: { switch (op.ra) { - case SPU_WrOutMbox: - { - res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_mbox), true); - break; - } - case SPU_WrOutIntrMbox: - { - res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true); - break; - } case SPU_RdSigNotify1: { res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr1)); @@ -4757,6 +5377,50 @@ public: } const auto a = get_vr(op.ra); + +#ifdef ARCH_ARM64 + // Use dot product instructions with special values to shift then sum results into the preferred slot + if (m_use_dotprod) + { + if (match_vr(op.ra, [&](auto c, auto MP) + { + using VT = typename decltype(MP)::type; + + if (auto [ok, x] = match_expr(c, sext(match]>())); ok) + { + const auto zeroes = splat(0); + + const auto es = zshuffle(bitcast(a), 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 4, 8, 12); + + set_vr(op.rt, sdot(zeroes, es, build( + -0x01, -0x02, -0x04, -0x08, + -0x01, -0x02, -0x04, -0x08, + -0x01, -0x02, -0x04, -0x08, + -0x01, -0x02, -0x04, -0x08 + ))); + return true; + } + return false; + })) + { + return; + } + + const auto zeroes = splat(0); + const auto masked = a & 0x01; + + const auto es = zshuffle(bitcast(masked), 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 4, 8, 12); + + set_vr(op.rt, udot(zeroes, es, build( + 0x01, 0x02, 0x04, 0x08, + 0x01, 0x02, 0x04, 0x08, + 0x01, 0x02, 0x04, 0x08, + 0x01, 0x02, 0x04, 0x08 + ))); + return; + } +#endif + const auto m = zext(bitcast(trunc(a))); set_vr(op.rt, insert(splat(0), 3, eval(m))); } @@ -4772,6 +5436,54 @@ public: } const auto a = get_vr(op.ra); + +#ifdef ARCH_ARM64 + // Use dot product instructions with special values to shift then sum results into the preferred slot + if (m_use_dotprod) + { + if (match_vr(op.ra, [&](auto c, auto MP) + { + using VT = typename decltype(MP)::type; + + if (auto [ok, x] = 
match_expr(c, sext(match]>())); ok) + { + const auto zeroes = splat(0); + + const auto es = zshuffle(bitcast(a), 16, 16, 16, 16, 16, 16, 16, 16, 0, 2, 4, 6, 8, 10, 12, 14); + + const auto extracted = sdot(zeroes, es, build( + -0x01, -0x02, -0x04, -0x08, + -0x10, -0x20, -0x40, -0x80, + -0x01, -0x02, -0x04, -0x08, + -0x10, -0x20, -0x40, -0x80 + )); + + set_vr(op.rt, addp(zeroes, bitcast(extracted))); + return true; + } + return false; + })) + { + return; + } + + const auto zeroes = splat(0); + const auto masked = a & 0x01; + + const auto es = zshuffle(bitcast(masked), 16, 16, 16, 16, 16, 16, 16, 16, 0, 2, 4, 6, 8, 10, 12, 14); + + const auto extracted = udot(zeroes, es, build( + 0x01, 0x02, 0x04, 0x08, + 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, + 0x10, 0x20, 0x40, 0x80 + )); + + set_vr(op.rt, addp(zeroes, bitcast(extracted))); + return; + } +#endif + const auto m = zext(bitcast(trunc(a))); set_vr(op.rt, insert(splat(0), 3, eval(m))); } @@ -4780,6 +5492,53 @@ public: { const auto a = get_vr(op.ra); +#ifdef ARCH_ARM64 + // Use dot product instructions with special values to shift then sum results into the preferred slot + if (m_use_dotprod) + { + if (match_vr(op.ra, [&](auto c, auto MP) + { + using VT = typename decltype(MP)::type; + + if (auto [ok, x] = match_expr(c, sext(match]>())); ok) + { + const auto zeroes = splat(0); + + const auto extracted = sdot(zeroes, a, build( + -0x01, -0x02, -0x04, -0x08, + -0x10, -0x20, -0x40, -0x80, + -0x01, -0x02, -0x04, -0x08, + -0x10, -0x20, -0x40, -0x80 + )); + + const auto es = zshuffle(bitcast(extracted), 16, 16, 16, 16, 16, 16, 16, 16, 0, 8, 4, 12, 16, 16, 16, 16); + const auto zeroes16 = splat(0); + set_vr(op.rt, addp(zeroes16, bitcast(es))); + return true; + } + return false; + })) + { + return; + } + + const auto zeroes = splat(0); + const auto masked = a & 0x01; + + const auto extracted = udot(zeroes, masked, build( + 0x01, 0x02, 0x04, 0x08, + 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, + 0x10, 0x20, 0x40, 
0x80 + )); + + const auto es = zshuffle(bitcast(extracted), 16, 16, 16, 16, 16, 16, 16, 16, 0, 8, 4, 12, 16, 16, 16, 16); + const auto zeroes16 = splat(0); + set_vr(op.rt, addp(zeroes16, bitcast(es))); + return; + } +#endif + if (m_use_gfni) { const auto as = zshuffle(a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); @@ -5311,13 +6070,24 @@ public: return; } +#ifdef ARCH_ARM64 + if (m_use_dotprod) +#else if (m_use_vnni) +#endif { - const auto [a, b] = get_vrs(op.ra, op.rb); const auto zeroes = splat(0); +#ifdef ARCH_ARM64 + const auto [a, b] = get_vrs(op.ra, op.rb); + const auto ones = splat(0x01); + const auto ax = bitcast(udot(zeroes, a, ones)); + const auto bx = bitcast(udot(zeroes, b, ones)); +#else + const auto [a, b] = get_vrs(op.ra, op.rb); const auto ones = splat(0x01010101); const auto ax = bitcast(vpdpbusd(zeroes, a, ones)); const auto bx = bitcast(vpdpbusd(zeroes, b, ones)); +#endif set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14)); return; } @@ -5604,11 +6374,59 @@ public: void CEQI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = s16[8]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() == match())) << 16 >> 16); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 16 >> 16); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(MT == MT)); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + })) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void CEQHI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = s8[16]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() 
== match())) << 8 >> 8); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 8 >> 8); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(match() == match())); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + })) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } @@ -6185,8 +7003,13 @@ public: return eval(bitcast(min(bitcast(v),splat(0xff7fffff)))); } - value_t clamp_smax(value_t v) + value_t clamp_smax(value_t v, u32 gpr = s_reg_max) { + if (m_reduced_loop_info && gpr < s_reg_max && m_reduced_loop_info->is_gpr_not_NaN_hint(gpr)) + { + return v; + } + if (m_use_avx512) { if (is_input_positive(v)) @@ -6206,16 +7029,6 @@ public: return eval(clamp_positive_smax(clamp_negative_smax(v))); } - // FMA favouring zeros - value_t xmuladd(value_t a, value_t b, value_t c) - { - const auto ma = eval(sext(fcmp_uno(a != fsplat(0.)))); - const auto mb = eval(sext(fcmp_uno(b != fsplat(0.)))); - const auto ca = eval(bitcast(bitcast(a) & mb)); - const auto cb = eval(bitcast(bitcast(b) & ma)); - return eval(fmuladd(ca, cb, c)); - } - // Checks for postive and negative zero, or Denormal (treated as zero) // If sign is +-1 check equality againts all sign bits bool is_spu_float_zero(v128 a, int sign = 0) @@ -6302,12 +7115,6 @@ public: set_vr(op.rt, frsqest(get_vr(op.ra))); } - template - static llvm_calli fcgt(T&& a, U&& b) - { - return {"spu_fcgt", {std::forward(a), std::forward(b)}}; - } - void FCGT(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6316,11 +7123,8 @@ public: return; } - register_intrinsic("spu_fcgt", [&](llvm::CallInst* ci) + const auto fcgt = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - const value_t ab[2]{a, b}; std::bitset<2> safe_int_compare(0); @@ -6352,6 +7156,16 @@ public: } } + if (m_reduced_loop_info && 
m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra)) + { + safe_finite_compare.set(0); + } + + if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb)) + { + safe_finite_compare.set(1); + } + if (safe_int_compare.any()) { return eval(sext(bitcast(a) > bitcast(b))); @@ -6371,7 +7185,7 @@ public: const auto bi = eval(bitcast(b)); return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); - }); + }; set_vr(op.rt, fcgt(get_vr(op.ra), get_vr(op.rb))); } @@ -6468,12 +7282,6 @@ public: set_vr(op.rt, fa(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fs(T&& a, U&& b) - { - return {"spu_fs", {std::forward(a), std::forward(b)}}; - } - void FS(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6482,29 +7290,26 @@ public: return; } - register_intrinsic("spu_fs", [&](llvm::CallInst* ci) + const auto fs = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - const auto bc = clamp_smax(b); // for #4478 + const auto bc = clamp_smax(b, op.rb); // for #4478 return eval(a - bc); } else { return eval(a - b); } - }); + }; set_vr(op.rt, fs(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fm(T&& a, U&& b) + template , typename W = llvm_place_stealer_t> + static auto fm(T&& a, U&& b, V&& a_not_nan = match_stealer(), W&& b_not_nan = match_stealer()) { - return llvm_calli{"spu_fm", {std::forward(a), std::forward(b)}}.set_order_equality_hint(1, 1); + return llvm_calli{"spu_fm", {std::forward(a), std::forward(b), a_not_nan, b_not_nan}}.set_order_equality_hint(1, 1, 2, 3); } void FM(spu_opcode_t op) @@ -6519,14 +7324,27 @@ public: { const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); + const bool a_notnan = llvm::cast(ci->getOperand(2))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(3))->getZExtValue() 
!= 0; if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - if (a.value == b.value) + if (a.value == b.value || (a_notnan && b_notnan)) { return eval(a * b); } + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & ma)); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & mb)); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); return eval(bitcast(bitcast(a * b) & ma & mb)); @@ -6537,10 +7355,13 @@ public: } }); + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; + if (op.ra == op.rb && !m_interp_magn) { const auto a = get_vr(op.ra); - set_vr(op.rt, fm(a, a)); + set_vr(op.rt, fm(a, a, splat(a_notnan), splat(a_notnan))); return; } @@ -6579,7 +7400,7 @@ public: } } - set_vr(op.rt, fm(a, b)); + set_vr(op.rt, fm(a, b, splat(a_notnan), splat(b_notnan))); } template @@ -6872,10 +7693,10 @@ public: set_vr(op.rt4, fnms(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc))); } - template - static llvm_calli fma(T&& a, U&& b, V&& c) + template , typename X = llvm_place_stealer_t> + static llvm_calli fma(T&& a, U&& b, V&& c, W&& d = match_stealer(), X&& e = match_stealer()) { - return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c)}}.set_order_equality_hint(1, 1, 0); + return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c), std::forward(d), std::forward(e)}}.set_order_equality_hint(1, 1, 2, 3, 4); } template @@ -6894,14 +7715,35 @@ public: return; } + register_intrinsic("spu_fma", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - + const bool a_notnan = 
llvm::cast(ci->getOperand(3))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(4))->getZExtValue() != 0; + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { + if (a.value == b.value || (a_notnan && b_notnan)) + { + return fma32x4(a, b, c); + } + + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + const auto cb = bitcast(bitcast(b) & ma); + return fma32x4(a, eval(cb), c); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + const auto ca = bitcast(bitcast(a) & mb); + return fma32x4(eval(ca), b, c); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); const auto ca = bitcast(bitcast(a) & mb); @@ -6950,6 +7792,9 @@ public: const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = match(); + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 
1 : 0; + auto check_sqrt_pattern_for_float = [&](f32 float_value) -> bool { auto match_fnms = [&](f32 float_value) @@ -7145,7 +7990,13 @@ public: spu_log.todo("[%s:0x%05x] Unmatched spu_rsqrte(c) found in FMA", m_hash, m_pos); } - set_vr(op.rt4, fma(a, b, c)); + if (!m_interp_magn && op.ra == op.rb) + { + set_vr(op.rt4, fma(a, a, c, splat(a_notnan), splat(a_notnan))); + return; + } + + set_vr(op.rt4, fma(a, b, c, splat(a_notnan), splat(b_notnan))); } template @@ -7568,13 +8419,13 @@ public: void make_store_ls(value_t addr, value_t data) { const auto bswapped = byteswap(data); - m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value)); + spu_mem_attr(m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value))); } auto make_load_ls(value_t addr) { value_t data; - data.value = m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value)); + data.value = spu_mem_attr(m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value))); return byteswap(data); } @@ -7589,12 +8440,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); make_store_ls(addr, get_vr(op.rt)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + make_store_ls(addr, get_vr(op.rt)); + return; + } } } @@ -7613,12 +8470,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); set_vr(op.rt, make_load_ls(addr)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + set_vr(op.rt, make_load_ls(addr)); + return; + } } } @@ -7672,13 +8535,51 @@ public: } } - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 
0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x, y] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + + if (auto [ok2, data] = get_const_vector(y.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); make_store_ls(addr, get_vr(op.rt)); } void LQD(spu_opcode_t op) { - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x1, y1] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x1.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + + if (auto [ok2, data] = get_const_vector(y1.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); set_vr(op.rt, make_load_ls(addr)); } diff --git a/rpcs3/Emu/Cell/SPUOpcodes.h b/rpcs3/Emu/Cell/SPUOpcodes.h index cea4513e3f..42d76792a2 100644 --- a/rpcs3/Emu/Cell/SPUOpcodes.h +++ b/rpcs3/Emu/Cell/SPUOpcodes.h @@ -24,6 +24,20 @@ union spu_opcode_t bf_t i16; // 9..24 bf_t si16; // 9..24, signed bf_t i18; // 7..24 + + // For 16-bit instructions in the context of 
32-bits + u32 duplicate_si10() const + { + const u32 _16 = static_cast(static_cast(si10)); + return (_16 << 16) | _16; + } + + // For 8-bit instructions in the context of 32-bits + u32 duplicate_duplicate_si10() const + { + const u32 _8 = static_cast(si10 & 0xff); + return (_8 << 24) | (_8 << 16) | (_8 << 8) | _8; + } }; constexpr u32 spu_branch_target(u32 pc, u32 imm = 0) @@ -42,6 +56,7 @@ constexpr u32 spu_decode(u32 inst) } std::array op_branch_targets(u32 pc, spu_opcode_t op); +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op); // SPU decoder object. D provides functions. T is function pointer type returned. template diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 57d842e69d..fc74bcec90 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -4,12 +4,24 @@ #include "Utilities/lockless.h" #include "Utilities/address_range.h" #include "SPUThread.h" +#include "SPUAnalyser.h" #include #include #include #include #include +// std::bitset +template + requires requires(std::remove_cvref_t& x, T&& y) { x.count(); x.test(y); x.flip(y); } +[[nodiscard]] constexpr bool at32(CT&& container, T&& index, std::source_location src_loc = std::source_location::current()) +{ + const usz csv = container.size(); + if (csv <= std::forward(index)) [[unlikely]] + fmt::raw_range_error(src_loc, format_object_simplified(index), csv); + return container[std::forward(index)]; +} + // Helper class class spu_cache { @@ -201,6 +213,25 @@ public: __bitset_enum_max }; + enum compare_direction : u32 + { + CMP_TURNAROUND_FLAG = 0x1, + CMP_NEGATE_FLAG = 0x100, + CMP_SLESS = 0, + CMP_SGREATER = CMP_SLESS | CMP_TURNAROUND_FLAG, + CMP_EQUAL, + CMP_EQUAL2 = CMP_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LLESS, + CMP_LGREATER = CMP_LLESS | CMP_TURNAROUND_FLAG, + CMP_SGREATER_EQUAL = CMP_SLESS | CMP_NEGATE_FLAG, + CMP_SLOWER_EQUAL = CMP_SGREATER | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL = CMP_EQUAL | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL2 = 
CMP_NOT_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LGREATER_EQUAL = CMP_LLESS | CMP_NEGATE_FLAG, + CMP_LLOWER_EQUAL = CMP_LGREATER | CMP_NEGATE_FLAG, + CMP_UNKNOWN, + }; + struct reg_state_t { bs_t flag{+vf::is_null}; @@ -273,6 +304,408 @@ public: static u32 alloc_tag(bool reset = false) noexcept; }; + struct reduced_loop_t + { + bool active = false; // Single block loop detected + bool failed = false; + u32 loop_pc = SPU_LS_SIZE; + u32 loop_end = SPU_LS_SIZE; + + // False: single-block loop + // True: loop with a trailing block of aftermath (iteration update) stuff (like for (u32 i = 0; i < 10; /*update*/ i++)) + bool is_two_block_loop = false; + bool has_cond_state = false; + + // Loop stay-in state requirement + u64 cond_val_mask = umax; + u64 cond_val_min = 0; + u64 cond_val_size = 0; + compare_direction cond_val_compare{}; + u64 cond_val_incr = 0; + bool cond_val_incr_is_immediate = false; + u64 cond_val_register_argument_idx = umax; + u64 cond_val_register_idx = umax; + bool cond_val_incr_before_cond = false; + bool cond_val_incr_before_cond_taken_in_account = false; + bool cond_val_is_immediate = false; + + // Loop attributes + bool is_constant_expression = false; + bool is_secret = false; + + struct supplemental_condition_t + { + u64 immediate_value = umax; + u64 type_size = 0; + compare_direction val_compare{}; + }; + + // Supplemental loop condition: + // Inner conditions that depend on extrnal values (not produced inside the loop) + // all should evaluate to false in order for the optimization to work (at the moment) + // So succeeding can be treated linearly + u64 expected_sup_conds = 0; + u64 current_sup_conds_index = 0; + std::vector sup_conds; + + void take_cond_val_incr_before_cond_into_account() + { + if (cond_val_is_immediate && cond_val_incr_before_cond_taken_in_account && !cond_val_incr_before_cond_taken_in_account) + { + cond_val_min -= cond_val_incr; + cond_val_min &= cond_val_mask; + cond_val_incr_before_cond_taken_in_account = true; + } + } + + 
std::bitset loop_args; + std::bitset loop_dicts; + std::bitset loop_writes; + std::bitset loop_may_update; + std::bitset gpr_not_nans; + + struct origin_t + { + std::bitset regs{}; + u32 modified = 0; + spu_itype_t mod1_type = spu_itype::UNK; + spu_itype_t mod2_type = spu_itype::UNK; + spu_itype_t mod3_type = spu_itype::UNK; + u32 IMM = 0; + +private: + // Internal, please access using fixed order + spu_itype_t access_type(u32 i) const + { + if (i > modified) + { + return spu_itype::UNK; + } + + switch (i) + { + case 1: return mod1_type; + case 2: return mod2_type; + case 3: return mod3_type; + default: return spu_itype::UNK; + } + + return spu_itype::UNK; + } +public: + + spu_itype_t reverse1_type() + { + return access_type(modified); + } + + spu_itype_t reverse2_type() + { + return access_type(modified - 1); + } + + spu_itype_t reverse3_type() + { + return access_type(modified - 2); + } + + origin_t& join_with_this(const origin_t& rhs) + { + regs |= rhs.regs; + return *this; + } + + origin_t& join_with_this(u32 rhs) + { + regs.set(rhs); + return *this; + } + + origin_t& add_register_origin(u32 reg_val) + { + regs.set(reg_val); + return *this; + } + + bool is_single_reg_access(u32 reg_val) const + { + if (!modified) + { + return true; + } + + return regs.count() == 1 && ::at32(regs, reg_val); + } + + bool is_loop_dictator(u32 reg_val, bool test_predictable = false, bool should_predictable = true) const + { + if (!modified) + { + return false; + } + + if (regs.count() >= 1 && ::at32(regs, reg_val)) + { + if (!test_predictable) + { + return true; + } + + if (modified > 1) + { + return should_predictable ^ true; + } + + switch (mod1_type) + { + case spu_itype::A: + { + if (regs.count() == 2) + { + return should_predictable; + } + + return should_predictable ^ true; + } + case spu_itype::AI: + case spu_itype::AHI: + { + if (IMM && regs.count() == 1) + { + return should_predictable; + } + + return should_predictable ^ true; + } + default: break; + } + + return 
should_predictable ^ true; + } + + return false; + } + + bool is_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, true); + } + + bool is_non_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, false); + } + + bool is_null(u32 reg_val) const noexcept + { + if (modified) + { + return false; + } + + if (regs.count() - (::at32(regs, reg_val) ? 1 : 0)) + { + return false; + } + + return true; + } + + origin_t& add_instruction_modifier(spu_itype_t inst_type, u32 imm = 0) + { + if (inst_type == spu_itype::UNK) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + if (modified == 1) + { + if (modified == 3) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + bool is_ok = false; + switch (inst_type) + { + case spu_itype::XSBH: + { + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + break; + } + case spu_itype::ANDI: + { + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + is_ok &= (spu_opcode_t{imm}.si10 & 0xff) == 0xff; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = imm; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case 
spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = spu_opcode_t{imm}.si10; + break; + } + default: + { + break; + } + } + + if (!is_ok) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + (modified == 1 ? mod2_type : mod3_type) = inst_type; + modified++; + return *this; + } + + mod1_type = inst_type; + modified = 1; + + switch (inst_type) + { + case spu_itype::AHI: + { + IMM = spu_opcode_t{imm}.duplicate_si10(); + return *this; + } + case spu_itype::AI: + case spu_itype::ORI: + case spu_itype::XORI: + case spu_itype::ANDI: + + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + IMM = spu_opcode_t{imm}.si10; + return *this; + } + case spu_itype::ILA: + { + IMM = spu_opcode_t{imm}.i18; + return *this; + } + case spu_itype::IOHL: + case spu_itype::ILH: + case spu_itype::ILHU: + { + IMM = spu_opcode_t{imm}.i16; + return *this; + } + default: + { + IMM = imm; + break; + } + } + + return *this; + } + }; + + static origin_t make_reg(u32 reg_val) noexcept + { + origin_t org{}; + org.add_register_origin(reg_val); + return org; + } + + const origin_t* find_reg(u32 reg_val) const noexcept + { + for (auto& pair : regs) + { + if (pair.first == reg_val) + { + return &pair.second; + } + } + + return nullptr; + } + + origin_t* find_reg(u32 reg_val) noexcept + { + return const_cast(std::as_const(*this).find_reg(reg_val)); + } + + bool is_reg_null(u32 reg_val) const noexcept + { + if (const auto reg_found = find_reg(reg_val)) + { + return reg_found->is_null(reg_val); + } + + return true; + } + + bool is_gpr_not_NaN_hint(u32 i) const noexcept + { + return 
::at32(gpr_not_nans, i); + } + + origin_t get_reg(u32 reg_val) noexcept + { + const auto org = find_reg(reg_val); + return org ? *org : regs.emplace_back(reg_val, std::remove_reference_t{}).second; + } + + std::vector> regs; + + // Return old state for error reporting + reduced_loop_t discard() + { + const reduced_loop_t old = *this; + *this = reduced_loop_t{}; + return old; + } + }; + protected: spu_runtime* m_spurt{}; @@ -326,8 +759,14 @@ protected: // Set if the initial register value in this block may be xfloat std::bitset reg_maybe_xf{}; - // Bit mask of the registers used (before modified) - std::bitset reg_use{}; + // Set if register is used in floating pont instruction + std::bitset reg_maybe_float{}; + + // Set if register is used as shuffle mask + std::bitset reg_maybe_shuffle_mask{}; + + // Number of times registers are used (before modified) + std::array reg_use{}; // Bit mask of the trivial (u32 x 4) constant value resulting in this block std::bitset reg_const{}; @@ -391,18 +830,23 @@ protected: putllc16, putllc0, rchcnt_loop, + reduced_loop, }; std::vector m_inst_attrs; struct pattern_info { - u64 info; + // Info via integral + u64 info{}; + + // Info via additional erased-typed pointer + std::shared_ptr info_ptr; }; - std::unordered_map m_patterns; + std::map m_patterns; - void add_pattern(inst_attr attr, u32 start, u64 info); + void add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr = nullptr); private: // For private use @@ -435,7 +879,7 @@ public: spu_program analyse(const be_t* ls, u32 entry_point, std::map>* out_target_list = nullptr); // Print analyser internal state - void dump(const spu_program& result, std::string& out); + void dump(const spu_program& result, std::string& out, u32 block_min = 0, u32 block_max = SPU_LS_SIZE); // Get SPU Runtime spu_runtime& get_runtime() diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 7b70dcdbd2..60e0f99cca 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ 
b/rpcs3/Emu/Cell/SPUThread.cpp @@ -164,7 +164,7 @@ void fmt_class_string::format(std::string& out, u64 arg) out.resize(out.size() - 4); // Print chunk address from lowest 16 bits - fmt::append(out, "...chunk-0x%05x", (arg & 0xffff) * 4); + fmt::append(out, "-0x%05x", (arg & 0xffff) * 4); } enum class spu_block_hash_short : u64{}; @@ -495,7 +495,8 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write); extern thread_local u64 g_tls_fault_spu; -const spu_decoder s_spu_itype; +const extern spu_decoder g_spu_itype; +const extern spu_decoder g_spu_iflag; namespace vm { @@ -598,7 +599,7 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) { std::array res{spu_branch_target(pc + 4), umax}; - switch (const auto type = s_spu_itype.decode(op.opcode)) + switch (const auto type = g_spu_itype.decode(op.opcode)) { case spu_itype::BR: case spu_itype::BRA: @@ -639,6 +640,54 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) return res; } +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op) +{ + std::tuple, u32> result{u32{umax}, std::array{128, 128, 128}, op.opcode}; + + const auto type = g_spu_itype.decode(op.opcode); + + if (type & spu_itype::zregmod) + { + std::get<2>(result) = 0; + return result; + } + + std::get<0>(result) = type & spu_itype::_quadrop ? 
op.rt4 : op.rt; + + spu_opcode_t op_masked = op; + + if (type & spu_itype::_quadrop) + { + op_masked.rt4 = 0; + } + else + { + op_masked.rt = 0; + } + + std::get<2>(result) = op_masked.opcode; + + if (auto iflags = g_spu_iflag.decode(op.opcode)) + { + if (+iflags & +spu_iflag::use_ra) + { + std::get<1>(result)[0] = op.ra; + } + + if (+iflags & +spu_iflag::use_rb) + { + std::get<1>(result)[1] = op.rb; + } + + if (+iflags & +spu_iflag::use_rc) + { + std::get<1>(result)[2] = op.rc; + } + } + + return result; +} + void spu_int_ctrl_t::set(u64 ints) { // leave only enabled interrupts @@ -988,7 +1037,7 @@ std::vector> spu_thread::dump_callstack_list() const passed[i / 4] = true; const spu_opcode_t op{_ref(i)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (start == 0 && type == spu_itype::STQD && op.ra == 1u && op.rt == 0u) { @@ -1090,11 +1139,62 @@ std::vector> spu_thread::dump_callstack_list() const return call_stack_list; } -std::string spu_thread::dump_misc() const +void spu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); - fmt::append(ret, "Block Weight: %u (Retreats: %u)", block_counter, block_failure); + struct dump_misc_data_t + { + u32 cpu_id = umax; + u64 last_read_time = umax; + u64 last_block_counter = umax; + u64 update_count = 0; + + std::pair update(u64 current_block_counter, u64 current_timestamp = get_system_time()) + { + const u64 diff_time = current_timestamp <= last_read_time ? 0 : current_timestamp - last_read_time; + const u64 diff_block = current_block_counter <= last_block_counter ? 
0 : current_block_counter - last_block_counter; + + if (last_read_time == umax || update_count >= 1000) + { + last_read_time = current_timestamp; + last_block_counter = current_block_counter; + update_count = 0; + } + else if (diff_time >= 100000 && diff_block >= 100) + { + // Update values to measure rate (but not fully so rate can be measured later) + last_read_time += diff_time / 10 * 9; + last_block_counter += diff_block / 10 * 9; + update_count++; + } + + return {diff_time, diff_block}; + } + }; + + dump_misc_data_t* func_data = std::any_cast(&custom_data); + + if (!func_data) + { + custom_data.reset(); + custom_data = std::make_any(); + func_data = ensure(std::any_cast(&custom_data)); + } + + if (func_data->cpu_id != this->id) + { + *func_data = {}; + func_data->cpu_id = this->id; + } + + const u64 current_block_counter = atomic_storage::load(block_counter); + + const auto [diff_time, diff_block] = func_data->update(current_block_counter); + + const u64 rate_of_diff = diff_block ? 
std::max(1, utils::rational_mul(diff_block, 1'000'000, std::max(diff_time, 1))) : 0; + + fmt::append(ret, "Block Weight: log10(%u/second): %.1f (Retreats: %u)", rate_of_diff, std::log10(std::max(rate_of_diff, 10)), block_failure); if (u64 hash = atomic_storage::load(block_hash)) { @@ -1145,8 +1245,6 @@ std::string spu_thread::dump_misc() const break; } } - - return ret; } void spu_thread::cpu_on_stop() @@ -3761,7 +3859,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add const u32 addr0 = spu_branch_target(addr); const spu_opcode_t op{read_from_ptr>(ls_ptr, addr0 - base_addr)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (type == spu_itype::UNK || !op.opcode) { @@ -3907,7 +4005,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add // Test the validity of a single instruction of the optional target // This function can't be too slow and is unlikely to improve results by a great deal const u32 op0 = read_from_ptr>(ls_ptr, route_pc - base_addr); - const spu_itype::type type0 = s_spu_itype.decode(op0); + const spu_itype::type type0 = g_spu_itype.decode(op0); if (type0 == spu_itype::UNK || !op0) { @@ -6878,7 +6976,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span>(all_data, pc0 - 4); // Try to find function entry (if they are placed sequentially search for BI $LR of previous function) - if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK) + if (!op || op == 0x35000000u || g_spu_itype.decode(op) == spu_itype::UNK) { if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE })) break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 9596f7b006..889d6f291c 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -630,7 +630,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; virtual std::string dump_callstack() const override; virtual std::vector> 
dump_callstack_list() const override; - virtual std::string dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void cpu_task() override final; virtual void cpu_on_stop() override; virtual void cpu_return() override; diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 3b923e8c41..dbe11039e7 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -2219,6 +2219,28 @@ void lv2_obj::prepare_for_sleep(cpu_thread& cpu) cpu_counter::remove(&cpu); } +ppu_thread* lv2_obj::get_running_ppu(u32 index) +{ + usz thread_count = g_cfg.core.ppu_threads; + + if (index >= thread_count) + { + return nullptr; + } + + auto target = atomic_storage::load(g_ppu); + + for (usz cur = 0; target; target = atomic_storage::load(target->next_ppu), cur++) + { + if (cur == index) + { + return target; + } + } + + return nullptr; +} + void lv2_obj::notify_all() noexcept { for (auto cpu : g_to_notify) diff --git a/rpcs3/Emu/Cell/lv2/sys_cond.cpp b/rpcs3/Emu/Cell/lv2/sys_cond.cpp index 401ed3bd66..f66146ae01 100644 --- a/rpcs3/Emu/Cell/lv2/sys_cond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_cond.cpp @@ -59,6 +59,7 @@ CellError lv2_cond::on_id_create() if (!mutex) { _mutex = static_cast>(ensure(idm::get_unlocked(mtx_id))); + mutex = static_cast(_mutex.get()); } // Defer function diff --git a/rpcs3/Emu/Cell/lv2/sys_config.cpp b/rpcs3/Emu/Cell/lv2/sys_config.cpp index 0179a6b7f2..54cb2d6ce8 100644 --- a/rpcs3/Emu/Cell/lv2/sys_config.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_config.cpp @@ -174,20 +174,28 @@ bool lv2_config_service_listener::check_service(const lv2_config_service& servic return true; } -bool lv2_config_service_listener::notify(const shared_ptr& event) -{ - service_events.emplace_back(event); - return event->notify(); -} - bool lv2_config_service_listener::notify(const shared_ptr& service) { - if (!check_service(*service)) - return false; + { + std::lock_guard lock(mutex_service_events); - // Create 
service event and notify queue! - const auto event = lv2_config_service_event::create(handle, service, *this); - return notify(event); + if (!check_service(*service)) + return false; + + // Create service event and notify queue! + const auto event = lv2_config_service_event::create(handle, service, *this); + service_events.emplace_back(event); + + if (!event->notify()) + { + // If we fail to deliver the event to the queue just clean the event up or it'll hold the listener alive forever + g_fxo->get().remove_service_event(event->id); + service_events.pop_back(); + return false; + } + } + + return true; } void lv2_config_service_listener::notify_all() @@ -267,7 +275,7 @@ void lv2_config_service_event::write(sys_config_service_event_t *dst) const { const auto registered = service->is_registered(); - dst->service_listener_handle = listener.get_id(); + dst->service_listener_handle = listener_id; dst->registered = registered; dst->service_id = service->id; dst->user_id = service->user_id; @@ -346,7 +354,7 @@ error_code sys_config_get_service_event(u32 config_hdl, u32 event_id, vm::ptrget().find_event(event_id); - if (!event) + if (!event || event->handle != cfg) { return CELL_ESRCH; } diff --git a/rpcs3/Emu/Cell/lv2/sys_config.h b/rpcs3/Emu/Cell/lv2/sys_config.h index 0804671141..aaf4c6f5d0 100644 --- a/rpcs3/Emu/Cell/lv2/sys_config.h +++ b/rpcs3/Emu/Cell/lv2/sys_config.h @@ -296,11 +296,10 @@ private: // The service listener owns the service events - service events will not be freed as long as their corresponding listener exists // This has been confirmed to be the case in realhw + shared_mutex mutex_service_events; std::vector> service_events; shared_ptr handle; - bool notify(const shared_ptr& event); - public: const sys_config_service_id service_id; const u64 min_verbosity; @@ -370,14 +369,14 @@ public: // This has been confirmed to be the case in realhw const shared_ptr handle; const shared_ptr service; - const lv2_config_service_listener& listener; + const u32 
listener_id; // Constructors (should not be used directly) lv2_config_service_event(shared_ptr _handle, shared_ptr _service, const lv2_config_service_listener& _listener) noexcept : id(get_next_id()) , handle(std::move(_handle)) , service(std::move(_service)) - , listener(_listener) + , listener_id(_listener.get_id()) { } diff --git a/rpcs3/Emu/Cell/lv2/sys_dbg.cpp b/rpcs3/Emu/Cell/lv2/sys_dbg.cpp index 27fa51b148..eb04cff61b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_dbg.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_dbg.cpp @@ -105,7 +105,7 @@ error_code sys_dbg_write_process_memory(s32 pid, u32 address, u32 size, vm::cptr i += op_size; } - if (!is_exec || i >= end) + if ((!is_exec || i >= end) && exec_update_size > 0) { // Commit executable data update // The read memory is also super ptr so memmove can work correctly on all implementations diff --git a/rpcs3/Emu/Cell/lv2/sys_event.cpp b/rpcs3/Emu/Cell/lv2/sys_event.cpp index 8b3de14bf7..c5fa15cf55 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_event.cpp @@ -170,8 +170,7 @@ CellError lv2_event_queue::send(lv2_event event, bool* notified_thread, lv2_even { if (auto cpu = get_current_cpu_thread()) { - cpu->state += cpu_flag::again; - cpu->state += cpu_flag::exit; + cpu->state += cpu_flag::again + cpu_flag::exit; } sys_event.warning("Ignored event!"); @@ -309,6 +308,15 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) return CELL_EBUSY; } + for (auto cpu = head; cpu; cpu = cpu->get_next_cpu()) + { + if (cpu->state & cpu_flag::again) + { + ppu.state += cpu_flag::again; + return CELL_EAGAIN; + } + } + if (!queue.events.empty()) { // Copy events for logging, does not empty @@ -321,17 +329,6 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) { qlock.unlock(); } - else - { - for (auto cpu = head; cpu; cpu = cpu->get_next_cpu()) - { - if (cpu->state & cpu_flag::again) - { - ppu.state += cpu_flag::again; - return CELL_EAGAIN; - } - } - } return {}; }); @@ 
-621,7 +618,7 @@ error_code sys_event_port_create(cpu_thread& cpu, vm::ptr eport_id, s32 por sys_event.warning("sys_event_port_create(eport_id=*0x%x, port_type=%d, name=0x%llx)", eport_id, port_type, name); - if (port_type != SYS_EVENT_PORT_LOCAL && port_type != 3) + if (port_type != SYS_EVENT_PORT_LOCAL && port_type != SYS_EVENT_PORT_IPC) { sys_event.error("sys_event_port_create(): unknown port type (%d)", port_type); return CELL_EINVAL; @@ -675,8 +672,9 @@ error_code sys_event_port_connect_local(cpu_thread& cpu, u32 eport_id, u32 equeu std::lock_guard lock(id_manager::g_mutex); const auto port = idm::check_unlocked(eport_id); + auto queue = idm::get_unlocked(equeue_id); - if (!port || !idm::check_unlocked(equeue_id)) + if (!port || !queue) { return CELL_ESRCH; } @@ -691,7 +689,7 @@ error_code sys_event_port_connect_local(cpu_thread& cpu, u32 eport_id, u32 equeu return CELL_EISCONN; } - port->queue = idm::get_unlocked(equeue_id); + port->queue = std::move(queue); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_event.h b/rpcs3/Emu/Cell/lv2/sys_event.h index 6c43798a30..8364361e6c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event.h +++ b/rpcs3/Emu/Cell/lv2/sys_event.h @@ -7,7 +7,6 @@ #include class cpu_thread; -class spu_thrread; // Event Queue Type enum : u32 diff --git a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp index c28efaf711..89a6c42ac5 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp @@ -19,6 +19,22 @@ lv2_event_flag::lv2_event_flag(utils::serial& ar) ar(pattern); } +// Always set result +struct sys_event_store_result +{ + vm::ptr ptr; + u64 val = 0; + + ~sys_event_store_result() noexcept + { + if (ptr) + { + cpu_thread::get_current()->check_state(); + *ptr = val; + } + } +}; + std::function lv2_event_flag::load(utils::serial& ar) { return load_func(make_shared(stx::exact_t(ar))); @@ -120,21 +136,7 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm 
ppu.gpr[5] = mode; ppu.gpr[6] = 0; - // Always set result - struct store_result - { - vm::ptr ptr; - u64 val = 0; - - ~store_result() noexcept - { - if (ptr) - { - cpu_thread::get_current()->check_state(); - *ptr = val; - } - } - } store{result}; + sys_event_store_result store{result}; if (!lv2_event_flag::check_mode(mode)) { @@ -273,21 +275,7 @@ error_code sys_event_flag_trywait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, sys_event_flag.trace("sys_event_flag_trywait(id=0x%x, bitptn=0x%llx, mode=0x%x, result=*0x%x)", id, bitptn, mode, result); - // Always set result - struct store_result - { - vm::ptr ptr; - u64 val = 0; - - ~store_result() noexcept - { - if (ptr) - { - cpu_thread::get_current()->check_state(); - *ptr = val; - } - } - } store{result}; + sys_event_store_result store{result}; if (!lv2_event_flag::check_mode(mode)) { @@ -556,8 +544,6 @@ error_code sys_event_flag_get(ppu_thread& ppu, u32 id, vm::ptr flags) return +flag.pattern; }); - ppu.check_state(); - if (!flag) { if (flags) *flags = 0; @@ -569,6 +555,8 @@ error_code sys_event_flag_get(ppu_thread& ppu, u32 id, vm::ptr flags) return CELL_EFAULT; } + ppu.check_state(); + *flags = flag.ret; return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index 961d0d6ad3..cfbab23419 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -14,7 +14,6 @@ #include "Emu/system_utils.hpp" #include "Emu/Cell/lv2/sys_process.h" -#include #include #include @@ -93,15 +92,22 @@ void fmt_class_string::format(std::string& out, u64 arg) bool has_fs_write_rights(std::string_view vpath) { // VSH has access to everything - if (g_ps3_process_info.has_root_perm()) - return true; + const bool has_root_perm = g_ps3_process_info.has_root_perm(); - const auto norm_vpath = lv2_fs_object::get_normalized_path(vpath); - const auto parent_dir = fs::get_parent_dir_view(norm_vpath); + const auto parent_dir = fs::get_parent_dir_view(vpath); + const auto [dev_root, trail] = 
lv2_fs_object::get_path_root_and_trail(parent_dir); // This is not exhaustive, PS3 has a unix filesystem with rights for each directory and files - // This is mostly meant to protect against games doing insane things(ie NPUB30003 => NPUB30008) - if (parent_dir == "/dev_hdd0" || parent_dir == "/dev_hdd0/game") + // This is mostly meant to protect against games doing insane things (ie NPUB30003 => NPUB30008) + if (dev_root == "dev_hdd0"sv && (trail.empty() || trail == "game"sv)) + return has_root_perm; + + // This is read-only for games + if (dev_root.starts_with("dev_flash"sv)) + return has_root_perm; + + // Technically should not reach here, but handle it anyways + if (dev_root == "dev_bdvd"sv || dev_root == "dev_ps2disc"sv || dev_root.empty()) return false; return true; @@ -140,6 +146,32 @@ bool verify_mself(const fs::file& mself_file) return true; } +// TODO: May not be thread-safe (or even, process-safe) +bool has_non_directory_components(std::string_view path) +{ + std::string path0{path}; + + while (true) + { + const std::string sub_path = fs::get_parent_dir(path0); + + if (sub_path.size() >= path0.size()) + { + break; + } + + fs::stat_t stat{}; + if (fs::get_stat(sub_path, stat)) + { + return !stat.is_directory; + } + + path0 = std::move(sub_path); + } + + return false; +} + lv2_fs_mount_info_map::lv2_fs_mount_info_map() { for (auto mp = &g_mp_sys_dev_root; mp; mp = mp->next) // Scan and keep track of pre-mounted devices @@ -179,27 +211,29 @@ bool lv2_fs_mount_info_map::remove(std::string_view path) const lv2_fs_mount_info& lv2_fs_mount_info_map::lookup(std::string_view path, bool no_cell_fs_path, std::string* mount_path) const { - if (path.starts_with("/"sv)) + const auto [dev_root, trail] = lv2_fs_object::get_path_root_and_trail(path); + + if (dev_root.empty()) + { + if (trail.empty()) + { + return map.find("/")->second; + } + + return g_mi_sys_not_found; + } + + if (const auto iterator = map.find("/" + std::string{dev_root}); iterator != map.end()) { 
constexpr std::string_view cell_fs_path = "CELL_FS_PATH:"sv; - const std::string normalized_path = lv2_fs_object::get_normalized_path(path); - std::string_view parent_dir; - u32 parent_level = 0; - do - { - parent_dir = fs::get_parent_dir_view(normalized_path, parent_level++); - if (const auto iterator = map.find(parent_dir); iterator != map.end()) - { - if (iterator->second == &g_mp_sys_dev_root && parent_level > 1) - break; - if (no_cell_fs_path && iterator->second.device.starts_with(cell_fs_path)) - return lookup(iterator->second.device.substr(cell_fs_path.size()), no_cell_fs_path, mount_path); // Recursively look up the parent mount info - if (mount_path) - *mount_path = iterator->first; - return iterator->second; - } - } while (parent_dir.length() > 1); // Exit the loop when parent_dir == "/" or empty + if (no_cell_fs_path && iterator->second.device.starts_with(cell_fs_path)) + return lookup(iterator->second.device.substr(cell_fs_path.size()), no_cell_fs_path, mount_path); // Recursively look up the parent mount info + + if (mount_path) + *mount_path = iterator->first; + + return iterator->second; } return g_mi_sys_not_found; @@ -261,36 +295,89 @@ bool lv2_fs_mount_info_map::vfs_unmount(std::string_view vpath, bool remove_from return result; } -std::string lv2_fs_object::get_normalized_path(std::string_view path) +std::pair lv2_fs_object::get_path_root_and_trail(std::string_view filename) { - std::string normalized_path = std::filesystem::path(path).lexically_normal().string(); - -#ifdef _WIN32 - std::replace(normalized_path.begin(), normalized_path.end(), '\\', '/'); -#endif - - if (normalized_path.ends_with('/')) - normalized_path.pop_back(); - - return normalized_path.empty() ? "/" : normalized_path; -} - -std::string lv2_fs_object::get_device_root(std::string_view filename) -{ - std::string path = get_normalized_path(filename); // Prevent getting fooled by ".." 
trick such as "/dev_usb000/../dev_flash" - - if (const auto first = path.find_first_not_of("/"sv); first != umax) + if (filename.empty()) { - if (const auto pos = path.substr(first).find_first_of("/"sv); pos != umax) - path = path.substr(0, first + pos); - path = path.substr(std::max>(0, first - 1)); // Remove duplicate leading '/' while keeping only one - } - else - { - path = path.substr(0, 1); + // Should CELL_ENOENT later - root cannot have a trail + return {""sv, "ENOENT"}; } - return path; + std::string_view root; + std::string trail; + + usz level = 0; + usz pos = 0; + + while (pos != umax) + { + const usz ndl_pos = filename.find_first_not_of("/", pos); + + if (ndl_pos == pos) + { + // Should CELL_ENOENT later - root cannot have a trail + return {""sv, "ENOENT"}; + } + + if (ndl_pos == umax) + { + break; + } + + const usz dl_pos = ndl_pos == umax ? usz{umax} : filename.find_first_of("/", ndl_pos); + std::string_view component = filename.substr(ndl_pos, dl_pos - ndl_pos); + + if (component == "."sv) + { + // No change + // level += 0; + pos = dl_pos; + continue; + } + + if (component == ".."sv) + { + if (level > 1) + { + ensure(!trail.empty()); + trail.resize(trail.find_last_of("/") + 1); + trail.resize(trail.find_last_not_of("/") + 1); + } + else if (level == 1) + { + // Reset root + root = {}; + } + else//if (level == 0) + { + // Should CELL_ENOENT later - root cannot have a trail + return {""sv, "ENOENT"}; + } + + ensure(level)--; + pos = dl_pos; + continue; + } + + if (level == 0) + { + root = component; + } + else if (trail.empty()) + { + trail = std::string{component}; + } + else + { + trail += "/"; + trail.append(component); + } + + level++; + pos = dl_pos; + } + + return { root, std::move(trail) }; } lv2_fs_mount_point* lv2_fs_object::get_mp(std::string_view filename, std::string* vfs_path) @@ -302,7 +389,7 @@ lv2_fs_mount_point* lv2_fs_object::get_mp(std::string_view filename, std::string filename.remove_prefix(cell_fs_path.size()); const bool 
is_path = filename.starts_with("/"sv); - std::string mp_name = is_path ? get_device_root(filename) : std::string(filename); + std::string mp_name = is_path ? std::string{get_path_root_and_trail(filename).first} : std::string(filename); const auto check_mp = [&]() { @@ -382,10 +469,12 @@ lv2_fs_object::lv2_fs_object(utils::serial& ar, bool) u64 lv2_file::op_read(const fs::file& file, vm::ptr buf, u64 size, u64 opt_pos) { - if (u64 region = buf.addr() >> 28, region_end = (buf.addr() & 0xfff'ffff) + (size & 0xfff'ffff); region == region_end && ((region >> 28) == 0 || region >= 0xC)) + if (u64 region = buf.addr() >> 28, region_end = (buf.addr() + size) >> 28; + size < u32{umax} && region == region_end && (region == 0 || region == 0xD) && vm::check_addr(buf.addr(), vm::page_writable, static_cast(size))) { // Optimize reads from safe memory - return (opt_pos == umax ? file.read(buf.get_ptr(), size) : file.read_at(opt_pos, buf.get_ptr(), size)); + const auto buf_ptr = vm::get_super_ptr(buf.addr()); + return (opt_pos == umax ? 
file.read(buf_ptr, size) : file.read_at(opt_pos, buf_ptr, size)); } // Copy data from intermediate buffer (avoid passing vm pointer to a native API) @@ -412,6 +501,14 @@ u64 lv2_file::op_read(const fs::file& file, vm::ptr buf, u64 size, u64 opt u64 lv2_file::op_write(const fs::file& file, vm::cptr buf, u64 size) { + if (u64 region = buf.addr() >> 28, region_end = (buf.addr() + size) >> 28; + size < u32{umax} && region == region_end && (region == 0 || region == 0xD) && vm::check_addr(buf.addr(), vm::page_readable, static_cast(size))) + { + // Optimize writes from safe memory + const auto buf_ptr = vm::get_super_ptr(buf.addr()); + return file.write(buf_ptr, size); + } + // Copy data to intermediate buffer (avoid passing vm pointer to a native API) std::vector local_buf(std::min(size, 65536)); @@ -889,11 +986,18 @@ lv2_file::open_raw_result_t lv2_file::open_raw(const std::string& local_path, s3 switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR}; case fs::error::noent: return {CELL_ENOENT}; - default: sys_fs.error("lv2_file::open(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return {CELL_ENOTDIR}; + } - return {CELL_EIO}; + fmt::throw_exception("unknown error %s", error); + } + } } if (flags & CELL_FS_O_MSELF && !verify_mself(file)) @@ -1362,10 +1466,18 @@ error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) break; } + case fs::error::notdir: + { + return { CELL_ENOTDIR, path }; + } default: { - sys_fs.error("sys_fs_opendir(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown error %s", error); } } } @@ -1391,7 +1503,8 @@ error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) // Add additional entries for split file candidates (while ends with .66600) while (mp.mp != &g_mp_sys_dev_hdd1 && 
data.back().name.ends_with(".66600")) { - data.emplace_back(data.back()).name.resize(data.back().name.size() - 6); + fs::dir_entry copy = data.back(); + data.emplace_back(copy).name.resize(copy.name.size() - 6); } } @@ -1547,6 +1660,10 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { // Try to analyse split file (TODO) @@ -1586,8 +1703,12 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr } default: { - sys_fs.error("sys_fs_stat(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown error %s", error); } } } @@ -1713,6 +1834,10 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; @@ -1721,10 +1846,16 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { return {sys_fs.warning, CELL_EEXIST, path}; } - default: sys_fs.error("sys_fs_mkdir(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? 
+ fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_mkdir(): directory %s created", path); @@ -1784,12 +1915,19 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR, from}; case fs::error::noent: return {CELL_ENOENT, from}; case fs::error::exist: return {CELL_EEXIST, to}; - default: sys_fs.error("sys_fs_rename(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_from)) + { + return {CELL_ENOTDIR, from}; + } - return {CELL_EIO, from}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_rename(): %s renamed to %s", from, to); @@ -1839,12 +1977,19 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR, path}; case fs::error::noent: return {CELL_ENOENT, path}; case fs::error::notempty: return {CELL_ENOTEMPTY, path}; - default: sys_fs.error("sys_fs_rmdir(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_rmdir(): directory %s removed", path); @@ -1895,14 +2040,24 @@ error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path }; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_unlink(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? 
+ fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_unlink(): file %s deleted", path); @@ -2147,6 +2302,7 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 sys_fs.notice("sys_fs_fcntl(0xc0000006): %s", vpath); // Check only mountpoint + vpath = vpath.substr(0, vpath.find_first_of('\0')); vpath = vpath.substr(0, vpath.find_first_of("/", 1)); // Some mountpoints seem to be handled specially @@ -2620,10 +2776,8 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr switch (auto error = fs::g_tls_error) { case fs::error::inval: return {CELL_EINVAL, "fd=%u, offset=0x%x, whence=%d", fd, offset, whence}; - default: sys_fs.error("sys_fs_lseek(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return CELL_EIO; // ??? } lock.unlock(); @@ -2635,8 +2789,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) { - lv2_obj::sleep(ppu); - sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd); const auto file = idm::get_unlocked(fd); @@ -2661,8 +2813,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) { - lv2_obj::sleep(ppu); - sys_fs.trace("sys_fs_fsync(fd=%d)", fd); const auto file = idm::get_unlocked(fd); @@ -2743,10 +2893,16 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr(ppu.test_stopped()); @@ -2797,14 +2953,24 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? 
sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_truncate(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } return CELL_OK; @@ -2850,10 +3016,11 @@ error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) switch (auto error = fs::g_tls_error) { case fs::error::ok: - default: sys_fs.error("sys_fs_ftruncate(): unknown error %s", error); + default: + { + fmt::throw_exception("unknown error %s", error); + } } - - return CELL_EIO; // ??? } return CELL_OK; @@ -2899,18 +3066,14 @@ error_code sys_fs_chmod(ppu_thread&, vm::cptr path, s32 mode) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { // Try to locate split files - for (u32 i = 66601; i <= 66699; i++) - { - if (mp != &g_mp_sys_dev_hdd1 && !fs::get_stat(fmt::format("%s.%u", local_path, i), info) && !info.is_directory) - { - break; - } - } - if (fs::get_stat(local_path + ".66600", info) && !info.is_directory) { break; @@ -2920,8 +3083,12 @@ error_code sys_fs_chmod(ppu_thread&, vm::cptr path, s32 mode) } default: { - sys_fs.error("sys_fs_chmod(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown error %s", error); } } } @@ -3053,14 +3220,24 @@ error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr dev_name, vm::cptr return {path_error, path_sv}; } - const std::string vpath = lv2_fs_object::get_normalized_path(path_sv); + const auto [root_name, trail] = lv2_fs_object::get_path_root_and_trail(path_sv); std::string vfs_path; const auto mp = lv2_fs_object::get_mp(device_name, &vfs_path); @@ -3304,8 +3481,8 @@ error_code sys_fs_mount(ppu_thread& ppu, vm::cptr dev_name, vm::cptr if 
(vfs_path.empty()) return {CELL_ENOTSUP, device_name}; - if (vpath.find_first_not_of('/') == umax || !vfs::get(vpath).empty()) - return {CELL_EEXIST, vpath}; + if (root_name.empty() || !vfs::get(path_sv).empty()) + return {CELL_EEXIST, path_sv}; if (mp == &g_mp_sys_dev_hdd1) { @@ -3340,7 +3517,7 @@ error_code sys_fs_mount(ppu_thread& ppu, vm::cptr dev_name, vm::cptr } } - if (!vfs::mount(vpath, vfs_path, !is_simplefs)) + if (!vfs::mount("/" + std::string{root_name}, vfs_path, !is_simplefs)) { if (is_simplefs) { @@ -3357,7 +3534,7 @@ error_code sys_fs_mount(ppu_thread& ppu, vm::cptr dev_name, vm::cptr return CELL_EIO; } - g_fxo->get().add(vpath, mp, device_name, filesystem, prot); + g_fxo->get().add("/" + std::string{root_name}, mp, device_name, filesystem, prot); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.h b/rpcs3/Emu/Cell/lv2/sys_fs.h index e64a2b4edb..c78ad7b5a2 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.h +++ b/rpcs3/Emu/Cell/lv2/sys_fs.h @@ -4,6 +4,7 @@ #include "Emu/Cell/ErrorCodes.h" #include "Utilities/File.h" #include "Utilities/StrUtil.h" +#include "Utilities/mutex.h" #include @@ -245,11 +246,15 @@ public: lv2_fs_object& operator=(const lv2_fs_object&) = delete; - // Normalize a virtual path - static std::string get_normalized_path(std::string_view path); + // Get the device's root path (e.g. "/dev_hdd0") from a given path + // Cut the trail and return it in seccond argument + static std::pair get_path_root_and_trail(std::string_view path); // Get the device's root path (e.g. 
"/dev_hdd0") from a given path - static std::string get_device_root(std::string_view filename); + static std::string get_device_root(std::string_view filename) + { + return std::string{get_path_root_and_trail(filename).first}; + } // Filename can be either a path starting with '/' or a CELL_FS device name // This should be used only when handling devices that are not mounted diff --git a/rpcs3/Emu/Cell/lv2/sys_gamepad.cpp b/rpcs3/Emu/Cell/lv2/sys_gamepad.cpp index 26ffbc2696..55fd7cb885 100644 --- a/rpcs3/Emu/Cell/lv2/sys_gamepad.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_gamepad.cpp @@ -67,7 +67,6 @@ u32 sys_gamepad_ycon_is_gem(vm::ptr in, vm::ptr out) // syscall(621,packet_id,u8 *in,u8 *out) Talk:LV2_Functions_and_Syscalls#Syscall_621_.280x26D.29 gamepad_if usage u32 sys_gamepad_ycon_if(u8 packet_id, vm::ptr in, vm::ptr out) { - switch (packet_id) { case 0: diff --git a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp index 43bfb308ba..ebe4dfc07c 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp @@ -487,6 +487,8 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id { ensure(cond.unqueue(cond.sq, &ppu)); ppu.state += cpu_flag::again; + cond.lwmutex_waiters--; + mutex->lwcond_waiters--; return; } diff --git a/rpcs3/Emu/Cell/lv2/sys_memory.cpp b/rpcs3/Emu/Cell/lv2/sys_memory.cpp index 6110d7d4d5..0f6fb09865 100644 --- a/rpcs3/Emu/Cell/lv2/sys_memory.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_memory.cpp @@ -15,6 +15,18 @@ LOG_CHANNEL(sys_memory); // static shared_mutex s_memstats_mtx; +// This struct is for reduced logging repetition +struct last_reported_memory_stats +{ + struct inner_body + { + u32 prev_total = umax; + u32 prev_avail = umax; + }; + + atomic_t body{}; +}; + lv2_memory_container::lv2_memory_container(u32 size, bool from_idm) noexcept : size(size) , id{from_idm ? 
idm::last_id() : SYS_MEMORY_CONTAINER_ID_INVALID} @@ -313,8 +325,6 @@ error_code sys_memory_get_user_memory_size(cpu_thread& cpu, vm::ptrget(); @@ -332,6 +342,22 @@ error_code sys_memory_get_user_memory_size(cpu_thread& cpu, vm::ptrget().body.exchange(now); + + if (now.prev_total != out.total_user_memory || now.prev_avail != out.available_user_memory) + { + // Log on change + sys_memory.warning("sys_memory_get_user_memory_size(mem_info=*0x%x): Avail=0x%x, Total=0x%x", mem_info, out.available_user_memory, out.total_user_memory); + } + else + { + sys_memory.trace("sys_memory_get_user_memory_size(mem_info=*0x%x): Avail=0x%x, Total=0x%x", mem_info, out.available_user_memory, out.total_user_memory); + } + cpu.check_state(); *mem_info = out; return CELL_OK; diff --git a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp index d7b66adfd8..0723564a5e 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp @@ -333,7 +333,7 @@ error_code sys_mmapper_allocate_shared_memory_ext(ppu_thread& ppu, u64 ipc_key, } } - if (flags & ~SYS_MEMORY_PAGE_SIZE_MASK) + if (flags & ~SYS_MEMORY_GRANULARITY_MASK) { return CELL_EINVAL; } @@ -401,6 +401,11 @@ error_code sys_mmapper_allocate_shared_memory_from_container_ext(ppu_thread& ppu sys_mmapper.todo("sys_mmapper_allocate_shared_memory_from_container_ext(ipc_key=0x%x, size=0x%x, flags=0x%x, cid=0x%x, entries=*0x%x, entry_count=0x%x, mem_id=*0x%x)", ipc_key, size, flags, cid, entries, entry_count, mem_id); + if (size == 0) + { + return CELL_EALIGN; + } + switch (flags & SYS_MEMORY_PAGE_SIZE_MASK) { case SYS_MEMORY_PAGE_SIZE_1M: @@ -546,8 +551,7 @@ error_code sys_mmapper_free_address(ppu_thread& ppu, u32 addr) // If a memory block is freed, remove it from page notification table. 
auto& pf_entries = g_fxo->get(); - std::lock_guard lock(pf_entries.mutex); - + std::unique_lock lock(pf_entries.mutex); auto ind_to_remove = pf_entries.entries.begin(); for (; ind_to_remove != pf_entries.entries.end(); ++ind_to_remove) { @@ -558,7 +562,11 @@ error_code sys_mmapper_free_address(ppu_thread& ppu, u32 addr) } if (ind_to_remove != pf_entries.entries.end()) { + u32 port_id = ind_to_remove->port_id; pf_entries.entries.erase(ind_to_remove); + lock.unlock(); + sys_event_port_disconnect(ppu, port_id); + sys_event_port_destroy(ppu, port_id); } return CELL_OK; @@ -826,7 +834,6 @@ error_code sys_mmapper_enable_page_fault_notification(ppu_thread& ppu, u32 start vm::var port_id(0); error_code res = sys_event_port_create(ppu, port_id, SYS_EVENT_PORT_LOCAL, SYS_MEMORY_PAGE_FAULT_EVENT_KEY); - sys_event_port_connect_local(ppu, *port_id, event_queue_id); if (res + 0u == CELL_EAGAIN) { @@ -834,6 +841,8 @@ error_code sys_mmapper_enable_page_fault_notification(ppu_thread& ppu, u32 start return CELL_EAGAIN; } + sys_event_port_connect_local(ppu, *port_id, event_queue_id); + auto& pf_entries = g_fxo->get(); std::unique_lock lock(pf_entries.mutex); diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp index e6c96ffd64..9f436ae4d3 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp @@ -85,7 +85,7 @@ error_code sys_mutex_create(ppu_thread& ppu, vm::ptr mutex_id, vm::ptr(_attr.pshared, _attr.ipc_key, _attr.flags, [&]() + if (auto error = lv2_obj::create(_attr.pshared, ipc_key, _attr.flags, [&]() { return make_shared( _attr.protocol, diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.h b/rpcs3/Emu/Cell/lv2/sys_mutex.h index f82f913399..c4fe04ce2a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mutex.h +++ b/rpcs3/Emu/Cell/lv2/sys_mutex.h @@ -173,7 +173,11 @@ struct lv2_mutex final : lv2_obj if (sq == data.sq) { - atomic_storage::release(control.raw().owner, res->id); + if (cpu_flag::again - res->state) + { + 
atomic_storage::release(control.raw().owner, res->id); + } + return false; } diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp index 827a4c98f2..a860d43b12 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp @@ -563,37 +563,34 @@ error_code sys_net_bnet_connect(ppu_thread& ppu, s32 s, vm::ptr(-SYS_NET_EINTR)) + if (state & cpu_flag::signal) { - return -SYS_NET_EINTR; + break; } - if (result) - { - if (result < 0) - { - return sys_net_error{result}; - } + ppu.state.wait(state); + } - return not_an_error(result); + if (ppu.gpr[3] == static_cast(-SYS_NET_EINTR)) + { + return -SYS_NET_EINTR; + } + + if (result) + { + if (result < 0) + { + return sys_net_error{result}; } + + return not_an_error(result); } return CELL_OK; @@ -992,7 +989,7 @@ error_code sys_net_bnet_sendto(ppu_thread& ppu, s32 s, vm::cptr buf, u32 l fmt::throw_exception("sys_net_bnet_sendto(s=%d): unknown flags (0x%x)", flags); } - if (addr && addrlen < 8) + if (addr && addrlen < sizeof(sys_net_sockaddr)) { sys_net.error("sys_net_bnet_sendto(s=%d): bad addrlen (%u)", s, addrlen); return -SYS_NET_EINVAL; @@ -1295,7 +1292,7 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n if (auto sock = idm::check_unlocked(fds_buf[i].fd)) { - signaled += sock->poll(fds_buf[i], _fds[i]); + sock->poll(fds_buf[i], _fds[i]); #ifdef _WIN32 connecting[i] = sock->is_connecting(); #endif @@ -1303,7 +1300,6 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n else { fds_buf[i].revents |= SYS_NET_POLLNVAL; - signaled++; } } @@ -1536,9 +1532,9 @@ error_code sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr select(bs_t selected, pollfd& native_pfd) = 0; error_code abort_socket(s32 flags); diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.cpp b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.cpp index d420f23cc8..b422db814b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.cpp +++ 
b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.cpp @@ -551,12 +551,14 @@ std::tuple lv2_socket_native::getsockopt(s32 } case SYS_NET_IP_TTLCHK: { - sys_net.error("sys_net_bnet_getsockopt(IPPROTO_IP, SYS_NET_IP_TTLCHK): stubbed option"); + out_val._int = min_ttl; + out_len = sizeof(s32); return {CELL_OK, out_val, out_len}; } case SYS_NET_IP_MAXTTL: { - sys_net.error("sys_net_bnet_getsockopt(IPPROTO_IP, SYS_NET_IP_MAXTTL): stubbed option"); + out_val._int = max_ttl; + out_len = sizeof(s32); return {CELL_OK, out_val, out_len}; } case SYS_NET_IP_DONTFRAG: @@ -834,13 +836,13 @@ s32 lv2_socket_native::setsockopt(s32 level, s32 optname, const std::vector& } case SYS_NET_IP_TTLCHK: { - sys_net.error("sys_net_bnet_setsockopt(s=%d, IPPROTO_IP): Stubbed option (0x%x) (SYS_NET_IP_TTLCHK)", lv2_id, optname); - break; + min_ttl = native_int; + return {}; } case SYS_NET_IP_MAXTTL: { - sys_net.error("sys_net_bnet_setsockopt(s=%d, IPPROTO_IP): Stubbed option (0x%x) (SYS_NET_IP_MAXTTL)", lv2_id, optname); - break; + max_ttl = native_int; + return {}; } case SYS_NET_IP_DONTFRAG: { @@ -910,7 +912,7 @@ std::optional, sys_net_sockaddr>> lv2_socket_nat { auto& nph = g_fxo->get>(); const auto packet = dnshook.get_dns_packet(lv2_id); - ensure(packet.size() < len); + ensure(packet.size() <= len); memcpy(res_buf.data(), packet.data(), packet.size()); native_addr.ss_family = AF_INET; (reinterpret_cast<::sockaddr_in*>(&native_addr))->sin_port = std::bit_cast>(53); // htons(53) @@ -1069,18 +1071,20 @@ std::optional lv2_socket_native::sendmsg(s32 flags, const sys_net_msghdr& m return {-SYS_NET_ECONNRESET}; } + std::vector buf_copy; for (int i = 0; i < msg.msg_iovlen; i++) { auto iov_base = msg.msg_iov[i].iov_base; const u32 len = msg.msg_iov[i].iov_len; - const std::vector buf_copy(vm::_ptr(iov_base.addr()), vm::_ptr(iov_base.addr()) + len); + const auto* src = vm::_ptr(iov_base.addr()); + buf_copy.insert(buf_copy.end(), src, src + len); + } - native_result = ::send(native_socket, 
reinterpret_cast(buf_copy.data()), ::narrow(buf_copy.size()), native_flags); + native_result = ::send(native_socket, reinterpret_cast(buf_copy.data()), ::narrow(buf_copy.size()), native_flags); - if (native_result >= 0) - { - return {native_result}; - } + if (native_result >= 0) + { + return {native_result}; } result = get_last_error(!so_nbio && (flags & SYS_NET_MSG_DONTWAIT) == 0); @@ -1143,14 +1147,14 @@ s32 lv2_socket_native::shutdown(s32 how) return -get_last_error(false); } -s32 lv2_socket_native::poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) +void lv2_socket_native::poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) { // Check for fake packet for dns interceptions auto& dnshook = g_fxo->get(); if (sn_pfd.events & SYS_NET_POLLIN && dnshook.is_dns(sn_pfd.fd) && dnshook.is_dns_queue(sn_pfd.fd)) { sn_pfd.revents |= SYS_NET_POLLIN; - return 1; + return; } if (sn_pfd.events & ~(SYS_NET_POLLIN | SYS_NET_POLLOUT | SYS_NET_POLLERR)) { @@ -1167,8 +1171,6 @@ s32 lv2_socket_native::poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) { native_pfd.events |= POLLOUT; } - - return 0; } std::tuple lv2_socket_native::select(bs_t selected, pollfd& native_pfd) @@ -1232,16 +1234,16 @@ bool lv2_socket_native::is_socket_connected() return false; } - fd_set readfds, writefds; - struct timeval timeout{0, 0}; // Zero timeout + pollfd pfd{}; + pfd.fd = native_socket; + pfd.events = POLLIN | POLLOUT; - FD_ZERO(&readfds); - FD_ZERO(&writefds); - FD_SET(native_socket, &readfds); - FD_SET(native_socket, &writefds); - - // Use select to check for readability and writability - const int result = ::select(1, &readfds, &writefds, NULL, &timeout); + // Use poll to check for readability and writability +#ifdef _WIN32 + const int result = WSAPoll(&pfd, 1, 0); +#else + const int result = ::poll(&pfd, 1, 0); +#endif if (result < 0) { @@ -1250,5 +1252,5 @@ bool lv2_socket_native::is_socket_connected() } // Socket is connected if it's readable or writable - return FD_ISSET(native_socket, &readfds) || 
FD_ISSET(native_socket, &writefds); + return (pfd.revents & (POLLIN | POLLOUT)) != 0; } diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.h b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.h index cf07dfcb76..af9e6a57b7 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.h +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_native.h @@ -50,7 +50,7 @@ public: std::optional sendto(s32 flags, const std::vector& buf, std::optional opt_sn_addr, bool is_lock = true) override; std::optional sendmsg(s32 flags, const sys_net_msghdr& msg, bool is_lock = true) override; - s32 poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; + void poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; std::tuple select(bs_t selected, pollfd& native_pfd) override; bool is_socket_connected(); @@ -70,6 +70,10 @@ private: s32 so_reuseaddr = 0; s32 so_reuseport = 0; #endif + // Those values come from FreeBSD + s32 min_ttl = 1; + s32 max_ttl = 64; + u16 bound_port = 0; bool feign_tcp_conn_failure = false; // Savestate load related }; diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp index 67cefe3e3e..903752085b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.cpp @@ -364,7 +364,7 @@ s32 lv2_socket_p2p::shutdown([[maybe_unused]] s32 how) return CELL_OK; } -s32 lv2_socket_p2p::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) +void lv2_socket_p2p::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) { std::lock_guard lock(mutex); ensure(vport); @@ -381,8 +381,6 @@ s32 lv2_socket_p2p::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native { sn_pfd.revents |= SYS_NET_POLLOUT; } - - return sn_pfd.revents ? 
1 : 0; } std::tuple lv2_socket_p2p::select(bs_t selected, [[maybe_unused]] pollfd& native_pfd) diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.h b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.h index b8fadb3d53..ec6c1d8b31 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.h +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2p.h @@ -30,7 +30,7 @@ public: void close() override; s32 shutdown(s32 how) override; - s32 poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; + void poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; std::tuple select(bs_t selected, pollfd& native_pfd) override; void handle_new_data(sys_net_sockaddr_in_p2p p2p_addr, std::vector p2p_data); diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.cpp b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.cpp index c0790a2e33..cb59557458 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.cpp @@ -112,7 +112,6 @@ public: // reply is late, increases rtt auto& msg = it->second; - const auto addr = msg.dst_addr.sin_addr.s_addr; rtt_info rtt = rtts[msg.sock_id]; // Only increases rtt once per loop(in case a big number of packets are sent at once) if (!rtt_increased.count(msg.sock_id)) @@ -120,7 +119,7 @@ public: rtt.num_retries += 1; // Increases current rtt by 10% rtt.rtt_time += (rtt.rtt_time / 10); - rtts[addr] = rtt; + rtts[msg.sock_id] = rtt; rtt_increased.emplace(msg.sock_id); } @@ -625,7 +624,7 @@ std::tuple, sys_net_sockaddr> lv2_socket_p2ps: sys_net_sockaddr ps3_addr{}; auto* paddr = reinterpret_cast(&ps3_addr); - lv2_socket_p2ps* sock_client = reinterpret_cast(idm::check_unlocked(p2ps_client)); + auto sock_client = static_cast>(idm::get_unlocked(p2ps_client)); { std::lock_guard lock(sock_client->mutex); paddr->sin_family = SYS_NET_AF_INET; @@ -986,7 +985,7 @@ s32 lv2_socket_p2ps::shutdown([[maybe_unused]] s32 how) return CELL_OK; } -s32 lv2_socket_p2ps::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) +void 
lv2_socket_p2ps::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) { std::lock_guard lock(mutex); sys_net.trace("[P2PS] poll checking for 0x%X", sn_pfd.events); @@ -1003,14 +1002,7 @@ s32 lv2_socket_p2ps::poll(sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& nativ { sn_pfd.revents |= SYS_NET_POLLOUT; } - - if (sn_pfd.revents) - { - return 1; - } } - - return 0; } std::tuple lv2_socket_p2ps::select(bs_t selected, [[maybe_unused]] pollfd& native_pfd) diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.h b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.h index 8158138936..ac23528d57 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.h +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_p2ps.h @@ -89,7 +89,7 @@ public: void close() override; s32 shutdown(s32 how) override; - s32 poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; + void poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; std::tuple select(bs_t selected, pollfd& native_pfd) override; private: diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.cpp b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.cpp index 6e74bd512f..39ae39e5b5 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.cpp @@ -134,10 +134,9 @@ s32 lv2_socket_raw::shutdown([[maybe_unused]] s32 how) return {}; } -s32 lv2_socket_raw::poll([[maybe_unused]] sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) +void lv2_socket_raw::poll([[maybe_unused]] sys_net_pollfd& sn_pfd, [[maybe_unused]] pollfd& native_pfd) { LOG_ONCE(raw_poll, "lv2_socket_raw::poll"); - return {}; } std::tuple lv2_socket_raw::select([[maybe_unused]] bs_t selected, [[maybe_unused]] pollfd& native_pfd) diff --git a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.h b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.h index 01b7255884..a03339354f 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.h +++ b/rpcs3/Emu/Cell/lv2/sys_net/lv2_socket_raw.h @@ -32,6 +32,6 @@ public: void close() override; s32 
shutdown(s32 how) override; - s32 poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; + void poll(sys_net_pollfd& sn_pfd, pollfd& native_pfd) override; std::tuple select(bs_t selected, pollfd& native_pfd) override; }; diff --git a/rpcs3/Emu/Cell/lv2/sys_net/nt_p2p_port.cpp b/rpcs3/Emu/Cell/lv2/sys_net/nt_p2p_port.cpp index 61039c7856..67be0df63b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net/nt_p2p_port.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net/nt_p2p_port.cpp @@ -249,8 +249,9 @@ bool nt_p2p_port::recv_data() auto& bound_sockets = ::at32(bound_p2p_vports, dst_vport); - for (const auto sock_id : bound_sockets) + for (auto it = bound_sockets.begin(); it != bound_sockets.end();) { + s32 sock_id = *it; const auto sock = idm::check(sock_id, [&](lv2_socket& sock) { ensure(sock.get_type() == SYS_NET_SOCK_DGRAM_P2P); @@ -262,12 +263,17 @@ bool nt_p2p_port::recv_data() if (!sock) { sys_net.error("Socket %d found in bound_p2p_vports didn't exist!", sock_id); - bound_sockets.erase(sock_id); + it = bound_sockets.erase(it); if (bound_sockets.empty()) { bound_p2p_vports.erase(dst_vport); + break; } } + else + { + it++; + } } return true; diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index e2463ca101..f26b650346 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -305,6 +305,7 @@ error_code sys_ppu_thread_detach(ppu_thread& ppu, u32 thread_id) { // Join and notify thread (it is detached from IDM now so it must be done explicitly now) *ptr = thread_state::finished; + return CELL_OK; } return result; diff --git a/rpcs3/Emu/Cell/lv2/sys_process.cpp b/rpcs3/Emu/Cell/lv2/sys_process.cpp index b914408ec9..4ddf3720fd 100644 --- a/rpcs3/Emu/Cell/lv2/sys_process.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_process.cpp @@ -473,7 +473,7 @@ void lv2_exitspawn(ppu_thread& ppu, std::vector& argv, std::vector< }; Emu.after_kill_callback = [func = std::move(func), argv = std::move(argv), envp = std::move(envp), data = 
std::move(data), - disc = std::move(disc), path = std::move(path), hdd1 = std::move(hdd1), old_config = Emu.GetUsedConfig(), klic]() mutable + disc = std::move(disc), path = std::move(path), hdd1 = std::move(hdd1), old_config = Emu.GetUsedConfig(), old_db_config = Emu.GetUsedDatabaseConfig(), klic]() mutable { Emu.argv = std::move(argv); Emu.envp = std::move(envp); @@ -489,7 +489,7 @@ void lv2_exitspawn(ppu_thread& ppu, std::vector& argv, std::vector< Emu.SetForceBoot(true); - auto res = Emu.BootGame(path, "", true, cfg_mode::continuous, old_config); + auto res = Emu.BootGame(path, "", true, cfg_mode::continuous, old_config, old_db_config); if (res != game_boot_result::no_errors) { diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.cpp b/rpcs3/Emu/Cell/lv2/sys_prx.cpp index 046d19c48e..6f930e79dd 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_prx.cpp @@ -63,8 +63,8 @@ extern const std::map g_prx_list { "libcelpenc.sprx", 0 }, { "libddpdec.sprx", 0 }, { "libdivxdec.sprx", 0 }, - { "libdmux.sprx", 0 }, - { "libdmuxpamf.sprx", 0 }, + { "libdmux.sprx", 1 }, + { "libdmuxpamf.sprx", 1 }, { "libdtslbrdec.sprx", 0 }, { "libfiber.sprx", 0 }, { "libfont.sprx", 0 }, @@ -899,7 +899,7 @@ error_code _sys_prx_register_library(ppu_thread& ppu, vm::ptr library) { for (u32 lib_addr = prx.exports_start, index = 0; lib_addr < prx.exports_end; index++, lib_addr += vm::read8(lib_addr) ? 
vm::read8(lib_addr) : sizeof_lib) { - if (std::memcpy(vm::base(lib_addr), mem_copy.data(), sizeof_lib) == 0) + if (std::memcmp(vm::base(lib_addr), mem_copy.data(), sizeof_lib) == 0) { atomic_storage::release(prx.m_external_loaded_flags[index], true); return true; diff --git a/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp b/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp index 2ba15b1146..29a1fa3501 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp @@ -46,14 +46,14 @@ namespace rsxaudio_ringbuf_reader static void set_timestamp(rsxaudio_shmem::ringbuf_t& ring_buf, u64 timestamp) { const s32 entry_idx_raw = (ring_buf.read_idx + ring_buf.rw_max_idx - (ring_buf.rw_max_idx > 2) - 1) % ring_buf.rw_max_idx; - const s32 entry_idx = std::clamp(entry_idx_raw, 0, SYS_RSXAUDIO_RINGBUF_SZ); + const s32 entry_idx = std::clamp(entry_idx_raw, 0, SYS_RSXAUDIO_RINGBUF_SZ - 1); ring_buf.entries[entry_idx].timestamp = convert_to_timebased_time(timestamp); } static std::tuple update_status(rsxaudio_shmem::ringbuf_t& ring_buf) { - const s32 read_idx = std::clamp(ring_buf.read_idx, 0, SYS_RSXAUDIO_RINGBUF_SZ); + const s32 read_idx = std::clamp(ring_buf.read_idx, 0, SYS_RSXAUDIO_RINGBUF_SZ - 1); if ((ring_buf.entries[read_idx].valid & 1) == 0U) { @@ -61,7 +61,7 @@ namespace rsxaudio_ringbuf_reader } const s32 entry_idx_raw = (ring_buf.read_idx + ring_buf.rw_max_idx - (ring_buf.rw_max_idx > 2)) % ring_buf.rw_max_idx; - const s32 entry_idx = std::clamp(entry_idx_raw, 0, SYS_RSXAUDIO_RINGBUF_SZ); + const s32 entry_idx = std::clamp(entry_idx_raw, 0, SYS_RSXAUDIO_RINGBUF_SZ - 1); ring_buf.entries[read_idx].valid = 0; ring_buf.queue_notify_idx = (ring_buf.queue_notify_idx + 1) % ring_buf.queue_notify_step; @@ -72,7 +72,7 @@ namespace rsxaudio_ringbuf_reader static std::pair get_addr(const rsxaudio_shmem::ringbuf_t& ring_buf) { - const s32 read_idx = std::clamp(ring_buf.read_idx, 0, SYS_RSXAUDIO_RINGBUF_SZ); + const s32 read_idx = std::clamp(ring_buf.read_idx, 0, 
SYS_RSXAUDIO_RINGBUF_SZ - 1); if (ring_buf.entries[read_idx].valid & 1) { @@ -1392,9 +1392,9 @@ void rsxaudio_backend_thread::operator()() return; } - static rsxaudio_state ra_state{}; - static emu_audio_cfg emu_cfg{}; - static bool backend_failed = false; + rsxaudio_state ra_state{}; + emu_audio_cfg emu_cfg{}; + bool backend_failed = false; for (;;) { @@ -2018,7 +2018,7 @@ void rsxaudio_periodic_tmr::cancel_timer_unlocked() { const u64 flag = 1; const auto wr_res = write(cancel_event, &flag, sizeof(flag)); - ensure(wr_res == sizeof(flag) || wr_res == -EAGAIN); + ensure(wr_res == sizeof(flag) || errno == EAGAIN); } #elif defined(BSD) || defined(__APPLE__) handle[TIMER_ID].flags = (handle[TIMER_ID].flags & ~EV_ENABLE) | EV_DISABLE; diff --git a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp index c2abd40284..e60d4895cc 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp @@ -441,6 +441,8 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) continue; } + ppu.state += cpu_flag::wait; + std::lock_guard lock(rwlock->mutex); if (!rwlock->unqueue(rwlock->wq, &ppu)) diff --git a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp index 7440cf2def..b6ca578977 100644 --- a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp @@ -72,7 +72,7 @@ error_code sys_semaphore_create(ppu_thread& ppu, vm::ptr sem_id, vm::ptr(ppu.test_stopped()); + ppu.check_state(); *sem_id = idm::last_id(); return CELL_OK; @@ -358,7 +358,7 @@ error_code sys_semaphore_get_value(ppu_thread& ppu, u32 sem_id, vm::ptr cou return CELL_EFAULT; } - static_cast(ppu.test_stopped()); + ppu.check_state(); *count = sema.ret; return CELL_OK; diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index 3665efcd0a..ff4294a24f 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -437,7 +437,7 @@ struct spu_limits_t raw_spu_count += 
spu_thread::g_raw_spu_ctr; // physical_spus_count >= spu_limit returns EBUSY, not EINVAL! - if (spu_limit + raw_limit > 6 || raw_spu_count > raw_limit || physical_spus_count >= spu_limit || physical_spus_count > spu_limit || controllable_spu_count > spu_limit) + if (spu_limit + raw_limit > 6 || raw_spu_count > raw_limit || physical_spus_count >= spu_limit || controllable_spu_count > spu_limit) { return false; } diff --git a/rpcs3/Emu/Cell/lv2/sys_ss.cpp b/rpcs3/Emu/Cell/lv2/sys_ss.cpp index 2c4b1282fd..725adbcfc3 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ss.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ss.cpp @@ -24,7 +24,7 @@ struct lv2_update_manager // For example, 4.90 should be converted to 0x4900000000000 std::erase(version_str, '.'); - if (std::from_chars(version_str.data(), version_str.data() + version_str.size(), system_sw_version, 16).ec != std::errc{}) + if (std::from_chars(version_str.data(), version_str.data() + version_str.size(), system_sw_version, 16).ec == std::errc{}) system_sw_version <<= 40; else system_sw_version = 0; @@ -79,6 +79,7 @@ struct lv2_update_manager if (malloc_set.count(addr)) { + malloc_set.erase(addr); return vm::dealloc(addr, vm::main); } diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index 0aff5e1e7a..6d55f4b1f0 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -453,6 +453,7 @@ public: // Can be called before the actual sleep call in order to move it out of mutex scope static void prepare_for_sleep(cpu_thread& cpu); + static ppu_thread* get_running_ppu(u32 index); struct notify_all_t { diff --git a/rpcs3/Emu/Cell/lv2/sys_time.cpp b/rpcs3/Emu/Cell/lv2/sys_time.cpp index 539bb9dedf..a71a11ba31 100644 --- a/rpcs3/Emu/Cell/lv2/sys_time.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_time.cpp @@ -118,6 +118,7 @@ static int clock_gettime(int clk_id, struct timespec* tp) #ifndef _WIN32 +#include #include static struct timespec start_time = []() diff --git a/rpcs3/Emu/Cell/lv2/sys_uart.cpp 
b/rpcs3/Emu/Cell/lv2/sys_uart.cpp index 67e91b97a0..0049b60ffe 100644 --- a/rpcs3/Emu/Cell/lv2/sys_uart.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_uart.cpp @@ -1531,9 +1531,11 @@ private: } }(); + if (sce_idx == umax) + return PS3AV_STATUS_INVALID_VIDEO_PARAM; + const video_sce_param &sce_param = sce_param_arr[sce_idx]; - if (sce_idx == umax || - video_head_cfg.video_head > PS3AV_HEAD_B_ANALOG || + if (video_head_cfg.video_head > PS3AV_HEAD_B_ANALOG || video_head_cfg.video_order > 1 || video_head_cfg.video_format > 16 || video_head_cfg.video_out_format > 16 || diff --git a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp index e1c3caf9d7..dca61f3be8 100644 --- a/rpcs3/Emu/Cell/lv2/sys_usbd.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_usbd.cpp @@ -1173,11 +1173,15 @@ error_code sys_usbd_get_device_list(ppu_thread& ppu, u32 handle, vm::ptr - u32 i_tocopy = std::min(max_devices, ::size32(usbh.handled_devices)); + const u32 i_tocopy = std::min(max_devices, ::size32(usbh.handled_devices)); + u32 index = 0; - for (u32 index = 0; index < i_tocopy; index++) + for (const auto& [_, device] : usbh.handled_devices) { - device_list[index] = usbh.handled_devices[index].first; + if (index == i_tocopy) + break; + + device_list[index++] = device.first; } return not_an_error(i_tocopy); @@ -1409,7 +1413,7 @@ error_code sys_usbd_receive_event(ppu_thread& ppu, u32 handle, vm::ptr arg1 if (is_stopped(state)) { - std::lock_guard lock(usbh.mutex); + std::lock_guard lock(usbh.mutex_sq); for (auto cpu = +usbh.sq; cpu; cpu = cpu->next_cpu) { @@ -1587,7 +1591,7 @@ error_code sys_usbd_get_transfer_status(ppu_thread& ppu, u32 handle, u32 id_tran std::lock_guard lock(usbh.mutex); - if (!usbh.is_init) + if (!usbh.is_init || id_transfer >= MAX_SYS_USBD_TRANSFERS) return CELL_EINVAL; const auto status = usbh.get_transfer_status(id_transfer); @@ -1607,7 +1611,7 @@ error_code sys_usbd_get_isochronous_transfer_status(ppu_thread& ppu, u32 handle, std::lock_guard lock(usbh.mutex); - if (!usbh.is_init) + if 
(!usbh.is_init || id_transfer >= MAX_SYS_USBD_TRANSFERS) return CELL_EINVAL; const auto status = usbh.get_isochronous_transfer_status(id_transfer); diff --git a/rpcs3/Emu/Cell/lv2/sys_vm.cpp b/rpcs3/Emu/Cell/lv2/sys_vm.cpp index 2a224d2339..49337cf223 100644 --- a/rpcs3/Emu/Cell/lv2/sys_vm.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_vm.cpp @@ -97,7 +97,7 @@ error_code sys_vm_memory_map(ppu_thread& ppu, u64 vsize, u64 psize, u32 cid, u64 // Look for unmapped space if (const auto area = vm::find_map(0x10000000, 0x10000000, 2 | (flag & SYS_MEMORY_PAGE_SIZE_MASK))) { - sys_vm.warning("sys_vm_memory_map(): Found VM 0x%x area (vsize=0x%x)", addr, vsize); + sys_vm.warning("sys_vm_memory_map(): Found VM 0x%x area (vsize=0x%x)", area->addr, vsize); // Alloc all memory (shall not fail) ensure(area->alloc(static_cast(vsize))); diff --git a/rpcs3/Emu/GameInfo.h b/rpcs3/Emu/GameInfo.h index 3efca1410a..da8b2638ba 100644 --- a/rpcs3/Emu/GameInfo.h +++ b/rpcs3/Emu/GameInfo.h @@ -8,6 +8,7 @@ struct GameInfo std::string path; std::string icon_path; std::string movie_path; + std::string audio_path; std::string name; std::string serial; diff --git a/rpcs3/Emu/Io/GameTablet.cpp b/rpcs3/Emu/Io/GameTablet.cpp index 4c2330d908..1d0fcf24fe 100644 --- a/rpcs3/Emu/Io/GameTablet.cpp +++ b/rpcs3/Emu/Io/GameTablet.cpp @@ -229,6 +229,9 @@ void usb_device_gametablet::interrupt_transfer(u32 buf_size, u8* buf, u32 /*endp case CELL_PAD_CTRL_LEFT: left = true; break; + case CELL_PAD_CTRL_PS: + gt.btn_ps |= 1; + break; default: break; } @@ -249,9 +252,6 @@ void usb_device_gametablet::interrupt_transfer(u32 buf_size, u8* buf, u32 /*endp case CELL_PAD_CTRL_TRIANGLE: gt.btn_triangle |= 1; break; - case CELL_PAD_CTRL_PS: - gt.btn_ps |= 1; - break; default: break; } diff --git a/rpcs3/Emu/Io/LogitechG27.cpp b/rpcs3/Emu/Io/LogitechG27.cpp index 1f02c92d59..3f07e92560 100644 --- a/rpcs3/Emu/Io/LogitechG27.cpp +++ b/rpcs3/Emu/Io/LogitechG27.cpp @@ -884,7 +884,7 @@ static s16 fetch_sdl_as_axis(SDL_Joystick* joystick, 
const sdl_mapping& mapping) return 0; } -static s16 fetch_sdl_axis_avg(std::map>& joysticks, const sdl_mapping& mapping) +static s16 fetch_sdl_axis_avg(const std::map>& joysticks, const sdl_mapping& mapping) { constexpr s16 MAX = 0x7FFF; constexpr s16 MIN = -0x8000; @@ -910,7 +910,7 @@ static s16 fetch_sdl_axis_avg(std::map>& joystic return std::clamp(sdl_joysticks_total_value / static_cast(joysticks_of_type->second.size()), MIN, MAX); } -static bool sdl_to_logitech_g27_button(std::map>& joysticks, const sdl_mapping& mapping) +static bool sdl_to_logitech_g27_button(const std::map>& joysticks, const sdl_mapping& mapping) { auto joysticks_of_type = joysticks.find(mapping.device_type_id); if (joysticks_of_type == joysticks.end()) @@ -931,32 +931,21 @@ static bool sdl_to_logitech_g27_button(std::map> return pressed; } -static u16 sdl_to_logitech_g27_steering(std::map>& joysticks, const sdl_mapping& mapping) +static u16 sdl_to_logitech_g27_steering(const std::map>& joysticks, const sdl_mapping& mapping) { const s16 avg = fetch_sdl_axis_avg(joysticks, mapping); const u16 unsigned_avg = avg + 0x8000; return unsigned_avg * (0xFFFF >> 2) / 0xFFFF; } -static u8 sdl_to_logitech_g27_pedal(std::map>& joysticks, const sdl_mapping& mapping) +static u8 sdl_to_logitech_g27_pedal(const std::map>& joysticks, const sdl_mapping& mapping) { const s16 avg = fetch_sdl_axis_avg(joysticks, mapping); const u16 unsigned_avg = avg + 0x8000; return unsigned_avg * 0xFF / 0xFFFF; } -static inline void set_bit(u8* buf, int bit_num, bool set) -{ - const int byte_num = bit_num / 8; - bit_num %= 8; - const u8 mask = 1 << bit_num; - if (set) - buf[byte_num] = buf[byte_num] | mask; - else - buf[byte_num] = buf[byte_num] & (~mask); -} - -void usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFEX_data data{}; ensure(buf_size >= sizeof(data)); @@ -990,7 +979,7 @@ void 
usb_device_logitech_g27::transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFP_data data{}; ensure(buf_size >= sizeof(data)); @@ -1026,7 +1015,7 @@ void usb_device_logitech_g27::transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* t std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) const { DFGT_data data{}; ensure(buf_size >= sizeof(data)); @@ -1068,7 +1057,7 @@ void usb_device_logitech_g27::transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) const { G25_data data{}; ensure(buf_size >= sizeof(data)); @@ -1116,7 +1105,7 @@ void usb_device_logitech_g27::transfer_g25(u32 buf_size, u8* buf, UsbTransfer* t std::memcpy(buf, &data, sizeof(data)); } -void usb_device_logitech_g27::transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) +void usb_device_logitech_g27::transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) const { G27_data data{}; ensure(buf_size >= sizeof(data)); diff --git a/rpcs3/Emu/Io/LogitechG27.h b/rpcs3/Emu/Io/LogitechG27.h index 4ec0d35c0b..ccb2f58908 100644 --- a/rpcs3/Emu/Io/LogitechG27.h +++ b/rpcs3/Emu/Io/LogitechG27.h @@ -38,10 +38,7 @@ struct logitech_g27_ffb_slot logitech_g27_ffb_state state = logitech_g27_ffb_state::inactive; u64 last_update = 0; SDL_HapticEffect last_effect {}; - - // TODO switch to SDL_HapticEffectID when it becomes available in a future SDL release - // Match the return of SDL_CreateHapticEffect for now - int 
effect_id = -1; + SDL_HapticEffectID effect_id = -1; }; struct sdl_mapping @@ -124,11 +121,11 @@ public: private: void sdl_refresh(); void set_personality(logitech_personality personality, bool reconnect = false); - void transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer); - void transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer); + void transfer_dfex(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_dfp(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_dfgt(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_g25(u32 buf_size, u8* buf, UsbTransfer* transfer) const; + void transfer_g27(u32 buf_size, u8* buf, UsbTransfer* transfer) const; u32 m_controller_index = 0; @@ -137,7 +134,7 @@ private: logitech_g27_sdl_mapping m_mapping {}; bool m_reverse_effects = false; - std::mutex m_sdl_handles_mutex; + mutable std::mutex m_sdl_handles_mutex; SDL_Joystick* m_led_joystick_handle = nullptr; SDL_Haptic* m_haptic_handle = nullptr; std::map> m_joysticks; diff --git a/rpcs3/Emu/Io/PadHandler.cpp b/rpcs3/Emu/Io/PadHandler.cpp index 9110687a9c..ccd81d2805 100644 --- a/rpcs3/Emu/Io/PadHandler.cpp +++ b/rpcs3/Emu/Io/PadHandler.cpp @@ -11,26 +11,49 @@ PadHandlerBase::PadHandlerBase(pad_handler type) : m_type(type) { } -std::set PadHandlerBase::narrow_set(const std::set& src) +std::vector> PadHandlerBase::find_key_combos(const std::unordered_map& map, const std::string& cfg_string) { - if (src.empty()) - return {}; + std::vector> key_codes; - std::set dst; - for (const u64& s : src) + const std::vector combos = cfg_pad::get_combos(cfg_string); + + for (const pad::combo& combo : combos) { - dst.insert(::narrow(s)); + std::set keys = find_key_codes(map, combo); + + if (!keys.empty()) + { + 
key_codes.push_back(std::move(keys)); + } } - return dst; + + return key_codes; +} + +std::set PadHandlerBase::find_key_codes(const std::unordered_map& map, const pad::combo& combo) +{ + std::set key_codes; + + for (const std::string& button_name : combo.buttons()) + { + for (const auto& [code, name] : map) + { + if (button_name == name) + { + key_codes.insert(code); + break; + } + } + } + + return key_codes; } -// Get new multiplied value based on the multiplier s32 PadHandlerBase::MultipliedInput(s32 raw_value, s32 multiplier) { return (multiplier * raw_value) / 100; } -// Get new scaled value between 0 and range based on its minimum and maximum f32 PadHandlerBase::ScaledInput(f32 raw_value, f32 minimum, f32 maximum, f32 deadzone, f32 range) { if (deadzone > 0 && deadzone > minimum) @@ -46,7 +69,6 @@ f32 PadHandlerBase::ScaledInput(f32 raw_value, f32 minimum, f32 maximum, f32 dea return range * val; } -// Get new scaled value between -range and range based on its minimum and maximum f32 PadHandlerBase::ScaledAxisInput(f32 raw_value, f32 minimum, f32 maximum, f32 deadzone, f32 range) { // convert [min, max] to [0, 1] @@ -79,7 +101,6 @@ f32 PadHandlerBase::ScaledAxisInput(f32 raw_value, f32 minimum, f32 maximum, f32 return (2.0f * range * val) - range; } -// Get normalized trigger value based on the range defined by a threshold u16 PadHandlerBase::NormalizeTriggerInput(u16 value, u32 threshold) const { if (value <= threshold || threshold >= trigger_max) @@ -90,8 +111,6 @@ u16 PadHandlerBase::NormalizeTriggerInput(u16 value, u32 threshold) const return static_cast(ScaledInput(static_cast(value), static_cast(trigger_min), static_cast(trigger_max), static_cast(threshold))); } -// normalizes a directed input, meaning it will correspond to a single "button" and not an axis with two directions -// the input values must lie in 0+ u16 PadHandlerBase::NormalizeDirectedInput(s32 raw_value, s32 threshold, s32 maximum) const { if (threshold >= maximum || maximum <= 0 || 
raw_value < 0) @@ -114,9 +133,6 @@ u16 PadHandlerBase::NormalizeStickInput(u16 raw_value, s32 threshold, s32 multip return static_cast(ScaledInput(static_cast(scaled_value), 0.0f, static_cast(thumb_max), static_cast(threshold))); } -// This function normalizes stick deadzone based on the DS3's deadzone, which is ~13% (default of anti deadzone) -// X and Y is expected to be in (-255) to 255 range, deadzone should be in terms of thumb stick range -// return is new x and y values in 0-255 range std::tuple PadHandlerBase::NormalizeStickDeadzone(s32 inX, s32 inY, u32 deadzone, u32 anti_deadzone) const { f32 X = inX / 255.0f; @@ -150,28 +166,21 @@ std::tuple PadHandlerBase::NormalizeStickDeadzone(s32 inX, s32 inY, u3 return std::tuple(ConvertAxis(X), ConvertAxis(Y)); } -// get clamped value between 0 and 255 u16 PadHandlerBase::Clamp0To255(f32 input) { return static_cast(std::clamp(input, 0.0f, 255.0f)); } -// get clamped value between 0 and 1023 u16 PadHandlerBase::Clamp0To1023(f32 input) { return static_cast(std::clamp(input, 0.0f, 1023.0f)); } -// input has to be [-1,1]. 
result will be [0,255] u16 PadHandlerBase::ConvertAxis(f32 value) { return static_cast((value + 1.0) * (255.0 / 2.0)); } -// The DS3, (and i think xbox controllers) give a 'square-ish' type response, so that the corners will give (almost)max x/y instead of the ~30x30 from a perfect circle -// using a simple scale/sensitivity increase would *work* although it eats a chunk of our usable range in exchange -// this might be the best for now, in practice it seems to push the corners to max of 20x20, with a squircle_factor of ~4000 -// This function assumes inX and inY is already in 0-255 void PadHandlerBase::ConvertToSquirclePoint(u16& inX, u16& inY, u32 squircle_factor) { if (!squircle_factor) @@ -243,7 +252,7 @@ cfg_pad* PadHandlerBase::get_config(const std::string& pad_id) return nullptr; } -PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::string& pad_id, const pad_callback& callback, const pad_fail_callback& fail_callback, gui_call_type call_type, const std::vector& /*buttons*/) +PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::string& pad_id, const pad_callback& callback, const pad_fail_callback& fail_callback, gui_call_type call_type, const std::vector& buttons) { if (call_type == gui_call_type::blacklist) blacklist.clear(); @@ -284,12 +293,9 @@ PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::stri // Check for each button in our list if its corresponding (maybe remapped) button or axis was pressed. // Return the new value if the button was pressed (aka. its value was bigger than 0 or the defined threshold) - // Get all the legally pressed buttons and use the one with highest value (prioritize first) - struct - { - u16 value = 0; - std::string name; - } pressed_button{}; + // Get all the legally pressed buttons. We only accept one value per stick though, otherwise it will get messy. 
+ std::map pressed_buttons; + std::array, 2> pressed_sticks{}; for (const auto& [keycode, name] : button_list) { @@ -306,7 +312,9 @@ PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::stri } const bool is_trigger = get_is_left_trigger(device, keycode) || get_is_right_trigger(device, keycode); - const bool is_stick = !is_trigger && (get_is_left_stick(device, keycode) || get_is_right_stick(device, keycode)); + const bool is_left_stick = !is_trigger && get_is_left_stick(device, keycode); + const bool is_right_stick = !is_trigger && !is_left_stick && get_is_right_stick(device, keycode); + const bool is_stick = is_left_stick || is_right_stick; const bool is_touch_motion = !is_trigger && !is_stick && get_is_touch_pad_motion(device, keycode); const bool is_button = !is_trigger && !is_stick && !is_touch_motion; @@ -324,9 +332,27 @@ PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::stri const u16 diff = value > min_value ? value - min_value : 0; - if (diff > button_press_threshold && value > pressed_button.value) + if (diff > button_press_threshold) { - pressed_button = { .value = value, .name = name }; + if (is_left_stick) + { + if (pressed_sticks[0].second < value) + { + pressed_sticks[0] = { name, value }; + } + } + else if (is_right_stick) + { + if (pressed_sticks[1].second < value) + { + pressed_sticks[1] = { name, value }; + } + } + else + { + u16& pressed_value = pressed_buttons[name]; + pressed_value = std::max(pressed_value, value); + } } } } @@ -345,14 +371,11 @@ PadHandlerBase::connection PadHandlerBase::get_next_button_press(const std::stri if (callback) { - pad_preview_values preview_values = get_preview_values(data); + pad_preview_values preview_values = get_preview_values(data, buttons); pad_capabilities capabilities = get_capabilities(pad_id); const u32 battery_level = get_battery_level(pad_id); - if (pressed_button.value > 0) - callback(pressed_button.value, pressed_button.name, pad_id, battery_level, 
std::move(preview_values), std::move(capabilities)); - else - callback(0, "", pad_id, battery_level, std::move(preview_values), std::move(capabilities)); + callback(std::move(pressed_buttons), std::move(pressed_sticks), pad_id, battery_level, std::move(preview_values), std::move(capabilities)); } return status; @@ -412,7 +435,7 @@ void PadHandlerBase::convert_stick_values(u16& x_out, u16& y_out, s32 x_in, s32 } // Update the pad button values based on their type and thresholds. With this you can use axis or triggers as buttons or vice versa -void PadHandlerBase::TranslateButtonPress(const std::shared_ptr& device, u64 keyCode, bool& pressed, u16& val, bool use_stick_multipliers, bool ignore_stick_threshold, bool ignore_trigger_threshold) +void PadHandlerBase::TranslateButtonPress(const std::shared_ptr& device, u32 keyCode, bool& pressed, u16& val, bool use_stick_multipliers, bool ignore_stick_threshold, bool ignore_trigger_threshold) { if (!device || !device->config) { @@ -476,7 +499,7 @@ bool PadHandlerBase::bindPadToDevice(std::shared_ptr pad) return false; } - std::array, button::button_count> mapping = get_mapped_key_codes(pad_device, config); + std::array>, button::button_count> mapping = get_mapped_key_codes(pad_device, config); u32 pclass_profile = 0x0; u32 capabilities = CELL_PAD_CAPABILITY_PS3_CONFORMITY | CELL_PAD_CAPABILITY_PRESS_MODE | CELL_PAD_CAPABILITY_HP_ANALOG_STICK | CELL_PAD_CAPABILITY_ACTUATOR | CELL_PAD_CAPABILITY_SENSOR_MODE; @@ -566,69 +589,58 @@ bool PadHandlerBase::bindPadToDevice(std::shared_ptr pad) return true; } -std::array, PadHandlerBase::button::button_count> PadHandlerBase::get_mapped_key_codes(const std::shared_ptr& device, const cfg_pad* cfg) +std::array>, PadHandlerBase::button::button_count> PadHandlerBase::get_mapped_key_codes(const std::shared_ptr& device, const cfg_pad* cfg) { - std::array, button::button_count> mapping{}; + std::array>, button::button_count> mapping{}; if (!device || !cfg) return mapping; - 
device->trigger_code_left = FindKeyCodes(button_list, cfg->l2); - device->trigger_code_right = FindKeyCodes(button_list, cfg->r2); - device->axis_code_left[0] = FindKeyCodes(button_list, cfg->ls_left); - device->axis_code_left[1] = FindKeyCodes(button_list, cfg->ls_right); - device->axis_code_left[2] = FindKeyCodes(button_list, cfg->ls_down); - device->axis_code_left[3] = FindKeyCodes(button_list, cfg->ls_up); - device->axis_code_right[0] = FindKeyCodes(button_list, cfg->rs_left); - device->axis_code_right[1] = FindKeyCodes(button_list, cfg->rs_right); - device->axis_code_right[2] = FindKeyCodes(button_list, cfg->rs_down); - device->axis_code_right[3] = FindKeyCodes(button_list, cfg->rs_up); + mapping[button::up] = find_key_combos(button_list, cfg->up); + mapping[button::down] = find_key_combos(button_list, cfg->down); + mapping[button::left] = find_key_combos(button_list, cfg->left); + mapping[button::right] = find_key_combos(button_list, cfg->right); + mapping[button::cross] = find_key_combos(button_list, cfg->cross); + mapping[button::square] = find_key_combos(button_list, cfg->square); + mapping[button::circle] = find_key_combos(button_list, cfg->circle); + mapping[button::triangle] = find_key_combos(button_list, cfg->triangle); + mapping[button::start] = find_key_combos(button_list, cfg->start); + mapping[button::select] = find_key_combos(button_list, cfg->select); + mapping[button::l1] = find_key_combos(button_list, cfg->l1); + mapping[button::l2] = find_key_combos(button_list, cfg->l2); + mapping[button::l3] = find_key_combos(button_list, cfg->l3); + mapping[button::r1] = find_key_combos(button_list, cfg->r1); + mapping[button::r2] = find_key_combos(button_list, cfg->r2); + mapping[button::r3] = find_key_combos(button_list, cfg->r3); + mapping[button::ls_left] = find_key_combos(button_list, cfg->ls_left); + mapping[button::ls_right] = find_key_combos(button_list, cfg->ls_right); + mapping[button::ls_down] = find_key_combos(button_list, cfg->ls_down); + 
mapping[button::ls_up] = find_key_combos(button_list, cfg->ls_up); + mapping[button::rs_left] = find_key_combos(button_list, cfg->rs_left); + mapping[button::rs_right] = find_key_combos(button_list, cfg->rs_right); + mapping[button::rs_down] = find_key_combos(button_list, cfg->rs_down); + mapping[button::rs_up] = find_key_combos(button_list, cfg->rs_up); + mapping[button::ps] = find_key_combos(button_list, cfg->ps); - mapping[button::up] = FindKeyCodes(button_list, cfg->up); - mapping[button::down] = FindKeyCodes(button_list, cfg->down); - mapping[button::left] = FindKeyCodes(button_list, cfg->left); - mapping[button::right] = FindKeyCodes(button_list, cfg->right); - mapping[button::cross] = FindKeyCodes(button_list, cfg->cross); - mapping[button::square] = FindKeyCodes(button_list, cfg->square); - mapping[button::circle] = FindKeyCodes(button_list, cfg->circle); - mapping[button::triangle] = FindKeyCodes(button_list, cfg->triangle); - mapping[button::start] = FindKeyCodes(button_list, cfg->start); - mapping[button::select] = FindKeyCodes(button_list, cfg->select); - mapping[button::l1] = FindKeyCodes(button_list, cfg->l1); - mapping[button::l2] = narrow_set(device->trigger_code_left); - mapping[button::l3] = FindKeyCodes(button_list, cfg->l3); - mapping[button::r1] = FindKeyCodes(button_list, cfg->r1); - mapping[button::r2] = narrow_set(device->trigger_code_right); - mapping[button::r3] = FindKeyCodes(button_list, cfg->r3); - mapping[button::ls_left] = narrow_set(device->axis_code_left[0]); - mapping[button::ls_right] = narrow_set(device->axis_code_left[1]); - mapping[button::ls_down] = narrow_set(device->axis_code_left[2]); - mapping[button::ls_up] = narrow_set(device->axis_code_left[3]); - mapping[button::rs_left] = narrow_set(device->axis_code_right[0]); - mapping[button::rs_right] = narrow_set(device->axis_code_right[1]); - mapping[button::rs_down] = narrow_set(device->axis_code_right[2]); - mapping[button::rs_up] = narrow_set(device->axis_code_right[3]); - 
mapping[button::ps] = FindKeyCodes(button_list, cfg->ps); - - mapping[button::skateboard_ir_nose] = FindKeyCodes(button_list, cfg->ir_nose); - mapping[button::skateboard_ir_tail] = FindKeyCodes(button_list, cfg->ir_tail); - mapping[button::skateboard_ir_left] = FindKeyCodes(button_list, cfg->ir_left); - mapping[button::skateboard_ir_right] = FindKeyCodes(button_list, cfg->ir_right); - mapping[button::skateboard_tilt_left] = FindKeyCodes(button_list, cfg->tilt_left); - mapping[button::skateboard_tilt_right] = FindKeyCodes(button_list, cfg->tilt_right); + mapping[button::skateboard_ir_nose] = find_key_combos(button_list, cfg->ir_nose); + mapping[button::skateboard_ir_tail] = find_key_combos(button_list, cfg->ir_tail); + mapping[button::skateboard_ir_left] = find_key_combos(button_list, cfg->ir_left); + mapping[button::skateboard_ir_right] = find_key_combos(button_list, cfg->ir_right); + mapping[button::skateboard_tilt_left] = find_key_combos(button_list, cfg->tilt_left); + mapping[button::skateboard_tilt_right] = find_key_combos(button_list, cfg->tilt_right); if (b_has_pressure_intensity_button) { - mapping[button::pressure_intensity_button] = FindKeyCodes(button_list, cfg->pressure_intensity_button); + mapping[button::pressure_intensity_button] = find_key_combos(button_list, cfg->pressure_intensity_button); } if (b_has_analog_limiter_button) { - mapping[button::analog_limiter_button] = FindKeyCodes(button_list, cfg->analog_limiter_button); + mapping[button::analog_limiter_button] = find_key_combos(button_list, cfg->analog_limiter_button); } if (b_has_orientation) { - mapping[button::orientation_reset_button] = FindKeyCodes(button_list, cfg->orientation_reset_button); + mapping[button::orientation_reset_button] = find_key_combos(button_list, cfg->orientation_reset_button); } return mapping; @@ -660,30 +672,46 @@ void PadHandlerBase::get_mapping(const pad_ensemble& binding) bool pressed{}; u16 value{}; - for (u32 code : button.m_key_codes) + // The DS3 Button is 
considered pressed if any configured button combination is pressed + for (const std::set& combo : button.m_key_combos) { - bool press{}; - u16 val = button_values[code]; + bool combo_pressed = !combo.empty(); + u16 combo_val = 0; - TranslateButtonPress(device, code, press, val, analog_limiter_enabled); - - if (press) + // The button combination is only considered pressed if all the buttons are pressed + for (u32 code : combo) { + bool btn_pressed{}; + u16 btn_val = button_values[code]; + TranslateButtonPress(device, code, btn_pressed, btn_val, analog_limiter_enabled); + + if (btn_pressed == false) + { + combo_pressed = false; + break; + } + // Modify pressure if necessary if the button was pressed if (adjust_pressure) { - val = pad->m_pressure_intensity; + btn_val = pad->m_pressure_intensity; } else if (pressure_intensity_deadzone > 0) { // Ignore triggers, since they have their own deadzones if (!get_is_left_trigger(device, code) && !get_is_right_trigger(device, code)) { - val = NormalizeDirectedInput(val, pressure_intensity_deadzone, 255); + btn_val = NormalizeDirectedInput(btn_val, pressure_intensity_deadzone, 255); } } - value = std::max(value, val); + // Take minimum combo value. Otherwise we will always end up with the max value in case an actual button is part of the combo. + combo_val = (combo_val == 0) ? 
btn_val : std::min(combo_val, btn_val); + } + + if (combo_pressed) + { + value = std::max(value, combo_val); pressed = value > 0; } } @@ -702,31 +730,44 @@ void PadHandlerBase::get_mapping(const pad_ensemble& binding) u16 val_min{}; u16 val_max{}; - // m_key_codes_min are the mapped keys for left or down - for (u32 key_min : pad->m_sticks[i].m_key_codes_min) + // The DS3 Stick direction is considered pressed if any configured button combination is pressed + const auto get_stick_val = [this, &device, &button_values, &pressed, analog_limiter_enabled](const std::vector>& combos, u16& value) { - u16 val = button_values[key_min]; - - TranslateButtonPress(device, key_min, pressed, val, analog_limiter_enabled, true); - - if (pressed) + for (const std::set& combo : combos) { - val_min = std::max(val_min, val); + bool combo_pressed = !combo.empty(); + u16 combo_val = 0; + + for (u32 key_min : combo) + { + bool btn_pressed{}; + u16 btn_val = button_values[key_min]; + + TranslateButtonPress(device, key_min, btn_pressed, btn_val, analog_limiter_enabled, true); + + if (btn_pressed == false) + { + combo_pressed = false; + break; + } + + // Take minimum combo value. Otherwise we will always end up with the max value in case an actual button is part of the combo. + combo_val = (combo_val == 0) ? 
btn_val : std::min(combo_val, btn_val); + } + + if (combo_pressed) + { + value = std::max(value, combo_val); + pressed = value > 0; + } } - } + }; - // m_key_codes_max are the mapped keys for right or up - for (u32 key_max : pad->m_sticks[i].m_key_codes_max) - { - u16 val = button_values[key_max]; + // m_key_combos_min are the mapped keys for left or down + get_stick_val(pad->m_sticks[i].m_key_combos_min, val_min); - TranslateButtonPress(device, key_max, pressed, val, analog_limiter_enabled, true); - - if (pressed) - { - val_max = std::max(val_max, val); - } - } + // m_key_combos_max are the mapped keys for right or up + get_stick_val(pad->m_sticks[i].m_key_combos_max, val_max); // cancel out opposing values and get the resulting difference stick_val[i] = val_max - val_min; diff --git a/rpcs3/Emu/Io/PadHandler.h b/rpcs3/Emu/Io/PadHandler.h index 98cca72a7d..0f38f0cb3b 100644 --- a/rpcs3/Emu/Io/PadHandler.h +++ b/rpcs3/Emu/Io/PadHandler.h @@ -36,10 +36,6 @@ public: u8 small_motor{0}; bool new_output_data{true}; steady_clock::time_point last_output; - std::set trigger_code_left{}; - std::set trigger_code_right{}; - std::array, 4> axis_code_left{}; - std::array, 4> axis_code_right{}; struct color { @@ -94,12 +90,12 @@ struct pad_capabilities }; using pad_preview_values = std::array; -using pad_callback = std::function; +using pad_callback = std::function&& /*pressed_buttons*/, std::array, 2>&& /*pressed_sticks*/, std::string /*pad_name*/, u32 /*battery_level*/, pad_preview_values&&, pad_capabilities&&)>; using pad_fail_callback = std::function; using motion_preview_values = std::array; -using motion_callback = std::function; -using motion_fail_callback = std::function; +using motion_callback = std::function; +using motion_fail_callback = std::function; class PadHandlerBase { @@ -196,76 +192,11 @@ protected: std::shared_ptr m_pad_for_pad_settings; - static std::set narrow_set(const std::set& src); + // Search an unordered map for a string value and return the found 
combos + static std::vector> find_key_combos(const std::unordered_map& map, const std::string& cfg_string); - // Search an unordered map for a string value and return found keycode - template - static std::set FindKeyCodes(const std::unordered_map& map, const cfg::string& cfg_string, bool fallback = true) - { - std::set key_codes; - - const std::string& def = cfg_string.def; - const std::vector names = cfg_pad::get_buttons(cfg_string.to_string()); - T def_code = umax; - - for (const std::string& nam : names) - { - for (const auto& [code, name] : map) - { - if (name == nam) - { - key_codes.insert(static_cast(code)); - } - - if (fallback && name == def) - def_code = static_cast(code); - } - } - - if (!key_codes.empty()) - { - return key_codes; - } - - if (fallback) - { - if (!names.empty()) - input_log.error("FindKeyCode for [name = %s] returned with [def_code = %d] for [def = %s]", cfg_string.to_string(), def_code, def); - - if (def_code != umax) - { - return { def_code }; - } - } - - return {}; - } - - // Search an unordered map for a string value and return found keycode - template - static std::set FindKeyCodes(const std::unordered_map& map, const std::vector& names) - { - std::set key_codes; - - for (const std::string& name : names) - { - for (const auto& [code, nam] : map) - { - if (nam == name) - { - key_codes.insert(static_cast(code)); - break; - } - } - } - - if (!key_codes.empty()) - { - return key_codes; - } - - return {}; - } + // Search an unordered map for a combo and return the found key codes + static std::set find_key_codes(const std::unordered_map& map, const pad::combo& combo); // Get normalized trigger value based on the range defined by a threshold u16 NormalizeTriggerInput(u16 value, u32 threshold) const; @@ -274,7 +205,7 @@ protected: // the input values must lie in 0+ u16 NormalizeDirectedInput(s32 raw_value, s32 threshold, s32 maximum) const; - // This function normalizes stick deadzone based on the DS3's deadzone, which is ~13% + // This 
function normalizes stick deadzone based on the DS3's deadzone, which is ~13% (default of anti deadzone) // X and Y is expected to be in (-255) to 255 range, deadzone should be in terms of thumb stick range // return is new x and y values in 0-255 range std::tuple NormalizeStickDeadzone(s32 inX, s32 inY, u32 deadzone, u32 anti_deadzone) const; @@ -284,10 +215,10 @@ public: // Get new multiplied value based on the multiplier static s32 MultipliedInput(s32 raw_value, s32 multiplier); - // Get new scaled value between 0 and 255 based on its minimum and maximum + // Get new scaled value between 0 and range based on its minimum and maximum static f32 ScaledInput(f32 raw_value, f32 minimum, f32 maximum, f32 deadzone, f32 range = 255.0f); - // Get new scaled value between -255 and 255 based on its minimum and maximum + // Get new scaled value between -range and range based on its minimum and maximum static f32 ScaledAxisInput(f32 raw_value, f32 minimum, f32 maximum, f32 deadzone, f32 range = 255.0f); // get clamped value between 0 and 255 @@ -301,7 +232,7 @@ public: // The DS3, (and i think xbox controllers) give a 'square-ish' type response, so that the corners will give (almost)max x/y instead of the ~30x30 from a perfect circle // using a simple scale/sensitivity increase would *work* although it eats a chunk of our usable range in exchange - // this might be the best for now, in practice it seems to push the corners to max of 20x20, with a squircle_factor of 8000 + // this might be the best for now, in practice it seems to push the corners to max of 20x20, with a squircle_factor of ~4000 // This function assumes inX and inY is already in 0-255 static void ConvertToSquirclePoint(u16& inX, u16& inY, u32 squircle_factor); @@ -375,23 +306,23 @@ public: private: virtual std::shared_ptr get_device(const std::string& /*device*/) { return nullptr; } - virtual bool get_is_left_trigger(const std::shared_ptr& /*device*/, u64 /*keyCode*/) { return false; } - virtual bool 
get_is_right_trigger(const std::shared_ptr& /*device*/, u64 /*keyCode*/) { return false; } - virtual bool get_is_left_stick(const std::shared_ptr& /*device*/, u64 /*keyCode*/) { return false; } - virtual bool get_is_right_stick(const std::shared_ptr& /*device*/, u64 /*keyCode*/) { return false; } - virtual bool get_is_touch_pad_motion(const std::shared_ptr& /*device*/, u64 /*keyCode*/) { return false; } + virtual bool get_is_left_trigger(const std::shared_ptr& /*device*/, u32 /*keyCode*/) { return false; } + virtual bool get_is_right_trigger(const std::shared_ptr& /*device*/, u32 /*keyCode*/) { return false; } + virtual bool get_is_left_stick(const std::shared_ptr& /*device*/, u32 /*keyCode*/) { return false; } + virtual bool get_is_right_stick(const std::shared_ptr& /*device*/, u32 /*keyCode*/) { return false; } + virtual bool get_is_touch_pad_motion(const std::shared_ptr& /*device*/, u32 /*keyCode*/) { return false; } virtual PadHandlerBase::connection update_connection(const std::shared_ptr& /*device*/) { return connection::disconnected; } virtual void get_extended_info(const pad_ensemble& /*binding*/) {} virtual void apply_pad_data(const pad_ensemble& /*binding*/) {} - virtual std::unordered_map get_button_values(const std::shared_ptr& /*device*/) { return {}; } - virtual pad_preview_values get_preview_values(const std::unordered_map& /*data*/) { return {}; } + virtual std::unordered_map get_button_values(const std::shared_ptr& /*device*/) { return {}; } + virtual pad_preview_values get_preview_values(const std::unordered_map& /*data*/, const std::vector& /*buttons*/){ return {}; } void get_orientation(const pad_ensemble& binding) const; protected: - virtual std::array, PadHandlerBase::button::button_count> get_mapped_key_codes(const std::shared_ptr& device, const cfg_pad* cfg); + virtual std::array>, PadHandlerBase::button::button_count> get_mapped_key_codes(const std::shared_ptr& device, const cfg_pad* cfg); virtual void get_mapping(const pad_ensemble& 
binding); - void TranslateButtonPress(const std::shared_ptr& device, u64 keyCode, bool& pressed, u16& val, bool use_stick_multipliers, bool ignore_stick_threshold = false, bool ignore_trigger_threshold = false); + void TranslateButtonPress(const std::shared_ptr& device, u32 keyCode, bool& pressed, u16& val, bool use_stick_multipliers, bool ignore_stick_threshold = false, bool ignore_trigger_threshold = false); void init_configs(); cfg_pad* get_config(const std::string& pad_id); diff --git a/rpcs3/Emu/Io/pad_config.cpp b/rpcs3/Emu/Io/pad_config.cpp index 937626076e..7d042bddbc 100644 --- a/rpcs3/Emu/Io/pad_config.cpp +++ b/rpcs3/Emu/Io/pad_config.cpp @@ -5,30 +5,103 @@ extern std::string g_input_config_override; -std::vector cfg_pad::get_buttons(std::string_view str) +std::vector cfg_pad::get_combos(std::string_view button_string) { - std::vector vec = fmt::split(str, {","}); + if (button_string.empty()) + return {}; - // Handle special case: string contains separator itself as configured value - if (str == "," || str.find(",,") != umax) + // Handle special case: string contains separator itself as configured value (it's why I don't use fmt::split here) + const auto split = [](std::string_view str, char sep) { - vec.push_back(","); + std::set buttons; + bool was_sep = true; + usz btn_start = 0ULL; + usz i = 0ULL; + + for (; i < str.size(); i++) + { + const char c = str[i]; + + if (c == sep) + { + if (!was_sep) + { + was_sep = true; + buttons.insert(std::string(str.substr(btn_start, i - btn_start))); + continue; + } + } + + if (was_sep) + { + was_sep = false; + btn_start = i; + } + + if (i == (str.size() - 1)) + { + buttons.insert(std::string(str.substr(btn_start, i - btn_start + 1))); + } + } + + return buttons; + }; + + std::vector combos; + + // Get all combos (seperated by ',') + const std::set combo_strings = split(button_string, ','); + + for (const std::string& combo_string : combo_strings) + { + // Get all keys for this combo (seperated by '&') + std::set 
combo = split(combo_string, '&'); + if (!combo.empty()) + { + combos.push_back(pad::combo{std::move(combo)}); + } } - // Remove duplicates - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); - - return vec; + return combos; } -std::string cfg_pad::get_buttons(std::vector vec) +std::string cfg_pad::get_button_string(std::vector& combos) { - // Remove duplicates - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); + std::vector combo_strings; - return fmt::merge(vec, ","); + // Remove duplicates + std::sort(combos.begin(), combos.end()); + combos.erase(std::unique(combos.begin(), combos.end()), combos.end()); + + for (const pad::combo& combo : combos) + { + // Merge all keys for this combo (seperated by '&') + combo_strings.push_back(combo.to_string()); + } + + // Merge combos (seperated by ',') + return fmt::merge(combo_strings, ","); +} + +std::string cfg_pad::make_button_string(const std::unordered_map& button_list, const std::vector>& button_combos) +{ + std::vector combos; + + for (const std::set& button_combo : button_combos) + { + if (button_combo.empty()) continue; + + pad::combo combo {}; + + for (u32 button : button_combo) + { + combo.add_button(::at32(button_list, button)); + } + + combos.push_back(std::move(combo)); + } + + return get_button_string(combos); } u8 cfg_pad::get_motor_speed(VibrateMotor& motor, f32 multiplier) const diff --git a/rpcs3/Emu/Io/pad_config.h b/rpcs3/Emu/Io/pad_config.h index 7dd4bd6323..86779c0859 100644 --- a/rpcs3/Emu/Io/pad_config.h +++ b/rpcs3/Emu/Io/pad_config.h @@ -5,10 +5,47 @@ #include "Utilities/Config.h" #include +#include namespace pad { constexpr static std::string_view keyboard_device_name = "Keyboard"; + + struct combo + { + public: + combo() = default; + combo(std::set buttons) : m_buttons(std::move(buttons)) {} + + const std::set& buttons() const + { + return m_buttons; + } + + void add_button(const std::string& button) + { 
+ if (button.empty()) return; + m_buttons.insert(button); + } + + std::string to_string() const + { + return fmt::merge(m_buttons, "&"); + } + + bool operator==(const combo& other) const + { + return m_buttons == other.m_buttons; + } + + bool operator<(const combo& other) const + { + return m_buttons < other.m_buttons; + } + + private: + std::set m_buttons; + }; } struct cfg_sensor final : cfg::node @@ -25,8 +62,9 @@ struct cfg_pad final : cfg::node cfg_pad() {}; cfg_pad(node* owner, const std::string& name) : cfg::node(owner, name) {} - static std::vector get_buttons(std::string_view str); - static std::string get_buttons(std::vector vec); + static std::vector get_combos(std::string_view button_string); + static std::string get_button_string(std::vector& combos); + static std::string make_button_string(const std::unordered_map& button_list, const std::vector>& button_combos); u8 get_motor_speed(VibrateMotor& motor, f32 multiplier) const; u8 get_large_motor_speed(std::array& motors) const; diff --git a/rpcs3/Emu/Io/pad_types.cpp b/rpcs3/Emu/Io/pad_types.cpp index ad3369a7a7..d5ef26f0d7 100644 --- a/rpcs3/Emu/Io/pad_types.cpp +++ b/rpcs3/Emu/Io/pad_types.cpp @@ -203,7 +203,7 @@ bool Pad::get_analog_limiter_button_active(bool is_toggle_mode, u32 player_id) const Button& analog_limiter_button = m_buttons[m_analog_limiter_button_index]; - if (analog_limiter_button.m_key_codes.empty()) + if (analog_limiter_button.m_key_combos.empty()) { // Active by default if no button was assigned return true; diff --git a/rpcs3/Emu/Io/pad_types.h b/rpcs3/Emu/Io/pad_types.h index 5fd9c8973a..d2b1535a1a 100644 --- a/rpcs3/Emu/Io/pad_types.h +++ b/rpcs3/Emu/Io/pad_types.h @@ -386,18 +386,18 @@ struct Button u16 m_value = 0; bool m_pressed = false; - std::set m_key_codes{}; + std::vector> m_key_combos; - u16 m_actual_value = 0; // only used in keyboard_pad_handler - bool m_analog = false; // only used in keyboard_pad_handler - bool m_trigger = false; // only used in keyboard_pad_handler 
- std::map m_pressed_keys{}; // only used in keyboard_pad_handler + u16 m_actual_value = 0; // only used in keyboard_pad_handler + bool m_analog = false; // only used in keyboard_pad_handler + bool m_trigger = false; // only used in keyboard_pad_handler + std::map m_pressed_keys; // only used in keyboard_pad_handler Button(){} - Button(u32 offset, std::set key_codes, u32 outKeyCode) + Button(u32 offset, std::vector> key_combos, u32 outKeyCode) : m_offset(offset) , m_outKeyCode(outKeyCode) - , m_key_codes(std::move(key_codes)) + , m_key_combos(std::move(key_combos)) { if (offset == CELL_PAD_BTN_OFFSET_DIGITAL1) { @@ -426,17 +426,19 @@ struct AnalogStick u32 m_offset = 0; u16 m_value = 128; - std::set m_key_codes_min{}; - std::set m_key_codes_max{}; + std::vector> m_key_combos_min; + std::vector> m_key_combos_max; - std::map m_pressed_keys_min{}; // only used in keyboard_pad_handler - std::map m_pressed_keys_max{}; // only used in keyboard_pad_handler + std::map m_pressed_keys_min; // only used in keyboard_pad_handler + std::map m_pressed_keys_max; // only used in keyboard_pad_handler + std::map m_pressed_combos_min; // only used in keyboard_pad_handler + std::map m_pressed_combos_max; // only used in keyboard_pad_handler AnalogStick() {} - AnalogStick(u32 offset, std::set key_codes_min, std::set key_codes_max) + AnalogStick(u32 offset, std::vector> key_combos_min, std::vector> key_combos_max) : m_offset(offset) - , m_key_codes_min(std::move(key_codes_min)) - , m_key_codes_max(std::move(key_codes_max)) + , m_key_combos_min(std::move(key_combos_min)) + , m_key_combos_max(std::move(key_combos_max)) {} }; diff --git a/rpcs3/Emu/Io/ps_move_data.cpp b/rpcs3/Emu/Io/ps_move_data.cpp index 0a167eed39..bf35f26a19 100644 --- a/rpcs3/Emu/Io/ps_move_data.cpp +++ b/rpcs3/Emu/Io/ps_move_data.cpp @@ -24,34 +24,34 @@ void ps_move_data::reset_sensors() angaccel_world = {}; } +ps_move_data::vect<3> ps_move_data::rotate_vector(const vect<4>& q, const vect<3>& v) +{ + const auto cross = 
[](const vect<3>& a, const vect<3>& b) + { + return vect<3>({ + a.y() * b.z() - a.z() * b.y(), + a.z() * b.x() - a.x() * b.z(), + a.x() * b.y() - a.y() * b.x() + }); + }; + + // q = (x, y, z, w) + const vect<3> q_vec({q.x(), q.y(), q.z()}); + + // t = 2 * cross(q_vec, v) + const vect<3> t = cross(q_vec, v) * 2.0f; + + // v' = v + w * t + cross(q_vec, t) + const vect<3> v_prime = v + t * q.w() + cross(q_vec, t); + + return v_prime; +} + void ps_move_data::update_orientation(f32 delta_time) { if (!delta_time) return; - // Rotate vector v by quaternion q - const auto rotate_vector = [](const vect<4>& q, const vect<3>& v) - { - const vect<4> qv({0.0f, v.x(), v.y(), v.z()}); - const vect<4> q_inv({q.w(), -q.x(), -q.y(), -q.z()}); - - // t = q * v - vect<4> t; - t.w() = -q.x() * qv.x() - q.y() * qv.y() - q.z() * qv.z(); - t.x() = q.w() * qv.x() + q.y() * qv.z() - q.z() * qv.y(); - t.y() = q.w() * qv.y() - q.x() * qv.z() + q.z() * qv.x(); - t.z() = q.w() * qv.z() + q.x() * qv.y() - q.y() * qv.x(); - - // r = t * q_inv - vect<4> r; - r.w() = -t.x() * q_inv.x() - t.y() * q_inv.y() - t.z() * q_inv.z(); - r.x() = t.w() * q_inv.x() + t.y() * q_inv.z() - t.z() * q_inv.y(); - r.y() = t.w() * q_inv.y() - t.x() * q_inv.z() + t.z() * q_inv.x(); - r.z() = t.w() * q_inv.z() + t.x() * q_inv.y() - t.y() * q_inv.x(); - - return vect<3>({r.x(), r.y(), r.z()}); - }; - if constexpr (use_imu_for_velocity) { // Gravity in world frame diff --git a/rpcs3/Emu/Io/ps_move_data.h b/rpcs3/Emu/Io/ps_move_data.h index 1ae30f5c66..2470e47e8c 100644 --- a/rpcs3/Emu/Io/ps_move_data.h +++ b/rpcs3/Emu/Io/ps_move_data.h @@ -15,6 +15,26 @@ struct ps_move_data template const T& operator[](I i) const { return data[i]; } + vect operator*(f32 s) const + { + vect result = *this; + for (int i = 0; i < Size; ++i) + { + result[i] *= s; + } + return result; + } + + vect operator+(const vect& other) const + { + vect result = *this; + for (int i = 0; i < Size; ++i) + { + result[i] += other[i]; + } + return result; + } 
+ T x() const requires (Size >= 1) { return data[0]; } T y() const requires (Size >= 2) { return data[1]; } T z() const requires (Size >= 3) { return data[2]; } @@ -72,4 +92,7 @@ struct ps_move_data void reset_sensors(); void update_orientation(f32 delta_time); void update_velocity(u64 timestamp, be_t pos_world[4]); + + // Rotate vector v by quaternion q + static vect<3> rotate_vector(const vect<4>& q, const vect<3>& v); }; diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h index 127d24015f..ef73149f5e 100644 --- a/rpcs3/Emu/Io/recording_config.h +++ b/rpcs3/Emu/Io/recording_config.h @@ -13,13 +13,13 @@ struct cfg_recording final : cfg::node node_video(cfg::node* _this) : cfg::node(_this, "Video") {} cfg::uint<0, 60> framerate{this, "Framerate", 30}; - cfg::uint<0, 7680> width{this, "Width", 1280}; - cfg::uint<0, 4320> height{this, "Height", 720}; + cfg::uint<640, 7680> width{this, "Width", 1280}; + cfg::uint<360, 4320> height{this, "Height", 720}; cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 - cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; - cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; - cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + cfg::uint<1'000'000, 60'000'000> video_bps{this, "Video Bitrate", 4'000'000}; + cfg::uint<0, 3> max_b_frames{this, "Max B-Frames", 2}; + cfg::uint<1, 120> gop_size{this, "Group of Pictures Size", 30}; } video{ this }; @@ -28,7 +28,7 @@ struct cfg_recording final : cfg::node node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {} cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86018}; // AVCodecID::AV_CODEC_ID_AAC - cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000}; + cfg::uint<64'000, 320'000> audio_bps{this, "Audio Bitrate", 192'000}; } audio{ this }; diff --git a/rpcs3/Emu/NP/clans_client.cpp 
b/rpcs3/Emu/NP/clans_client.cpp index da3f506c91..c529d5ea38 100644 --- a/rpcs3/Emu/NP/clans_client.cpp +++ b/rpcs3/Emu/NP/clans_client.cpp @@ -128,12 +128,6 @@ void fmt_class_string::format(std::string& out, u64 arg namespace clan { - struct curl_memory - { - char* response; - size_t size; - }; - size_t clans_client::curl_write_callback(void* data, size_t size, size_t nmemb, void* clientp) { const size_t realsize = size * nmemb; @@ -464,7 +458,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -656,7 +650,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -674,7 +668,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -832,7 +826,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document 
response = pugi::xml_document(); @@ -850,7 +844,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -868,7 +862,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -902,7 +896,7 @@ namespace clan clan.append_child("id").text().set(clan_id); pugi::xml_node role = clan.append_child("onlinename"); - role.text().set(nph.get_npid().handle.data); + role.text().set(np::npid_to_string(nph.get_npid()).c_str()); pugi::xml_node description = clan.append_child("description"); description.text().set(info.description); @@ -990,7 +984,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_document response = pugi::xml_document(); @@ -1008,7 +1002,7 @@ namespace clan clan.append_child("ticket").text().set(ticket.c_str()); clan.append_child("id").text().set(clan_id); - const std::string jid_str = fmt::format(JID_FORMAT, np_id.handle.data); + const std::string jid_str = fmt::format(JID_FORMAT, np::npid_to_string(np_id)); clan.append_child("jid").text().set(jid_str.c_str()); pugi::xml_node role_node = clan.append_child("role"); diff --git 
a/rpcs3/Emu/NP/np_cache.cpp b/rpcs3/Emu/NP/np_cache.cpp index 04223da545..4d70cacaa0 100644 --- a/rpcs3/Emu/NP/np_cache.cpp +++ b/rpcs3/Emu/NP/np_cache.cpp @@ -125,7 +125,7 @@ namespace np rooms[room_id].opt_param = *sce_opt_param; } - std::pair> cache_manager::get_slots(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_slots(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -134,7 +134,7 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - const auto& room = rooms[room_id]; + const auto& room = ::at32(rooms, room_id); SceNpMatching2RoomSlotInfo slots{}; @@ -166,7 +166,7 @@ namespace np return {CELL_OK, slots}; } - std::pair> cache_manager::get_memberids(u64 room_id, s32 sort_method) + std::pair> cache_manager::get_memberids(u64 room_id, s32 sort_method) const { std::lock_guard lock(mutex); @@ -175,7 +175,7 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - const auto& room = rooms[room_id]; + const auto& room = ::at32(rooms, room_id); std::vector vec_memberids; @@ -211,7 +211,7 @@ namespace np return {CELL_OK, vec_memberids}; } - std::pair> cache_manager::get_password(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_password(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -220,15 +220,17 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - if (!rooms[room_id].owner) + const auto& room = ::at32(rooms, room_id); + + if (!room.owner) { return {SCE_NP_MATCHING2_ERROR_NOT_ALLOWED, {}}; } - return {CELL_OK, rooms[room_id].password}; + return {CELL_OK, room.password}; } - std::pair> cache_manager::get_opt_param(SceNpMatching2RoomId room_id) + std::pair> cache_manager::get_opt_param(SceNpMatching2RoomId room_id) const { std::lock_guard lock(mutex); @@ -237,10 +239,10 @@ namespace np return {SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND, {}}; } - return {CELL_OK, rooms[room_id].opt_param}; + return {CELL_OK, ::at32(rooms, room_id).opt_param}; } - error_code 
cache_manager::get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) + error_code cache_manager::get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) const { std::lock_guard lock(mutex); @@ -249,7 +251,7 @@ namespace np return SCE_NP_MATCHING2_ERROR_ROOM_NOT_FOUND; } - if (!rooms[room_id].members.contains(member_id)) + if (!::at32(rooms, room_id).members.contains(member_id)) { return SCE_NP_MATCHING2_ERROR_ROOM_MEMBER_NOT_FOUND; } @@ -352,7 +354,7 @@ namespace np return not_an_error(needed_data_size); } - std::pair> cache_manager::get_npid(u64 room_id, u16 member_id) + std::pair> cache_manager::get_npid(u64 room_id, u16 member_id) const { std::lock_guard lock(mutex); @@ -371,13 +373,13 @@ namespace np return {CELL_OK, ::at32(::at32(rooms, room_id).members, member_id).userInfo.npId}; } - std::optional cache_manager::get_memberid(u64 room_id, const SceNpId& npid) + std::optional cache_manager::get_memberid(u64 room_id, const SceNpId& npid) const { std::lock_guard lock(mutex); if (!rooms.contains(room_id)) { - np_cache.error("np_cache::get_memberid cache miss room_id: room_id(%d)/npid(%s)", room_id, static_cast(npid.handle.data)); + np_cache.error("np_cache::get_memberid cache miss room_id: room_id(%d)/npid(%s)", room_id, np::npid_to_string(npid)); return std::nullopt; } @@ -389,7 +391,7 @@ namespace np return id; } - np_cache.error("np_cache::get_memberid cache miss member_id: room_id(%d)/npid(%s)", room_id, static_cast(npid.handle.data)); + np_cache.error("np_cache::get_memberid cache miss member_id: room_id(%d)/npid(%s)", room_id, np::npid_to_string(npid)); return 
std::nullopt; } diff --git a/rpcs3/Emu/NP/np_cache.h b/rpcs3/Emu/NP/np_cache.h index 8870f169a8..bd0bd8a736 100644 --- a/rpcs3/Emu/NP/np_cache.h +++ b/rpcs3/Emu/NP/np_cache.h @@ -74,16 +74,16 @@ namespace np void update_password(SceNpMatching2RoomId room_id, const std::optional& password); void update_opt_param(SceNpMatching2RoomId room_id, const SceNpMatching2SignalingOptParam* sce_opt_param); - std::pair> get_slots(SceNpMatching2RoomId room_id); - std::pair> get_memberids(u64 room_id, s32 sort_method); - std::pair> get_password(SceNpMatching2RoomId room_id); - std::pair> get_opt_param(SceNpMatching2RoomId room_id); - error_code get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl); - std::pair> get_npid(u64 room_id, u16 member_id); - std::optional get_memberid(u64 room_id, const SceNpId& npid); + std::pair> get_slots(SceNpMatching2RoomId room_id) const; + std::pair> get_memberids(u64 room_id, s32 sort_method) const; + std::pair> get_password(SceNpMatching2RoomId room_id) const; + std::pair> get_opt_param(SceNpMatching2RoomId room_id) const; + error_code get_member_and_attrs(SceNpMatching2RoomId room_id, SceNpMatching2RoomMemberId member_id, const std::vector& binattrs_list, SceNpMatching2RoomMemberDataInternal* ptr_member, u32 addr_data, u32 size_data, bool include_onlinename, bool include_avatarurl) const; + std::pair> get_npid(u64 room_id, u16 member_id) const; + std::optional get_memberid(u64 room_id, const SceNpId& npid) const; private: - shared_mutex mutex; + mutable shared_mutex mutex; std::map rooms; }; } // namespace np diff --git a/rpcs3/Emu/NP/np_contexts.cpp b/rpcs3/Emu/NP/np_contexts.cpp index f43d02c362..739a46368b 100644 --- a/rpcs3/Emu/NP/np_contexts.cpp +++ b/rpcs3/Emu/NP/np_contexts.cpp @@ -25,7 +25,7 @@ 
generic_async_transaction_context::~generic_async_transaction_context() } } -std::optional generic_async_transaction_context::get_transaction_status() +std::optional generic_async_transaction_context::get_transaction_status() const { std::lock_guard lock(mutex); return result; diff --git a/rpcs3/Emu/NP/np_contexts.h b/rpcs3/Emu/NP/np_contexts.h index 20e123730b..98ed36fa0a 100644 --- a/rpcs3/Emu/NP/np_contexts.h +++ b/rpcs3/Emu/NP/np_contexts.h @@ -20,12 +20,12 @@ struct generic_async_transaction_context generic_async_transaction_context(const SceNpCommunicationId& communicationId, const SceNpCommunicationPassphrase& passphrase, u64 timeout); - std::optional get_transaction_status(); + std::optional get_transaction_status() const; void abort_transaction(); error_code wait_for_completion(); void set_result_and_wake(error_code err); - shared_mutex mutex; + mutable shared_mutex mutex; std::condition_variable_any wake_cond, completion_cond; std::optional result; SceNpCommunicationId communicationId; diff --git a/rpcs3/Emu/NP/np_gui_cache.cpp b/rpcs3/Emu/NP/np_gui_cache.cpp index fee08356d5..1b85275d24 100644 --- a/rpcs3/Emu/NP/np_gui_cache.cpp +++ b/rpcs3/Emu/NP/np_gui_cache.cpp @@ -63,7 +63,7 @@ namespace np np_gui_cache.error("Cache mismatch: tried to remove a member but it wasn't in the room"); } - error_code gui_cache_manager::get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) + error_code gui_cache_manager::get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) const { std::lock_guard lock(mutex); diff --git a/rpcs3/Emu/NP/np_gui_cache.h b/rpcs3/Emu/NP/np_gui_cache.h index d742cc5749..431259df6b 100644 --- a/rpcs3/Emu/NP/np_gui_cache.h +++ b/rpcs3/Emu/NP/np_gui_cache.h @@ -45,10 +45,10 @@ namespace np void add_member(const SceNpRoomId& room_id, const SceNpMatchingRoomMember* user_info, bool new_member); void del_member(const SceNpRoomId& room_id, const SceNpMatchingRoomMember* user_info); - error_code 
get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data); + error_code get_room_member_list(const SceNpRoomId& room_id, u32 buf_len, vm::ptr data) const; private: - shared_mutex mutex; + mutable shared_mutex mutex; std::map rooms; }; } // namespace np diff --git a/rpcs3/Emu/NP/np_handler.cpp b/rpcs3/Emu/NP/np_handler.cpp index c4db3350c0..3eb2bef0ed 100644 --- a/rpcs3/Emu/NP/np_handler.cpp +++ b/rpcs3/Emu/NP/np_handler.cpp @@ -111,7 +111,7 @@ namespace np } ticket::ticket(std::vector&& raw_data) - : raw_data(raw_data) + : raw_data(std::move(raw_data)) { parse(); } @@ -254,7 +254,7 @@ namespace np // Trim null characters const auto& vec = node.data.data_vec; - auto it = std::find(vec.begin(), vec.end(), 0); + const auto it = std::find(vec.begin(), vec.end(), 0); return std::string(vec.begin(), it); } @@ -387,7 +387,7 @@ namespace np return; } - if (nodes[0].id != 0x3000 && nodes[1].id != 0x3002) + if (nodes[0].id != 0x3000 || nodes[1].id != 0x3002) { ticket_log.error("The 2 blobs ids are incorrect"); return; @@ -1024,7 +1024,7 @@ namespace np } } - nph_log.notice("basic_event: event:%d, from:%s(%s), size:%d", *event, static_cast(from->userId.handle.data), static_cast(from->name.data), *size); + nph_log.notice("basic_event: event:%d, from:%s(%s), size:%d", *event, np::npid_to_string(from->userId), static_cast(from->name.data), *size); return CELL_OK; } @@ -1228,16 +1228,22 @@ namespace np } auto messages = rpcn->get_new_messages(); - if (basic_handler_registered) + + for (const auto msg_id : messages) { - for (const auto msg_id : messages) + const auto opt_msg = rpcn->get_message(msg_id); + + if (!opt_msg) + { + continue; + } + + const auto& msg = opt_msg.value(); + const localized_string_id loc_id = (msg->second.mainType == SCE_NP_BASIC_MESSAGE_MAIN_TYPE_INVITE) ? 
localized_string_id::CELL_NP_MESSAGE_INVITE_RECEIVED : localized_string_id::CELL_NP_MESSAGE_OTHER_RECEIVED; + rsx::overlays::queue_message(get_localized_string(loc_id, msg->first.c_str()), 6'000'000); + + if (basic_handler_registered) { - const auto opt_msg = rpcn->get_message(msg_id); - if (!opt_msg) - { - continue; - } - const auto& msg = opt_msg.value(); if (strncmp(msg->second.commId.data, basic_handler.context.data, sizeof(basic_handler.context.data) - 1) == 0) { u32 event; @@ -1361,7 +1367,7 @@ namespace np player_history& np_handler::get_player_and_set_timestamp(const SceNpId& npid, u64 timestamp) { - std::string npid_str = std::string(npid.handle.data); + std::string npid_str = np::npid_to_string(npid); if (!players_history.contains(npid_str)) { @@ -1375,12 +1381,12 @@ namespace np return history; } - u32 np_handler::get_clan_ticket_ready() + u32 np_handler::get_clan_ticket_ready() const { return clan_ticket_ready.load(); } - ticket np_handler::get_clan_ticket() + ticket np_handler::get_clan_ticket() const { clan_ticket_ready.wait(0, atomic_wait_timeout{60'000'000'000}); // 60 seconds @@ -1441,7 +1447,7 @@ namespace np return req_id; } - u32 np_handler::get_players_history_count(u32 options) + u32 np_handler::get_players_history_count(u32 options) const { const bool all_history = (options == SCE_NP_BASIC_PLAYERS_HISTORY_OPTIONS_ALL); @@ -1459,7 +1465,7 @@ namespace np })); } - bool np_handler::get_player_history_entry(u32 options, u32 index, SceNpId* npid) + bool np_handler::get_player_history_entry(u32 options, u32 index, SceNpId* npid) const { const bool all_history = (options == SCE_NP_BASIC_PLAYERS_HISTORY_OPTIONS_ALL); @@ -1467,14 +1473,13 @@ namespace np if (all_history) { + if (index >= players_history.size()) + return false; + auto it = players_history.begin(); std::advance(it, index); - - if (it != players_history.end()) - { - string_to_npid(it->first, *npid); - return true; - } + string_to_npid(it->first, *npid); + return true; } else { @@ -1641,7 
+1646,7 @@ namespace np return SCE_NP_BASIC_ERROR_NOT_CONNECTED; } - auto friend_infos = rpcn->get_friend_presence_by_npid(std::string(npid.handle.data)); + auto friend_infos = rpcn->get_friend_presence_by_npid(np::npid_to_string(npid)); if (!friend_infos) { return SCE_NP_BASIC_ERROR_INVALID_ARGUMENT; diff --git a/rpcs3/Emu/NP/np_handler.h b/rpcs3/Emu/NP/np_handler.h index 9c02007a9c..7e43bbf3cd 100644 --- a/rpcs3/Emu/NP/np_handler.h +++ b/rpcs3/Emu/NP/np_handler.h @@ -257,12 +257,12 @@ namespace np // Misc stuff void req_ticket(u32 version, const SceNpId* npid, const char* service_id, const u8* cookie, u32 cookie_size, const char* entitlement_id, u32 consumed_count); const ticket& get_ticket() const; - u32 get_clan_ticket_ready(); - ticket get_clan_ticket(); + u32 get_clan_ticket_ready() const; + ticket get_clan_ticket() const; void add_player_to_history(const SceNpId* npid, const char* description); u32 add_players_to_history(const SceNpId* npids, const char* description, u32 count); - u32 get_players_history_count(u32 options); - bool get_player_history_entry(u32 options, u32 index, SceNpId* npid); + u32 get_players_history_count(u32 options) const; + bool get_player_history_entry(u32 options, u32 index, SceNpId* npid) const; SceNpMatching2MemoryInfo get_memory_info() const; error_code abort_request(u32 req_id); @@ -518,7 +518,7 @@ namespace np player_history& get_player_and_set_timestamp(const SceNpId& npid, u64 timestamp); void save_players_history(); - shared_mutex mutex_history; + mutable shared_mutex mutex_history; std::map players_history; // npid / history struct diff --git a/rpcs3/Emu/NP/np_helpers.cpp b/rpcs3/Emu/NP/np_helpers.cpp index 79f61ca627..9d651693d9 100644 --- a/rpcs3/Emu/NP/np_helpers.cpp +++ b/rpcs3/Emu/NP/np_helpers.cpp @@ -20,7 +20,7 @@ namespace np return std::string(ip_str); } - std::string ether_to_string(std::array& ether) + std::string ether_to_string(const std::array& ether) { return fmt::format("%02X:%02X:%02X:%02X:%02X:%02X", 
ether[0], ether[1], ether[2], ether[3], ether[4], ether[5]); } @@ -89,6 +89,13 @@ namespace np // npid->reserved[0] = 1; } + std::string npid_to_string(const SceNpId& npid) + { + char npid_str[17]{}; + std::memcpy(npid_str, npid.handle.data, 16); + return std::string(npid_str); + } + void string_to_online_name(std::string_view str, SceNpOnlineName& online_name) { memset(&online_name, 0, sizeof(online_name)); @@ -103,7 +110,7 @@ namespace np bool is_valid_npid(const SceNpId& npid) { - if (!std::all_of(npid.handle.data, npid.handle.data + 16, [](char c) { return std::isalnum(c) || c == '-' || c == '_' || c == 0; } ) + if (!std::all_of(npid.handle.data, npid.handle.data + 16, [](char c) { return std::isalnum(static_cast(c)) || c == '-' || c == '_' || c == 0; } ) || npid.handle.data[16] != 0 || !std::all_of(npid.handle.dummy, npid.handle.dummy + 3, [](char val) { return val == 0; }) ) { diff --git a/rpcs3/Emu/NP/np_helpers.h b/rpcs3/Emu/NP/np_helpers.h index c33b4ca001..d9e2a9d076 100644 --- a/rpcs3/Emu/NP/np_helpers.h +++ b/rpcs3/Emu/NP/np_helpers.h @@ -7,12 +7,13 @@ namespace np { std::string ip_to_string(u32 addr); - std::string ether_to_string(std::array& ether); + std::string ether_to_string(const std::array& ether); bool validate_communication_id(const SceNpCommunicationId& com_id); std::string communication_id_to_string(const SceNpCommunicationId& communicationId); std::optional string_to_communication_id(std::string_view str); void string_to_npid(std::string_view str, SceNpId& npid); + std::string npid_to_string(const SceNpId& npid); void string_to_online_name(std::string_view str, SceNpOnlineName& online_name); void string_to_avatar_url(std::string_view str, SceNpAvatarUrl& avatar_url); void strings_to_userinfo(std::string_view npid, std::string_view online_name, std::string_view avatar_url, SceNpUserInfo& user_info); diff --git a/rpcs3/Emu/NP/np_notifications.cpp b/rpcs3/Emu/NP/np_notifications.cpp index d5756a8351..9b64882f0f 100644 --- 
a/rpcs3/Emu/NP/np_notifications.cpp +++ b/rpcs3/Emu/NP/np_notifications.cpp @@ -41,7 +41,7 @@ namespace np return; } - rpcn_log.notice("Received notification that user %s(%d) joined the room(%d)", notif_data->roomMemberDataInternal->userInfo.npId.handle.data, notif_data->roomMemberDataInternal->memberId, room_id); + rpcn_log.notice("Received notification that user %s(%d) joined the room(%d)", np::npid_to_string(notif_data->roomMemberDataInternal->userInfo.npId), notif_data->roomMemberDataInternal->memberId, room_id); extra_nps::print_SceNpMatching2RoomMemberDataInternal(notif_data->roomMemberDataInternal.get_ptr()); // We initiate signaling if necessary @@ -54,7 +54,7 @@ namespace np const u16 member_id = notif_data->roomMemberDataInternal->memberId; const SceNpId& npid = notif_data->roomMemberDataInternal->userInfo.npId; - rpcn_log.notice("Join notification told to connect to member(%d=%s) of room(%d): %s:%d", member_id, reinterpret_cast(npid.handle.data), room_id, ip_to_string(addr_p2p), port_p2p); + rpcn_log.notice("Join notification told to connect to member(%d=%s) of room(%d): %s:%d", member_id, np::npid_to_string(npid), room_id, ip_to_string(addr_p2p), port_p2p); // Attempt Signaling auto& sigh = g_fxo->get>(); @@ -98,7 +98,7 @@ namespace np return; } - rpcn_log.notice("Received notification that user %s(%d) left the room(%d)", notif_data->roomMemberDataInternal->userInfo.npId.handle.data, notif_data->roomMemberDataInternal->memberId, room_id); + rpcn_log.notice("Received notification that user %s(%d) left the room(%d)", np::npid_to_string(notif_data->roomMemberDataInternal->userInfo.npId), notif_data->roomMemberDataInternal->memberId, room_id); extra_nps::print_SceNpMatching2RoomMemberDataInternal(notif_data->roomMemberDataInternal.get_ptr()); if (room_event_cb) @@ -204,7 +204,7 @@ namespace np return; } - rpcn_log.notice("Received notification that user's %s(%d) room (%d) data was updated", notif_data->newRoomMemberDataInternal->userInfo.npId.handle.data, 
notif_data->newRoomMemberDataInternal->memberId, room_id); + rpcn_log.notice("Received notification that user's %s(%d) room (%d) data was updated", np::npid_to_string(notif_data->newRoomMemberDataInternal->userInfo.npId), notif_data->newRoomMemberDataInternal->memberId, room_id); extra_nps::print_SceNpMatching2RoomMemberDataInternal(notif_data->newRoomMemberDataInternal.get_ptr()); if (room_event_cb) @@ -381,7 +381,10 @@ namespace np auto ctx = get_matching_context(ctx_id); if (!ctx) + { + np_memory.free(edata.addr()); return; + } gui_cache.add_room(room_info->room_status.id); diff --git a/rpcs3/Emu/NP/np_requests.cpp b/rpcs3/Emu/NP/np_requests.cpp index 3f21f24cd3..d9dd6dc337 100644 --- a/rpcs3/Emu/NP/np_requests.cpp +++ b/rpcs3/Emu/NP/np_requests.cpp @@ -191,7 +191,7 @@ namespace np case rpcn::ErrorType::RoomGroupMaxSlotMismatch: error_code = SCE_NP_MATCHING2_SERVER_ERROR_MAX_OVER_SLOT_GROUP; break; case rpcn::ErrorType::RoomPasswordMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_PASSWORD; break; case rpcn::ErrorType::RoomGroupNoJoinLabel: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_JOIN_GROUP_LABEL; break; - default: fmt::throw_exception("Unexpected error in reply to CreateRoom: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to CreateRoom: %s", error); } if (error_code != CELL_OK) @@ -262,7 +262,7 @@ namespace np case rpcn::ErrorType::RoomPasswordMismatch: error_code = SCE_NP_MATCHING2_SERVER_ERROR_PASSWORD_MISMATCH; break; case rpcn::ErrorType::RoomGroupFull: error_code = SCE_NP_MATCHING2_SERVER_ERROR_GROUP_FULL; break; case rpcn::ErrorType::RoomGroupJoinLabelNotFound: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_GROUP; break; - default: fmt::throw_exception("Unexpected error in reply to JoinRoom: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to JoinRoom: %s", error); } if (error_code != 0) @@ -310,7 +310,7 @@ namespace np if (npid_res != CELL_OK) continue; - 
rpcn_log.notice("JoinRoomResult told to connect to member(%d=%s) of room(%d): %s:%d", member_id, reinterpret_cast(npid_p2p->handle.data), room_id, ip_to_string(addr_p2p), port_p2p); + rpcn_log.notice("JoinRoomResult told to connect to member(%d=%s) of room(%d): %s:%d", member_id, np::npid_to_string(*npid_p2p), room_id, ip_to_string(addr_p2p), port_p2p); // Attempt Signaling auto& sigh = g_fxo->get>(); @@ -348,7 +348,7 @@ namespace np case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::NotFound: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; // Unsure if this should return another error(missing user in room has no appropriate error code) case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; - default: fmt::throw_exception("Unexpected error in reply to LeaveRoom: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to LeaveRoom: %s", error); } if (error_code != CELL_OK) @@ -447,7 +447,7 @@ namespace np u32 np_handler::get_room_member_data_external_list(SceNpMatching2ContextId ctx_id, vm::cptr optParam, const SceNpMatching2GetRoomMemberDataExternalListRequest* req) { - const u32 req_id = generate_callback_info(ctx_id, optParam, SCE_NP_MATCHING2_REQUEST_EVENT_GetRoomDataExternalList, true); + const u32 req_id = generate_callback_info(ctx_id, optParam, SCE_NP_MATCHING2_REQUEST_EVENT_GetRoomMemberDataExternalList, true); if (!get_rpcn()->get_room_member_data_external_list(req_id, get_match2_context(ctx_id)->communicationId, req->roomId)) { @@ -465,13 +465,18 @@ namespace np if (!cb_info_opt) return; - ensure(error == rpcn::ErrorType::NoError, "Unexpected error in GetRoomMemberDataExternalList reply"); - - if (error == rpcn::ErrorType::RoomMissing) + switch (error) + { + case rpcn::ErrorType::NoError: + break; + case rpcn::ErrorType::RoomMissing: { cb_info_opt->queue_callback(req_id, 0, SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM, 0); return; } + default: + 
fmt::throw_exception("Unexpected error in GetRoomMemberDataExternalList reply: %s", error); + } const auto resp = reply.get_protobuf(); ensure(!reply.is_error(), "Malformed reply to GetRoomMemberDataExternalList command"); @@ -518,7 +523,7 @@ namespace np case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; case rpcn::ErrorType::Unauthorized: error_code = SCE_NP_MATCHING2_SERVER_ERROR_FORBIDDEN; break; - default: fmt::throw_exception("Unexpected error in reply to SetRoomDataExternal: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to SetRoomDataExternal: %s", error); } cb_info_opt->queue_callback(req_id, 0, error_code, 0); @@ -550,7 +555,7 @@ namespace np { case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; - default: fmt::throw_exception("Unexpected error in reply to GetRoomDataInternal: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to GetRoomDataInternal: %s", error); } if (error_code != CELL_OK) @@ -606,7 +611,7 @@ namespace np { case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; - default: fmt::throw_exception("Unexpected error in reply to GetRoomDataInternal: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to SetRoomDataInternal: %s", error); } cb_info_opt->queue_callback(req_id, 0, error_code, 0); @@ -640,7 +645,7 @@ namespace np case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; case rpcn::ErrorType::NotFound: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_USER; break; - default: fmt::throw_exception("Unexpected error in reply to GetRoomMemberDataInternal: %d", static_cast(error)); + default: 
fmt::throw_exception("Unexpected error in reply to GetRoomMemberDataInternal: %s", error); } if (error_code != CELL_OK) @@ -694,7 +699,7 @@ namespace np case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; case rpcn::ErrorType::NotFound: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_USER; break; case rpcn::ErrorType::Unauthorized: error_code = SCE_NP_MATCHING2_SERVER_ERROR_FORBIDDEN; break; - default: fmt::throw_exception("Unexpected error in reply to SetRoomMemberDataInternal: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to SetRoomMemberDataInternal: %s", error); } cb_info_opt->queue_callback(req_id, 0, error_code, 0); @@ -723,7 +728,7 @@ namespace np switch (error) { case rpcn::ErrorType::NoError: break; - default: fmt::throw_exception("Unexpected error in reply to SetUserInfo: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to SetUserInfo: %s", error); } cb_info_opt->queue_callback(req_id, 0, 0, 0); @@ -755,7 +760,7 @@ namespace np { case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; - default: fmt::throw_exception("Unexpected error in reply to PingRoomOwner: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to PingRoomOwner: %s", error); } if (error_code != CELL_OK) @@ -803,7 +808,7 @@ namespace np case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::RoomMissing: error_code = SCE_NP_MATCHING2_SERVER_ERROR_NO_SUCH_ROOM; break; case rpcn::ErrorType::Unauthorized: error_code = SCE_NP_MATCHING2_SERVER_ERROR_FORBIDDEN; break; - default: fmt::throw_exception("Unexpected error in reply to SendRoomMessage: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to SendRoomMessage: %s", error); } cb_info_opt->queue_callback(req_id, 0, error_code, 0); @@ -841,7 +846,7 @@ namespace np 
rpcn_log.error("Signaling information was requested for a user that doesn't exist or is not online"); return; } - default: fmt::throw_exception("Unexpected error in reply to RequestSignalingInfos: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to RequestSignalingInfos: %s", error); } const auto resp = reply.get_protobuf(); @@ -861,9 +866,7 @@ namespace np const u32 req_id = generate_callback_info(ctx_id, optParam, SCE_NP_MATCHING2_REQUEST_EVENT_GetLobbyInfoList, false); auto cb_info_opt = take_pending_request(req_id); - - if (!cb_info_opt) - return true; + ensure (cb_info_opt); const u32 event_key = get_event_key(); @@ -920,19 +923,19 @@ namespace np ensure(!reply.is_error(), "Malformed reply to RequestTicket command"); auto incoming_ticket = ticket(std::move(ticket_raw)); - + // Clans: check if ticket belongs to the clan service. // If so, hijack the ticket and cache it for future use. if (incoming_ticket.get_service_id() == CLANS_SERVICE_ID) { - clan_ticket = incoming_ticket; + clan_ticket = std::move(incoming_ticket); clan_ticket_ready.store(1); clan_ticket_ready.notify_all(); - + return; } - current_ticket = incoming_ticket; + current_ticket = std::move(incoming_ticket); auto ticket_size = static_cast(current_ticket.size()); if (manager_cb) @@ -1017,7 +1020,7 @@ namespace np { case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::NotFound: error_code = SCE_NP_COMMUNITY_SERVER_ERROR_RANKING_BOARD_MASTER_NOT_FOUND; break; - default: fmt::throw_exception("Unexpected error in reply to GetBoardInfos: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to GetBoardInfos: %s", error); } if (error_code != CELL_OK) @@ -1091,7 +1094,7 @@ namespace np score_trans->wake_cond.notify_one(); return; } - default: fmt::throw_exception("Unexpected error in reply_record_score: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply_record_score: %s", error); } auto tmp_rank = 
reply.get(); @@ -1154,7 +1157,7 @@ namespace np case rpcn::ErrorType::NotFound: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_RANKING_STORE_NOT_FOUND); break; case rpcn::ErrorType::ScoreInvalid: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_INVALID_SCORE); break; case rpcn::ErrorType::ScoreHasData: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_GAME_DATA_ALREADY_EXISTS); break; - default: fmt::throw_exception("Unexpected error in reply to RecordScoreData: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to RecordScoreData: %s", error); } } @@ -1207,7 +1210,7 @@ namespace np { case rpcn::ErrorType::NoError: break; case rpcn::ErrorType::NotFound: score_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_RANKING_GAME_DATA_MASTER_NOT_FOUND); return; - default: fmt::throw_exception("Unexpected error in reply to GetScoreData: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to GetScoreData: %s", error); } auto* tdata = std::get_if(&score_trans->tdata); @@ -1287,7 +1290,7 @@ namespace np switch (error) { case rpcn::ErrorType::NoError: break; - default: fmt::throw_exception("Unexpected error in GetScoreResponse: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in GetScoreResponse: %s", error); } const auto resp = reply.get_protobuf(); @@ -1476,7 +1479,7 @@ namespace np case rpcn::ErrorType::NotFound: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_USER_NOT_ASSIGNED); break; case rpcn::ErrorType::Unauthorized: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_FORBIDDEN); break; case rpcn::ErrorType::CondFail: trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_CONDITIONS_NOT_SATISFIED); break; - default: fmt::throw_exception("Unexpected error in handle_tus_no_data: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in handle_tus_no_data: %s", error); } } @@ -1499,7 +1502,7 @@ namespace np case 
rpcn::ErrorType::NotFound: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_USER_NOT_ASSIGNED); case rpcn::ErrorType::Unauthorized: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_FORBIDDEN); case rpcn::ErrorType::CondFail: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_CONDITIONS_NOT_SATISFIED); - default: fmt::throw_exception("Unexpected error in handle_TusVarResponse: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in handle_TusVarResponse: %s", error); } const auto resp = reply.get_protobuf(); @@ -1555,7 +1558,7 @@ namespace np case rpcn::ErrorType::NotFound: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_USER_NOT_ASSIGNED); case rpcn::ErrorType::Unauthorized: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_FORBIDDEN); case rpcn::ErrorType::CondFail: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_CONDITIONS_NOT_SATISFIED); - default: fmt::throw_exception("Unexpected error in handle_TusVariable: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in handle_TusVariable: %s", error); } auto pb_var = reply.get_protobuf(); @@ -1603,7 +1606,7 @@ namespace np case rpcn::ErrorType::NotFound: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_USER_NOT_ASSIGNED); case rpcn::ErrorType::Unauthorized: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_FORBIDDEN); case rpcn::ErrorType::CondFail: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_CONDITIONS_NOT_SATISFIED); - default: fmt::throw_exception("Unexpected error in handle_TusDataStatusResponse: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in handle_TusDataStatusResponse: %s", error); } const auto resp = reply.get_protobuf(); @@ -1803,7 +1806,7 @@ namespace np if (!tdata) { trans_ctx->tdata = tdata_tus_get_data{.recvSize = recvSize, .dataStatus = dataStatus, .data = 
data}; - const u32 req_id = get_req_id(REQUEST_ID_HIGH::SCORE); + const u32 req_id = get_req_id(REQUEST_ID_HIGH::TUS); get_rpcn()->tus_get_data(req_id, trans_ctx->communicationId, targetNpId, slotId, vuser); transaction_async_handler(std::move(lock), trans_ctx, req_id, async); return; @@ -1844,7 +1847,7 @@ namespace np case rpcn::ErrorType::NotFound: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_USER_NOT_ASSIGNED); case rpcn::ErrorType::Unauthorized: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_FORBIDDEN); case rpcn::ErrorType::CondFail: return tus_trans->set_result_and_wake(SCE_NP_COMMUNITY_SERVER_ERROR_CONDITIONS_NOT_SATISFIED); - default: fmt::throw_exception("Unexpected error in reply to TusGetData: %d", static_cast(error)); + default: fmt::throw_exception("Unexpected error in reply to TusGetData: %s", error); } auto pb_data = reply.get_protobuf(); @@ -1870,6 +1873,7 @@ namespace np data_status->data = tdata->data; data_status->dataSize = ::narrow(pb_data->data().size()); data_status->info.infoSize = ::narrow(pb_status.info().size()); + memcpy(data_status->info.data, pb_data->status().info().data(), std::min(pb_data->status().info().size(), sizeof(data_status->info.data))); const u32 to_copy = std::min(data_status->dataSize, tdata->recvSize); memcpy(data, pb_data->data().data(), to_copy); diff --git a/rpcs3/Emu/NP/np_requests_gui.cpp b/rpcs3/Emu/NP/np_requests_gui.cpp index be6026afa6..5e38a57e77 100644 --- a/rpcs3/Emu/NP/np_requests_gui.cpp +++ b/rpcs3/Emu/NP/np_requests_gui.cpp @@ -303,7 +303,11 @@ namespace np gui_cache.del_room(room_status->id); - gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = SCE_NP_MATCHING_EVENT_LEAVE_ROOM_DONE, .edata = std::move(edata)}); + { + std::lock_guard lock(gui_notifications.mutex); + gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = 
SCE_NP_MATCHING_EVENT_LEAVE_ROOM_DONE, .edata = std::move(edata)}); + } + ctx->queue_callback(req_id, SCE_NP_MATCHING_EVENT_LEAVE_ROOM_DONE, 0); } @@ -453,7 +457,11 @@ namespace np extra_nps::print_SceNpMatchingRoom(room_info); - gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = SCE_NP_MATCHING_EVENT_GET_ROOM_SEARCH_FLAG_DONE, .edata = std::move(edata)}); + { + std::lock_guard lock(gui_notifications.mutex); + gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = SCE_NP_MATCHING_EVENT_GET_ROOM_SEARCH_FLAG_DONE, .edata = std::move(edata)}); + } + ctx->queue_callback(req_id, SCE_NP_MATCHING_EVENT_GET_ROOM_SEARCH_FLAG_DONE, 0); } @@ -548,7 +556,11 @@ namespace np extra_nps::print_SceNpMatchingRoom(room_info); - gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = SCE_NP_MATCHING_EVENT_GET_ROOM_INFO_DONE, .edata = std::move(edata)}); + { + std::lock_guard lock(gui_notifications.mutex); + gui_notifications.list.emplace(std::make_pair(gui_notifications.current_gui_ctx_id, req_id), gui_notification{.event = SCE_NP_MATCHING_EVENT_GET_ROOM_INFO_DONE, .edata = std::move(edata)}); + } + ctx->queue_callback(req_id, SCE_NP_MATCHING_EVENT_GET_ROOM_INFO_DONE, 0); } @@ -581,9 +593,13 @@ namespace np SceNpRoomId room_id{}; ensure(!resp->id().empty() && resp->id().size() == sizeof(SceNpRoomId::opt)); + ctx->wakey = 0; std::memcpy(room_id.opt, resp->id().data(), sizeof(SceNpRoomId::opt)); - const auto [_, inserted] = pending_quickmatching.insert_or_assign(room_id, ctx->ctx_id); - ensure(inserted); + { + std::lock_guard lock(this->mutex_quickmatching); + const auto [_, inserted] = pending_quickmatching.insert_or_assign(room_id, ctx->ctx_id); + ensure(inserted); + } // Now that the reply has been received, we start the wait for the notification ctx->thread = std::make_unique>>("NP GUI 
Timeout Worker", [ctx, req_id, this](SceNpRoomId room_id) @@ -615,7 +631,6 @@ namespace np } }); - ctx->wakey = 0; auto& thread = *ctx->thread; thread(room_id); } diff --git a/rpcs3/Emu/NP/np_structs_extra.cpp b/rpcs3/Emu/NP/np_structs_extra.cpp index 58770c450f..6774b20bc4 100644 --- a/rpcs3/Emu/NP/np_structs_extra.cpp +++ b/rpcs3/Emu/NP/np_structs_extra.cpp @@ -2,6 +2,7 @@ #include "stdafx.h" #include #include "np_structs_extra.h" +#include "np_helpers.h" LOG_CHANNEL(sceNp); LOG_CHANNEL(sceNp2); @@ -13,7 +14,7 @@ namespace extra_nps void print_SceNpUserInfo2(const SceNpUserInfo2* user) { sceNp2.warning("SceNpUserInfo2:"); - sceNp2.warning("npid: %s", static_cast(user->npId.handle.data)); + sceNp2.warning("npid: %s", np::npid_to_string(user->npId)); sceNp2.warning("onlineName: *0x%x(%s)", user->onlineName, user->onlineName ? static_cast(user->onlineName->data) : ""); sceNp2.warning("avatarUrl: *0x%x(%s)", user->avatarUrl, user->avatarUrl ? static_cast(user->avatarUrl->data) : ""); } @@ -208,7 +209,7 @@ namespace extra_nps { sceNp2.warning("SceNpMatching2RoomMemberDataInternal:"); sceNp2.warning("next: *0x%x", member->next); - sceNp2.warning("npId: %s", member->userInfo.npId.handle.data); + sceNp2.warning("npId: %s", np::npid_to_string(member->userInfo.npId)); sceNp2.warning("onlineName: %s", member->userInfo.onlineName ? member->userInfo.onlineName->data : ""); sceNp2.warning("avatarUrl: %s", member->userInfo.avatarUrl ? 
member->userInfo.avatarUrl->data : ""); sceNp2.warning("joinDate: %lld", member->joinDate.tick); @@ -460,7 +461,7 @@ namespace extra_nps void print_SceNpScoreRankData(const SceNpScoreRankData* data) { sceNp.warning("sceNpScoreRankData:"); - sceNp.warning("npId: %s", static_cast(data->npId.handle.data)); + sceNp.warning("npId: %s", np::npid_to_string(data->npId)); sceNp.warning("onlineName: %s", static_cast(data->onlineName.data)); sceNp.warning("pcId: %d", data->pcId); sceNp.warning("serialRank: %d", data->serialRank); @@ -474,7 +475,7 @@ namespace extra_nps void print_SceNpScoreRankData_deprecated(const SceNpScoreRankData_deprecated* data) { sceNp.warning("sceNpScoreRankData_deprecated:"); - sceNp.warning("npId: %s", static_cast(data->npId.handle.data)); + sceNp.warning("npId: %s", np::npid_to_string(data->npId)); sceNp.warning("onlineName: %s", static_cast(data->onlineName.data)); sceNp.warning("serialRank: %d", data->serialRank); sceNp.warning("rank: %d", data->rank); @@ -542,7 +543,7 @@ namespace extra_nps void print_SceNpUserInfo(const SceNpUserInfo* data) { - sceNp.warning("userId: %s", data->userId.handle.data); + sceNp.warning("userId: %s", np::npid_to_string(data->userId)); sceNp.warning("name: %s", data->name.data); sceNp.warning("icon: %s", data->icon.data); } @@ -576,7 +577,7 @@ namespace extra_nps if (data->kick_actor) { - sceNp.warning("kick_actor: %s", data->kick_actor->handle.data); + sceNp.warning("kick_actor: %s", np::npid_to_string(*data->kick_actor)); } sceNp.warning("opt: 0x%x", data->kick_actor); diff --git a/rpcs3/Emu/NP/pb_helpers.cpp b/rpcs3/Emu/NP/pb_helpers.cpp index 41b0ca4579..3debd0ba20 100644 --- a/rpcs3/Emu/NP/pb_helpers.cpp +++ b/rpcs3/Emu/NP/pb_helpers.cpp @@ -263,7 +263,7 @@ namespace np for (u32 i = 0; i < room_info->memberList.membersNum; i++) { SceNpMatching2RoomMemberDataInternal* sce_member = &room_info->memberList.members[i]; - if (strcmp(sce_member->userInfo.npId.handle.data, npid.handle.data) == 0) + if 
(strncmp(sce_member->userInfo.npId.handle.data, npid.handle.data, 16) == 0) { room_info->memberList.me = room_info->memberList.members + i; edata.add_relocation(room_info->memberList.me); @@ -532,8 +532,8 @@ namespace np if (!mi.msg().empty()) { - sce_mi->msgLen = ::narrow(mi.msg().size()); - auto* ptr_msg_data = static_cast(edata.allocate(mi.msg().size(), sce_mi->msg)); + sce_mi->msgLen = ::size32(mi.msg()); + auto* ptr_msg_data = static_cast(edata.allocate(::size32(mi.msg()), sce_mi->msg)); memcpy(ptr_msg_data, mi.msg().data(), mi.msg().size()); } } @@ -575,8 +575,8 @@ namespace np if (!resp.opt().empty()) { - room_status->opt_len = ::narrow(resp.opt().size()); - u8* opt_data = static_cast(edata.allocate(resp.opt().size(), room_status->opt)); + room_status->opt_len = ::size32(resp.opt()); + u8* opt_data = static_cast(edata.allocate(::size32(resp.opt()), room_status->opt)); memcpy(opt_data, resp.opt().data(), resp.opt().size()); } @@ -604,8 +604,8 @@ namespace np cur_attr->id = attr.attr_id(); if (!attr.data().empty()) { - cur_attr->value.data.size = ::narrow(attr.data().size()); - u8* data_ptr = static_cast(edata.allocate(attr.data().size(), cur_attr->value.data.ptr)); + cur_attr->value.data.size = ::size32(attr.data()); + u8* data_ptr = static_cast(edata.allocate(::size32(attr.data()), cur_attr->value.data.ptr)); memcpy(data_ptr, attr.data().data(), attr.data().size()); } else diff --git a/rpcs3/Emu/NP/rpcn_client.cpp b/rpcs3/Emu/NP/rpcn_client.cpp index d7bb274c66..96ab505abf 100644 --- a/rpcs3/Emu/NP/rpcn_client.cpp +++ b/rpcs3/Emu/NP/rpcn_client.cpp @@ -896,7 +896,7 @@ namespace rpcn return error_and_disconnect("Failed to send all the bytes"); } - res = 0; + continue; } n_sent += res; } @@ -1055,6 +1055,8 @@ namespace rpcn found = found->ai_next; } + freeaddrinfo(addr_info); + if (!found_ipv4) { rpcn_log.error("connect: Failed to find IPv4 for %s", host); @@ -1156,7 +1158,7 @@ namespace rpcn if (!connected || terminate) { state = rpcn_state::failure_other; - 
return true; + return false; } if (received_version != RPCN_PROTOCOL_VERSION) @@ -1487,7 +1489,7 @@ namespace rpcn if (error == ErrorType::NoError) rpcn_log.success("add_friend(\"%s\") succeeded", friend_username); else - rpcn_log.error("add_friend(\"%s\") failed with error: %s", error); + rpcn_log.error("add_friend(\"%s\") failed with error: %s", friend_username, error); return error; } @@ -1754,7 +1756,7 @@ namespace rpcn { continue; } - pb_req.add_alloweduser(req->allowedUser[i].handle.data); + pb_req.add_alloweduser(np::npid_to_string(req->allowedUser[i])); } } @@ -1766,7 +1768,7 @@ namespace rpcn { continue; } - pb_req.add_blockeduser(req->blockedUser[i].handle.data); + pb_req.add_blockeduser(np::npid_to_string(req->blockedUser[i])); } } @@ -2265,7 +2267,7 @@ namespace rpcn for (usz i = 0; i < npids.size(); i++) { auto* npid_entry = pb_req.add_npids(); - npid_entry->set_npid(static_cast(npids[i].first.handle.data)); + npid_entry->set_npid(np::npid_to_string(npids[i].first)); npid_entry->set_pcid(npids[i].second); } @@ -2316,7 +2318,7 @@ namespace rpcn { np2_structs::GetScoreGameDataRequest pb_req; pb_req.set_boardid(board_id); - pb_req.set_npid(reinterpret_cast(npid.handle.data)); + pb_req.set_npid(np::npid_to_string(npid)); pb_req.set_pcid(pc_id); std::string serialized; @@ -2412,7 +2414,7 @@ namespace rpcn if (option->isLastChangedAuthorId) { - pb_req.set_islastchangedauthorid(option->isLastChangedAuthorId->handle.data); + pb_req.set_islastchangedauthorid(np::npid_to_string(*option->isLastChangedAuthorId)); } } @@ -2441,7 +2443,7 @@ namespace rpcn if (option->isLastChangedAuthorId) { - pb_req.set_islastchangedauthorid(option->isLastChangedAuthorId->handle.data); + pb_req.set_islastchangedauthorid(np::npid_to_string(*option->isLastChangedAuthorId)); } if (option->compareValue) @@ -2497,7 +2499,7 @@ namespace rpcn if (option->isLastChangedAuthorId) { - pb_req.set_islastchangedauthorid(option->isLastChangedAuthorId->handle.data); + 
pb_req.set_islastchangedauthorid(np::npid_to_string(*option->isLastChangedAuthorId)); } } @@ -3083,7 +3085,7 @@ namespace rpcn } case NotificationType::FriendPresenceChanged: { - const std::string username = vdata.get_string(true); + const std::string username = vdata.get_string(false); SceNpCommunicationId pr_com_id = vdata.get_com_id(); std::string pr_title = fmt::truncate(vdata.get_string(true), SCE_NP_BASIC_PRESENCE_TITLE_SIZE_MAX - 1); std::string pr_status = fmt::truncate(vdata.get_string(true), SCE_NP_BASIC_PRESENCE_EXTENDED_STATUS_SIZE_MAX - 1); @@ -3178,7 +3180,7 @@ namespace rpcn } } - std::optional>> rpcn_client::get_message(u64 id) + std::optional>> rpcn_client::get_message(u64 id) const { { std::lock_guard lock(mutex_messages); @@ -3236,21 +3238,21 @@ namespace rpcn active_messages.erase(id); } - u32 rpcn_client::get_num_friends() + u32 rpcn_client::get_num_friends() const { std::lock_guard lock(mutex_friends); return ::size32(friend_infos.friends); } - u32 rpcn_client::get_num_blocks() + u32 rpcn_client::get_num_blocks() const { std::lock_guard lock(mutex_friends); return ::size32(friend_infos.blocked); } - std::optional rpcn_client::get_friend_by_index(u32 index) + std::optional rpcn_client::get_friend_by_index(u32 index) const { std::lock_guard lock(mutex_friends); @@ -3268,7 +3270,7 @@ namespace rpcn return it->first; } - std::optional> rpcn_client::get_friend_presence_by_index(u32 index) + std::optional> rpcn_client::get_friend_presence_by_index(u32 index) const { std::lock_guard lock(mutex_friends); @@ -3282,7 +3284,7 @@ namespace rpcn return std::optional(*it); } - std::optional> rpcn_client::get_friend_presence_by_npid(const std::string& npid) + std::optional> rpcn_client::get_friend_presence_by_npid(const std::string& npid) const { std::lock_guard lock(mutex_friends); const auto it = friend_infos.friends.find(npid); diff --git a/rpcs3/Emu/NP/rpcn_client.h b/rpcs3/Emu/NP/rpcn_client.h index 56ba17d04d..daa60d90e7 100644 --- 
a/rpcs3/Emu/NP/rpcn_client.h +++ b/rpcs3/Emu/NP/rpcn_client.h @@ -79,6 +79,14 @@ public: res.push_back(vec[i]); i++; } + + // Make sure we hit terminating 0 + if (i >= vec.size()) + { + error = true; + return {}; + } + i++; if (!empty && res.empty()) @@ -234,7 +242,7 @@ namespace rpcn std::mutex mutex_packets_to_send; // Friends related - shared_mutex mutex_friends; + mutable shared_mutex mutex_friends; std::set> friend_cbs; friend_data friend_infos; @@ -296,11 +304,11 @@ namespace rpcn std::optional add_friend(const std::string& friend_username); bool remove_friend(const std::string& friend_username); - u32 get_num_friends(); - u32 get_num_blocks(); - std::optional get_friend_by_index(u32 index); - std::optional> get_friend_presence_by_index(u32 index); - std::optional> get_friend_presence_by_npid(const std::string& npid); + u32 get_num_friends() const; + u32 get_num_blocks() const; + std::optional get_friend_by_index(u32 index) const; + std::optional> get_friend_presence_by_index(u32 index) const; + std::optional> get_friend_presence_by_npid(const std::string& npid) const; std::vector>> get_notifications(); std::map>> get_replies(); @@ -308,7 +316,7 @@ namespace rpcn std::map get_presence_states(); std::vector get_new_messages(); - std::optional>> get_message(u64 id); + std::optional>> get_message(u64 id) const; std::vector>>> get_messages_and_register_cb(SceNpBasicMessageMainType type, bool include_bootable, message_cb_func cb_func, void* cb_param); void remove_message_cb(message_cb_func cb_func, void* cb_param); void mark_message_used(u64 id); @@ -437,7 +445,7 @@ namespace rpcn return (void_cb_func < void_other_cb_func) || ((!(void_other_cb_func < void_cb_func)) && (cb_param < other.cb_param)); } }; - shared_mutex mutex_messages; + mutable shared_mutex mutex_messages; std::set message_cbs; std::unordered_map>> messages; // msg id / (sender / message) std::set active_messages; // msg id of messages that have not been discarded diff --git 
a/rpcs3/Emu/NP/rpcn_config.cpp b/rpcs3/Emu/NP/rpcn_config.cpp index 0b5cabe768..6665adbf7d 100644 --- a/rpcs3/Emu/NP/rpcn_config.cpp +++ b/rpcs3/Emu/NP/rpcn_config.cpp @@ -10,8 +10,7 @@ void cfg_rpcn::load() { const std::string path = cfg_rpcn::get_path(); - fs::file cfg_file(path, fs::read); - if (cfg_file) + if (fs::file cfg_file(path, fs::read); cfg_file) { rpcn_log.notice("Loading RPCN config. Path: %s", path); from_string(cfg_file.to_string()); diff --git a/rpcs3/Emu/NP/rpcn_config.h b/rpcs3/Emu/NP/rpcn_config.h index 3ea6b707b1..113d83e408 100644 --- a/rpcs3/Emu/NP/rpcn_config.h +++ b/rpcs3/Emu/NP/rpcn_config.h @@ -4,7 +4,7 @@ struct cfg_rpcn : cfg::node { - cfg::uint32 version{this, "Version", 1}; + cfg::uint32 version{this, "Version", 2}; cfg::string host{this, "Host", "np.rpcs3.net"}; cfg::string npid{this, "NPID", ""}; cfg::string password{this, "Password", ""}; diff --git a/rpcs3/Emu/NP/signaling_handler.cpp b/rpcs3/Emu/NP/signaling_handler.cpp index 2e4ac5df56..a4c59763cb 100644 --- a/rpcs3/Emu/NP/signaling_handler.cpp +++ b/rpcs3/Emu/NP/signaling_handler.cpp @@ -256,9 +256,7 @@ void signaling_handler::process_incoming_messages() addr.s_addr = op_addr; char ip_str[16]; inet_ntop(AF_INET, &addr, ip_str, sizeof(ip_str)); - std::string_view npid(sp->npid.handle.data); - - sign_log.trace("SP %s from %s:%d(npid: %s)", sp->command, ip_str, op_port, npid); + sign_log.trace("SP %s from %s:%d(npid: %s)", sp->command, ip_str, op_port, np::npid_to_string(sp->npid)); } bool reply = false, schedule_repeat = false; @@ -426,9 +424,10 @@ void signaling_handler::operator()() if (sig.sig_info->time_last_msg_recvd < now - 60s && cmd != signal_info) { // We had no connection to opponent for 60 seconds, consider the connection dead + auto retire_info = sig.sig_info; sign_log.notice("Timeout disconnection"); - update_si_status(sig.sig_info, SCE_NP_SIGNALING_CONN_STATUS_INACTIVE, SCE_NP_SIGNALING_ERROR_TIMEOUT); - retire_packet(sig.sig_info, signal_ping); // Retire ping 
packet if necessary + update_si_status(retire_info, SCE_NP_SIGNALING_CONN_STATUS_INACTIVE, SCE_NP_SIGNALING_ERROR_TIMEOUT); + retire_packet(retire_info, signal_ping); // Retire ping packet if necessary break; // qpackets has been emptied of all packets for this user so we're requeuing } @@ -674,9 +673,7 @@ std::shared_ptr signaling_handler::get_signaling_ptr(const signa { u32 conn_id; - char npid_buf[17]{}; - memcpy(npid_buf, sp->npid.handle.data, 16); - std::string npid(npid_buf); + std::string npid = np::npid_to_string(sp->npid); if (!npid_to_conn_id.contains(npid)) return nullptr; @@ -777,6 +774,7 @@ void signaling_handler::send_information_packets(u32 addr, u16 port, const SceNp auto& sent_packet = sig_packet; sent_packet.command = signal_info; + retire_packet(si, signal_info); send_signaling_packet(sent_packet, addr, port); queue_signaling_packet(sent_packet, si, steady_clock::now() + REPEAT_INFO_DELAY); wake_up(); @@ -784,7 +782,7 @@ void signaling_handler::send_information_packets(u32 addr, u16 port, const SceNp u32 signaling_handler::get_always_conn_id(const SceNpId& npid) { - std::string npid_str(reinterpret_cast(npid.handle.data)); + std::string npid_str = np::npid_to_string(npid); if (npid_to_conn_id.contains(npid_str)) return ::at32(npid_to_conn_id, npid_str); @@ -810,9 +808,8 @@ u32 signaling_handler::init_sig1(const SceNpId& npid) sig_peers[conn_id]->conn_status = SCE_NP_SIGNALING_CONN_STATUS_PENDING; // Request peer infos from RPCN - std::string npid_str(reinterpret_cast(npid.handle.data)); auto& nph = g_fxo->get>(); - nph.req_sign_infos(npid_str, conn_id); + nph.req_sign_infos(np::npid_to_string(npid), conn_id); } return conn_id; @@ -835,18 +832,18 @@ u32 signaling_handler::init_sig2(const SceNpId& npid, u64 room_id, u16 member_id return conn_id; } -std::optional signaling_handler::get_conn_id_from_npid(const SceNpId& npid) +std::optional signaling_handler::get_conn_id_from_npid(const SceNpId& npid) const { std::lock_guard lock(data_mutex); - 
std::string npid_str(reinterpret_cast(npid.handle.data)); + std::string npid_str = np::npid_to_string(npid); if (npid_to_conn_id.contains(npid_str)) return ::at32(npid_to_conn_id, npid_str); return std::nullopt; } -std::optional signaling_handler::get_sig_infos(u32 conn_id) +std::optional signaling_handler::get_sig_infos(u32 conn_id) const { std::lock_guard lock(data_mutex); if (sig_peers.contains(conn_id)) @@ -855,7 +852,7 @@ std::optional signaling_handler::get_sig_infos(u32 conn_id) return std::nullopt; } -std::optional signaling_handler::get_conn_id_from_addr(u32 addr, u16 port) +std::optional signaling_handler::get_conn_id_from_addr(u32 addr, u16 port) const { std::lock_guard lock(data_mutex); diff --git a/rpcs3/Emu/NP/signaling_handler.h b/rpcs3/Emu/NP/signaling_handler.h index b4ec8229b8..c97c049d36 100644 --- a/rpcs3/Emu/NP/signaling_handler.h +++ b/rpcs3/Emu/NP/signaling_handler.h @@ -63,9 +63,9 @@ public: u32 init_sig1(const SceNpId& npid); u32 init_sig2(const SceNpId& npid, u64 room_id, u16 member_id); - std::optional get_sig_infos(u32 conn_id); - std::optional get_conn_id_from_npid(const SceNpId& npid); - std::optional get_conn_id_from_addr(u32 addr, u16 port); + std::optional get_sig_infos(u32 conn_id) const; + std::optional get_conn_id_from_npid(const SceNpId& npid) const; + std::optional get_conn_id_from_addr(u32 addr, u16 port) const; void add_sig_ctx(u32 ctx_id); void remove_sig_ctx(u32 ctx_id); @@ -128,12 +128,12 @@ private: void retire_all_packets(std::shared_ptr& si); void stop_sig_nl(u32 conn_id, bool forceful); - shared_mutex data_mutex; + mutable shared_mutex data_mutex; atomic_t wakey = 0; signaling_packet sig_packet{}; - std::map qpackets; // (wakeup time, packet) + std::multimap qpackets; // (wakeup time, packet) u32 cur_conn_id = 1; std::unordered_map npid_to_conn_id; // (npid, conn_id) diff --git a/rpcs3/Emu/NP/upnp_handler.cpp b/rpcs3/Emu/NP/upnp_handler.cpp index dca00de986..c19e30d441 100644 --- a/rpcs3/Emu/NP/upnp_handler.cpp +++ 
b/rpcs3/Emu/NP/upnp_handler.cpp @@ -89,12 +89,10 @@ void upnp_handler::upnp_enable() if (desc_xml) { - IGDdatas igd_data{}; - UPNPUrls igd_urls{}; - parserootdesc(desc_xml, desc_xml_size, &igd_data); + parserootdesc(desc_xml, desc_xml_size, &m_igd_data); free(desc_xml); desc_xml = nullptr; - GetUPNPUrls(&igd_urls, &igd_data, dev->descURL, 1); + GetUPNPUrls(&m_igd_urls, &m_igd_data, dev->descURL, 1); upnp_log.notice("Found UPnP device type:%s at %s", dev->st, dev->descURL); @@ -116,24 +114,28 @@ void upnp_handler::upnp_enable() freeUPNPDevlist(devlist); } -void upnp_handler::add_port_redir(std::string_view addr, u16 internal_port, std::string_view protocol) +void upnp_handler::add_port_redir(const std::string& addr, u16 internal_port, std::string_view protocol) { if (!m_active) return; std::lock_guard lock(m_mutex); - u16 external_port = internal_port; - std::string internal_port_str = fmt::format("%d", internal_port); + if (m_bindings[std::string(protocol)].contains(internal_port)) + return; + + const std::string internal_port_str = fmt::format("%d", internal_port); + const std::string protocol_str(protocol); + const u32 max_port = std::min(static_cast(internal_port) + 100, 0xFFFFu); int res = 0; - for (u16 external_port = internal_port; external_port < internal_port + 100; external_port++) + for (u32 external_port = internal_port; external_port <= max_port; external_port++) { std::string external_port_str = fmt::format("%d", external_port); - res = UPNP_AddPortMapping(m_igd_urls.controlURL, m_igd_data.first.servicetype, external_port_str.c_str(), internal_port_str.c_str(), addr.data(), "RPCS3", protocol.data(), nullptr, nullptr); + res = UPNP_AddPortMapping(m_igd_urls.controlURL, m_igd_data.first.servicetype, external_port_str.c_str(), internal_port_str.c_str(), addr.c_str(), "RPCS3", protocol_str.c_str(), nullptr, nullptr); if (res == UPNPCOMMAND_SUCCESS) { - m_bindings[std::string(protocol)][internal_port] = external_port; + 
m_bindings[protocol_str][static_cast(internal_port)] = external_port; upnp_log.notice("Successfully bound %s:%d(%s) to IGD:%d", addr, internal_port, protocol, external_port); return; } @@ -146,7 +148,7 @@ void upnp_handler::add_port_redir(std::string_view addr, u16 internal_port, std: // } } - upnp_log.error("Failed to bind %s:%d(%s) to IGD:(%d=>%d): %d", addr, internal_port, protocol, internal_port, external_port, res); + upnp_log.error("Failed to bind %s:%d(%s) to IGD:(%d=>%d): %d", addr, internal_port, protocol, internal_port, internal_port, res); } void upnp_handler::remove_port_redir(u16 internal_port, std::string_view protocol) @@ -156,27 +158,28 @@ void upnp_handler::remove_port_redir(u16 internal_port, std::string_view protoco std::lock_guard lock(m_mutex); - const std::string str_protocol(protocol); + const std::string protocol_str(protocol); - if (!m_bindings.contains(str_protocol) || !::at32(m_bindings, str_protocol).contains(internal_port)) + if (!m_bindings.contains(protocol_str) || !::at32(m_bindings, protocol_str).contains(internal_port)) { upnp_log.error("tried to unbind port mapping %d to IGD(%s) but it isn't bound", internal_port, protocol); return; } - const u16 external_port = ::at32(::at32(m_bindings, str_protocol), internal_port); + const u16 external_port = ::at32(::at32(m_bindings, protocol_str), internal_port); remove_port_redir_external(external_port, protocol); - ensure(::at32(m_bindings, str_protocol).erase(internal_port)); + ensure(::at32(m_bindings, protocol_str).erase(internal_port)); upnp_log.notice("Successfully deleted port mapping %d to IGD:%d(%s)", internal_port, external_port, protocol); } void upnp_handler::remove_port_redir_external(u16 external_port, std::string_view protocol, bool verbose) { const std::string str_ext_port = fmt::format("%d", external_port); + const std::string protocol_str(protocol); - if (int res = UPNP_DeletePortMapping(m_igd_urls.controlURL, m_igd_data.first.servicetype, str_ext_port.c_str(), 
protocol.data(), nullptr); res != 0 && verbose) + if (int res = UPNP_DeletePortMapping(m_igd_urls.controlURL, m_igd_data.first.servicetype, str_ext_port.c_str(), protocol_str.c_str(), nullptr); res != 0 && verbose) upnp_log.error("Failed to delete port mapping IGD:%s(%s): %d", str_ext_port, protocol, res); } diff --git a/rpcs3/Emu/NP/upnp_handler.h b/rpcs3/Emu/NP/upnp_handler.h index 40892f7443..1486702cbb 100644 --- a/rpcs3/Emu/NP/upnp_handler.h +++ b/rpcs3/Emu/NP/upnp_handler.h @@ -13,7 +13,7 @@ public: ~upnp_handler(); void upnp_enable(); - void add_port_redir(std::string_view addr, u16 internal_port, std::string_view protocol); + void add_port_redir(const std::string& addr, u16 internal_port, std::string_view protocol); void remove_port_redir(u16 internal_port, std::string_view protocol); bool is_active() const; diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 96f87111ff..43068bf723 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -7,6 +7,12 @@ #include "util/asm.hpp" +// Unaligned u128 alias +union x128 +{ + u8 _u8[16]; +}; + namespace utils { template @@ -244,7 +250,7 @@ struct copy_unmodified_block_swizzled } const u32 size_in_block = padded_width * padded_height * depth * 2; - rsx::simple_array tmp(size_in_block * words_per_block); + rsx::simple_array tmp(size_in_block * words_per_block); if (words_per_block == 1) [[likely]] { @@ -520,14 +526,14 @@ struct copy_decoded_bc1_block struct copy_decoded_bc2_block { - static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4; for (u32 row = 0; row < row_count * depth; row++) { for (u32 col = 
0; col < width_in_block; col++) { - const u8* compressedBlock = reinterpret_cast(&src[src_offset + col]); + const u8* compressedBlock = src[src_offset + col]._u8; u8* decompressedBlock = reinterpret_cast(&dst[dst_offset + col * 4]); bcdec_bc2(compressedBlock, decompressedBlock, destinationPitch); } @@ -540,14 +546,14 @@ struct copy_decoded_bc2_block struct copy_decoded_bc3_block { - static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4; for (u32 row = 0; row < row_count * depth; row++) { for (u32 col = 0; col < width_in_block; col++) { - const u8* compressedBlock = reinterpret_cast(&src[src_offset + col]); + const u8* compressedBlock = src[src_offset + col]._u8; u8* decompressedBlock = reinterpret_cast(&dst[dst_offset + col * 4]); bcdec_bc3(compressedBlock, decompressedBlock, destinationPitch); } @@ -847,6 +853,17 @@ namespace rsx } } + bool texture_format_ex::hw_SNORM_possible() const + { + return (texel_remap_control & SEXT_MASK) == (get_host_format_snorm_mask(format()) << SEXT_OFFSET); + } + + bool texture_format_ex::hw_SRGB_possible() const + { + return encoded_remap == RSX_TEXTURE_REMAP_IDENTITY && + (texel_remap_control & GAMMA_CTRL_MASK) == GAMMA_RGB_MASK; + } + std::vector get_subresources_layout(const rsx::fragment_texture& texture) { return get_subresources_layout_impl(texture); @@ -1028,22 +1045,25 @@ namespace rsx // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. 
copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (is_3d && !is_po2 && caps.supports_vtc_decoding) + + if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // We need to compress the 2D-planar DXT input into a VTC output copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (caps.supports_zero_copy) + + if (caps.supports_zero_copy) { result.require_upload = true; result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 8, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - } + + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -1051,7 +1071,7 @@ namespace rsx { if (!caps.supports_dxt) { - copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } [[fallthrough]]; @@ -1060,7 +1080,7 @@ namespace rsx { if (!caps.supports_dxt) { - copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), 
src_layout.pitch_in_block); + copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -1072,23 +1092,44 @@ namespace rsx // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + if (src_layout.data.is_naturally_aligned()) + { + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (is_3d && !is_po2 && caps.supports_vtc_decoding) + + if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. 
// We need to compress the 2D-planar DXT input into a VTC output - copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + if (src_layout.data.is_naturally_aligned()) + { + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (caps.supports_zero_copy) + + if (caps.supports_zero_copy) { result.require_upload = true; result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 16, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else + + if (src_layout.data.is_naturally_aligned()) { copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } + + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -1200,26 +1241,84 @@ namespace rsx fmt::throw_exception("Unknown format 0x%x", texture_format); } - bool is_int8_remapped_format(u32 format) + rsx::flags32_t get_format_features(u32 texture_format) { - switch (format) + switch (texture_format) { + case CELL_GCM_TEXTURE_B8: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_G8B8: + case 
CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_D8R8G8B8: + // Base texture formats - everything is supported + return RSX_FORMAT_FEATURE_SIGNED_COMPONENTS | RSX_FORMAT_FEATURE_GAMMA_CORRECTION | RSX_FORMAT_FEATURE_BIASED_NORMALIZATION; + case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + // Depth textures will hang the hardware if BX2 or GAMMA is active. ARGB8_SIGNED has no impact. + // UNSIGNED_REMAP=BIASED works on all formats including the float variants. + return RSX_FORMAT_FEATURE_BIASED_NORMALIZATION; + case CELL_GCM_TEXTURE_X16: + // X16 - GAMMA causes hangs. ARGB8_SIGNED is ignored. UNSIGNED_REMAP=BIASED works. + return RSX_FORMAT_FEATURE_BIASED_NORMALIZATION | RSX_FORMAT_FEATURE_16BIT_CHANNELS; case CELL_GCM_TEXTURE_Y16_X16: + // X16 | Y16 - GAMMA causes hangs. ARGB8_SIGNED works. UNSIGNED_REMAP=BIASED also works. + return RSX_FORMAT_FEATURE_SIGNED_COMPONENTS | RSX_FORMAT_FEATURE_BIASED_NORMALIZATION | RSX_FORMAT_FEATURE_16BIT_CHANNELS; + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + // GAMMA causes GPU hangs. ARGB8_SIGNED is ignored. UNSIGNED_REMAP=BIASED works. + return RSX_FORMAT_FEATURE_BIASED_NORMALIZATION; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + // GAMMA causes hangs. Other flags ignored. + return 0; + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - // NOTE: Special data formats (XY, HILO, DEPTH) are not RGB formats - return false; + // Floating point textures. Nothing works. 
+ return 0; + } + fmt::throw_exception("Unknown format 0x%x", texture_format); + } + + /** + * Returns a channel mask in ARGB that can be SNORM-converted + * Some formats have a hardcoded constant in one lane which we cannot SNORM-interpret in hardware. + */ + u32 get_host_format_snorm_mask(u32 format) + { + switch (format) + { + case CELL_GCM_TEXTURE_B8: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + // Hardcoded alpha formats + return 0b1110; + + case CELL_GCM_TEXTURE_X16: + // This one is a mess. X and Z are hardcoded. Not supported. + // Fall through instead of throw default: - return true; + return 0b1111; } } diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index cc40305721..31f43432dc 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -9,6 +9,8 @@ namespace rsx { + using flags32_t = u32; + enum texture_upload_context : u32 { shader_read = 1, @@ -125,6 +127,56 @@ namespace rsx using namespace format_class_; + enum format_features : u8 + { + RSX_FORMAT_FEATURE_SIGNED_COMPONENTS = (1 << 0), + RSX_FORMAT_FEATURE_BIASED_NORMALIZATION = (1 << 1), + RSX_FORMAT_FEATURE_GAMMA_CORRECTION = (1 << 2), + RSX_FORMAT_FEATURE_16BIT_CHANNELS = (1 << 3), // Complements RSX_FORMAT_FEATURE_SIGNED_COMPONENTS + }; + + enum host_format_features : u8 + { + RSX_HOST_FORMAT_FEATURE_SNORM = (1 << 0), + RSX_HOST_FORMAT_FEATURE_SRGB = (1 << 1), + }; + + using enum format_features; + + struct texture_format_ex + { + texture_format_ex() = default; + texture_format_ex(u32 bits) + : format_bits(bits) + {} + + bool valid() const { return format_bits != 0; } + u32 format() const { return format_bits & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); } + + bool hw_SNORM_possible() const; + bool hw_SRGB_possible() const; + + bool 
host_snorm_format_active() const { return host_features & RSX_HOST_FORMAT_FEATURE_SNORM; } + bool host_srgb_format_active() const { return host_features & RSX_HOST_FORMAT_FEATURE_SRGB; } + + operator bool() const { return valid(); } + + bool operator == (const texture_format_ex& that) const + { + return this->format_bits == that.format_bits && + this->features == that.features && + this->host_features == that.host_features && + this->encoded_remap == that.encoded_remap; + } + + //private: + u32 format_bits = 0; + u32 features = 0; + u32 encoded_remap = 0; + u32 texel_remap_control = 0; + u32 host_features = 0; + }; + // Sampled image descriptor class sampled_image_descriptor_base { @@ -167,6 +219,7 @@ namespace rsx u64 surface_cache_tag = 0; texcoord_xform_t texcoord_xform; + texture_format_ex format_ex; }; struct typeless_xfer @@ -257,7 +310,18 @@ namespace rsx u8 get_format_sample_count(rsx::surface_antialiasing antialias); u32 get_max_depth_value(rsx::surface_depth_format2 format); bool is_depth_stencil_format(rsx::surface_depth_format2 format); - bool is_int8_remapped_format(u32 format); // Returns true if the format is treated as INT8 by the RSX remapper. + + /** + * Format feature support. There is not simple format to determine what is supported here, results are from hw tests + * Returns a bitmask of supported features. + */ + rsx::flags32_t get_format_features(u32 texture_format); + + /** + * Returns a channel mask in ARGB that can be SNORM-converted + * Some formats have a hardcoded constant in one lane which we cannot SNORM-interpret in hardware. 
+ */ + u32 get_host_format_snorm_mask(u32 format); /** * Returns number of texel rows encoded in one pitch-length line of bytes diff --git a/rpcs3/Emu/RSX/Common/aligned_malloc.hpp b/rpcs3/Emu/RSX/Common/aligned_malloc.hpp new file mode 100644 index 0000000000..2ca59c3cf2 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/aligned_malloc.hpp @@ -0,0 +1,88 @@ +#pragma once + +#include + +namespace rsx +{ + namespace aligned_allocator + { + template + requires (Align != 0) && ((Align& (Align - 1)) == 0) + size_t align_up(size_t size) + { + return (size + (Align - 1)) & ~(Align - 1); + } + + template + requires (Align != 0) && ((Align& (Align - 1)) == 0) + void* malloc(size_t size) + { +#if defined(_WIN32) + return _aligned_malloc(size, Align); +#elif defined(__APPLE__) + constexpr size_t NativeAlign = std::max(Align, sizeof(void*)); + return std::aligned_alloc(NativeAlign, align_up(size)); +#else + return std::aligned_alloc(Align, align_up(size)); +#endif + } + + template + requires (Align != 0) && ((Align& (Align - 1)) == 0) + void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) + { + if (align_up(prev_size) >= new_size) + { + return prev_ptr; + } + + ensure(reinterpret_cast(prev_ptr) % Align == 0, "Pointer not aligned to Align"); +#if defined(_WIN32) + return _aligned_realloc(prev_ptr, new_size, Align); +#else +#if defined(__APPLE__) + constexpr size_t NativeAlign = std::max(Align, sizeof(void*)); + void* ret = std::aligned_alloc(NativeAlign, align_up(new_size)); +#else + void* ret = std::aligned_alloc(Align, align_up(new_size)); +#endif + std::memcpy(ret, prev_ptr, std::min(prev_size, new_size)); + std::free(prev_ptr); + return ret; +#endif + } + + static inline void free(void* ptr) + { +#ifdef _WIN32 + _aligned_free(ptr); +#else + std::free(ptr); +#endif + } + } + + template + class aligned_pointer_t + { + public: + aligned_pointer_t(size_t size) + { + m_ptr = aligned_allocator::malloc(size); + } + + virtual ~aligned_pointer_t() + { + 
aligned_allocator::free(m_ptr); + } + + T* data() const { return m_ptr; } + + T& operator * () const { return *m_ptr; } + + T* operator -> () const { return m_ptr; } + + private: + T* m_ptr; + }; +} diff --git a/rpcs3/Emu/RSX/Common/io_buffer.h b/rpcs3/Emu/RSX/Common/io_buffer.h index 64f95a5e61..edca80675b 100644 --- a/rpcs3/Emu/RSX/Common/io_buffer.h +++ b/rpcs3/Emu/RSX/Common/io_buffer.h @@ -9,7 +9,7 @@ namespace rsx template concept SpanLike = requires(T t) { - { t.data() } -> std::convertible_to; + { t.data() } -> std::convertible_to; { t.size_bytes() } -> std::convertible_to; }; @@ -71,19 +71,27 @@ namespace rsx return static_cast(m_ptr); } - usz size() const + template + T size() const { - return m_size; + return static_cast(m_size); } template std::span as_span() const { auto bytes = data(); - ensure((reinterpret_cast(bytes) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers."); + ensure(is_naturally_aligned(), "IO buffer span cast requires naturally aligned pointers."); return { utils::bless(bytes), m_size / sizeof(T) }; } + template + bool is_naturally_aligned() const + { + return ((reinterpret_cast(data()) & (alignof(T) - 1)) == 0) && + (m_size % sizeof(T)) == 0; + } + bool empty() const { return m_size == 0; diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 6852e670fb..a37df9dd54 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -3,70 +3,12 @@ #include #include #include -#include +#include "aligned_malloc.hpp" #include "reverse_ptr.hpp" namespace rsx { - namespace aligned_allocator - { - template - requires (Align != 0) && ((Align & (Align - 1)) == 0) - size_t align_up(size_t size) - { - return (size + (Align - 1)) & ~(Align - 1); - } - - template - requires (Align != 0) && ((Align & (Align - 1)) == 0) - void* malloc(size_t size) - { -#if defined(_WIN32) - return _aligned_malloc(size, Align); -#elif defined(__APPLE__) - constexpr 
size_t NativeAlign = std::max(Align, sizeof(void*)); - return std::aligned_alloc(NativeAlign, align_up(size)); -#else - return std::aligned_alloc(Align, align_up(size)); -#endif - } - - template - requires (Align != 0) && ((Align & (Align - 1)) == 0) - void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) - { - if (align_up(prev_size) >= new_size) - { - return prev_ptr; - } - - ensure(reinterpret_cast(prev_ptr) % Align == 0, "Pointer not aligned to Align"); -#if defined(_WIN32) - return _aligned_realloc(prev_ptr, new_size, Align); -#else -#if defined(__APPLE__) - constexpr size_t NativeAlign = std::max(Align, sizeof(void*)); - void* ret = std::aligned_alloc(NativeAlign, align_up(new_size)); -#else - void* ret = std::aligned_alloc(Align, align_up(new_size)); -#endif - std::memcpy(ret, prev_ptr, std::min(prev_size, new_size)); - std::free(prev_ptr); - return ret; -#endif - } - - static inline void free(void* ptr) - { -#ifdef _WIN32 - _aligned_free(ptr); -#else - std::free(ptr); -#endif - } - } - template concept span_like = requires(C& c) { diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 4476930607..4b3aaa0605 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -132,7 +132,7 @@ namespace rsx free_rsx_memory(Traits::get(sink)); } - Traits::clone_surface(cmd, sink, region.source, new_address, region); + Traits::clone_surface(cmd, sink, region.source, new_address, region, region.source->resolution_scaling_config); allocate_rsx_memory(Traits::get(sink)); if (invalidated) [[unlikely]] @@ -398,6 +398,7 @@ namespace rsx surface_antialiasing antialias, usz width, usz height, usz pitch, u8 bpp, + const rsx::surface_scaling_config_t& scaling_config, Args&&... 
extra_params) { surface_storage_type old_surface_storage; @@ -448,7 +449,7 @@ namespace rsx } } - if (Traits::surface_matches_properties(surface, format, width, height, antialias)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config)) { if (!pitch_compatible) { @@ -495,7 +496,7 @@ namespace rsx for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &surface = *It; - if (Traits::surface_matches_properties(surface, format, width, height, antialias, true)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config, true)) { new_surface_storage = std::move(surface); Traits::notify_surface_reused(new_surface_storage); @@ -531,7 +532,7 @@ namespace rsx if (!new_surface) { ensure(store); - new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, std::forward(extra_params)...); + new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, scaling_config, std::forward(extra_params)...); new_surface = Traits::get(new_surface_storage); Traits::prepare_surface_for_drawing(command_list, new_surface); allocate_rsx_memory(new_surface); @@ -842,11 +843,13 @@ namespace rsx surface_color_format color_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { return bind_surface_address( command_list, address, color_format, antialias, width, height, pitch, get_format_block_size_in_bytes(color_format), + scaling_config, std::forward(extra_params)...); } @@ -857,12 +860,14 @@ namespace rsx surface_depth_format2 depth_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... 
extra_params) { return bind_surface_address( command_list, address, depth_format, antialias, width, height, pitch, get_format_block_size_in_bytes(depth_format), + scaling_config, std::forward(extra_params)...); } @@ -969,6 +974,7 @@ namespace rsx surface_raster_type raster_type, const std::array &surface_addresses, u32 address_z, const std::array &surface_pitch, u32 zeta_pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { u32 clip_width = clip_horizontal_reg; @@ -998,7 +1004,7 @@ namespace rsx m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, - clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); + clip_width, clip_height, surface_pitch[surface_index], scaling_config, std::forward(extra_params)...)); m_bound_render_target_ids.push_back(surface_index); } @@ -1014,7 +1020,7 @@ namespace rsx { m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, - clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); + clip_width, clip_height, zeta_pitch, scaling_config, std::forward(extra_params)...)); } else { @@ -1463,5 +1469,113 @@ namespace rsx } } } + + void sync_scaling_config(command_list_type cmd, const rsx::surface_scaling_config_t& active_config) + { + auto process_list_function = [&](surface_ranged_map& data, const utils::address_range32& range) + { + std::vector surfaces_to_clone; + + for (auto It = data.begin_range(range); It != data.end();) + { + auto surface = Traits::get(It->second); + if (surface->get_resolution_scaling_config() == active_config) + { + ++It; + continue; + } + + // Perform a test scaling and check if anything is different after scaling + // There are many cases where this will avoid creating new surfaces + const auto [new_w, new_h] = 
rsx::apply_resolution_scale( + active_config, + surface->template get_surface_width<>(), + surface->template get_surface_height<>()); + + if (new_w == surface->width() && new_h == surface->height()) + { + // Not affected by resolution scale. Just update the details and move on. + surface->resolution_scaling_config = active_config; + ++It; + continue; + } + + surfaces_to_clone.push_back(surface); + + // Invalidate the previous surface + invalidate(It->second); + It = data.erase(It); + } + + for (auto& surface : surfaces_to_clone) + { + // Enqueue the memory transfer + surface_storage_type sink{}; + deferred_clipped_region copy{}; + copy.width = surface->template get_surface_width<>(); + copy.height = surface->template get_surface_height<>(); + copy.transfer_scale_x = 1.f; + copy.transfer_scale_y = 1.f; + copy.target = nullptr; + copy.source = surface; + + Traits::clone_surface(cmd, sink, surface, surface->base_addr, copy, active_config); + allocate_rsx_memory(Traits::get(sink)); + + // Replace with the new one + auto new_surface = Traits::get(sink); + ensure(copy.target == new_surface); + data.emplace(surface->get_memory_range(), std::move(sink)); + + // Force barrier to reduce VRAM pressure + new_surface->memory_barrier(cmd, rsx::surface_access::memory_read); + } + }; + + const auto rtt_bind_backup = m_bound_render_targets; + const auto dsv_bind_backup = m_bound_depth_stencil; + + // Unbind everything. We'll restore it later + for (auto& rtt_bind : m_bound_render_targets) + { + rtt_bind = {}; + } + + m_bound_depth_stencil = {}; + + process_list_function(m_render_targets_storage, m_render_targets_memory_range); + process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range); + + // Restore bindings. 
+ for (int i = 0; i < 4; ++i) + { + const auto address = rtt_bind_backup[i].first; + if (!address) + { + continue; + } + + auto rtt = m_render_targets_storage.find(address); + ensure(rtt != m_render_targets_storage.end()); + + m_bound_render_targets[i] = + { + address, + Traits::get(rtt->second) + }; + } + + if (const auto ds_address = dsv_bind_backup.first) + { + auto ds = m_depth_stencil_storage.find(ds_address); + ensure(ds != m_depth_stencil_storage.end()); + + m_bound_depth_stencil = + { + ds_address, + Traits::get(ds->second) + }; + } + } }; } diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index bf7dee2db3..8ec97600bc 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -88,18 +88,18 @@ namespace rsx auto dst_h = std::get<3>(region); // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) - { - auto src = static_cast(source); + auto src = static_cast(source); + std::tie(src_w, src_h) = rsx::apply_resolution_scale( + src->resolution_scaling_config, + src_w, src_h, + src->template get_surface_width(), + src->template get_surface_height()); - std::tie(src_w, src_h) = rsx::apply_resolution_scale(src_w, src_h, - src->template get_surface_width(), - src->template get_surface_height()); - - std::tie(dst_w, dst_h) = rsx::apply_resolution_scale(dst_w, dst_h, - target_surface->template get_surface_width(), - target_surface->template get_surface_height()); - } + std::tie(dst_w, dst_h) = rsx::apply_resolution_scale( + target_surface->resolution_scaling_config, + dst_w, dst_h, + target_surface->template get_surface_width(), + target_surface->template get_surface_height()); width = src_w; height = src_h; @@ -146,6 +146,9 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; + // Scaling configuration + surface_scaling_config_t resolution_scaling_config; + rsx::address_range32 memory_range; std::unique_ptr> resolve_surface; @@ -303,6 +306,11 @@ namespace rsx 
format_info.gcm_depth_format = format; } + void set_resolution_scaling_config(const surface_scaling_config_t& config) + { + resolution_scaling_config = config; + } + inline rsx::surface_color_format get_surface_color_format() const { return format_info.gcm_color_format; @@ -323,6 +331,11 @@ namespace rsx ); } + inline const rsx::surface_scaling_config_t& get_resolution_scaling_config() const + { + return resolution_scaling_config; + } + inline bool dirty() const { return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); @@ -541,10 +554,16 @@ namespace rsx } // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) + if (resolution_scaling_config.scale_percent != 100 || + region.source->resolution_scaling_config.scale_percent != 100) { - auto [src_width, src_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.source->width(), slice.source->height()); - auto [dst_width, dst_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.target->width(), slice.target->height()); + const auto& src_res_scale = region.source->resolution_scaling_config; + const auto& dst_res_scale = resolution_scaling_config; + const auto src_surface = ensure(dynamic_cast(slice.source)); + const auto dst_surface = ensure(dynamic_cast(slice.target)); + + auto [src_width, src_height] = rsx::apply_resolution_scale(src_res_scale, slice.width, slice.height, src_surface->get_surface_width(), src_surface->get_surface_height()); + auto [dst_width, dst_height] = rsx::apply_resolution_scale(dst_res_scale, slice.width, slice.height, dst_surface->get_surface_width(), dst_surface->get_surface_height()); slice.transfer_scale_x *= f32(dst_width) / src_width; slice.transfer_scale_y *= f32(dst_height) / src_height; @@ -552,8 +571,8 @@ namespace rsx slice.width = src_width; slice.height = src_height; - std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(slice.src_x, slice.src_y, slice.source->width(), 
slice.source->height()); - std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(slice.dst_x, slice.dst_y, slice.target->width(), slice.target->height()); + std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(src_res_scale, slice.src_x, slice.src_y, src_surface->get_surface_width(), src_surface->get_surface_height()); + std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(dst_res_scale, slice.dst_x, slice.dst_y, dst_surface->get_surface_width(), dst_surface->get_surface_height()); } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index dfe8043bd3..295090a5bc 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1731,24 +1731,34 @@ namespace rsx } case deferred_request_command::cubemap_unwrap: { - rsx::simple_array sections(6); - for (u16 n = 0; n < 6; ++n) + rsx::simple_array sections(6 * desc.mipmaps); + for (u16 n = 0, section_id = 0; n < 6; ++n) { - sections[n] = + u16 mip_w = desc.width, mip_h = desc.height; + u16 y_offset = static_cast(desc.slice_h * n); + + for (u8 mip = 0; mip < desc.mipmaps; ++mip) { - .src = desc.external_handle, - .xform = surface_transform::coordinate_transform, - .level = 0, - .src_x = 0, - .src_y = static_cast(desc.slice_h * n), - .dst_x = 0, - .dst_y = 0, - .dst_z = n, - .src_w = desc.width, - .src_h = desc.height, - .dst_w = desc.width, - .dst_h = desc.height - }; + sections[section_id++] = + { + .src = desc.external_handle, + .xform = surface_transform::coordinate_transform, + .level = mip, + .src_x = 0, + .src_y = y_offset, + .dst_x = 0, + .dst_y = 0, + .dst_z = n, + .src_w = mip_w, + .src_h = mip_h, + .dst_w = mip_w, + .dst_h = mip_h + }; + + y_offset += mip_h; + mip_w = std::max(mip_w / 2, 1); + mip_h = std::max(mip_h / 2, 1); + } } result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, sections, desc.remap); @@ -2413,9 +2423,13 @@ namespace rsx // 2. 
The image has to have been generated on the GPU (fbo or blit target only) rsx::simple_array sections; - const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100); + const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage); + auto to_surface_type = [](const copy_region_descriptor& rgn) -> typename surface_store_type::surface_type + { + return static_cast(rgn.src); + }; - if (!helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] + if (!helpers::append_mipmap_level(to_surface_type, sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] { // Abort if mip0 is not compatible return result; @@ -2445,7 +2459,7 @@ namespace rsx options, range, extended_dimension, m_rtts, std::forward(extras)...); if (!ret.validate() || - !helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes)) + !helpers::append_mipmap_level(to_surface_type, sections, ret, attr2, subsurface, use_upscaling, attributes)) { // Abort break; @@ -2778,7 +2792,7 @@ namespace rsx surf->template get_surface_height() != surf->height()) { // Must go through a scaling operation due to resolution scaling being present - ensure(g_cfg.video.resolution_scale_percent != 100); + ensure(src_subres.surface->resolution_scaling_config.scale_percent != 100); use_null_region = false; } } @@ -3389,8 +3403,8 @@ namespace rsx { const auto surface_width = src_subres.surface->template get_surface_width(); const auto surface_height = src_subres.surface->template get_surface_height(); - std::tie(src_area.x1, src_area.y1) = rsx::apply_resolution_scale(src_area.x1, src_area.y1, surface_width, surface_height); - std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_area.x2, src_area.y2, surface_width, surface_height); + std::tie(src_area.x1, src_area.y1) = 
rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x1, src_area.y1, surface_width, surface_height); + std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x2, src_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area); @@ -3400,8 +3414,8 @@ namespace rsx { const auto surface_width = dst_subres.surface->template get_surface_width(); const auto surface_height = dst_subres.surface->template get_surface_height(); - std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_area.x1, dst_area.y1, surface_width, surface_height); - std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_area.x2, dst_area.y2, surface_width, surface_height); + std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x1, dst_area.y1, surface_width, surface_height); + std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x2, dst_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area); diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 1560f40ad4..8898830034 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -357,11 +357,11 @@ namespace rsx const auto surface_width = section.surface->template get_surface_width(); const auto surface_height = section.surface->template get_surface_height(); - const auto [src_width, src_height] = rsx::apply_resolution_scale(section.src_area.width, h, surface_width, surface_height); - const auto [dst_width, 
dst_height] = rsx::apply_resolution_scale(section.dst_area.width, h, attr.width, attr.height); + const auto [src_width, src_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.src_area.width, h, surface_width, surface_height); + const auto [dst_width, dst_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.dst_area.width, h, attr.width, attr.height); - std::tie(src_x, src_y) = rsx::apply_resolution_scale(src_x, src_y, surface_width, surface_height); - std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(dst_x, dst_y, attr.width, attr.height); + std::tie(src_x, src_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, src_x, src_y, surface_width, surface_height); + std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, dst_x, dst_y, attr.width, attr.height); section.surface->memory_barrier(cmd, rsx::surface_access::transfer_read); @@ -430,8 +430,10 @@ namespace rsx if (scaling) { // Since output is upscaled, also upscale on dst - const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, attr.height); - const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(dst_w, height, attr.width, attr.height); + + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(scaling_config, static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, attr.height); + const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(scaling_config, dst_w, height, attr.width, attr.height); out.push_back ({ @@ -660,10 +662,10 @@ namespace rsx bool is_depth = texptr->is_depth_surface(); auto attr2 = attr; - if (rsx::get_resolution_scale_percent() != 100) + if (texptr->resolution_scaling_config.scale_percent != 100) { - const auto [scaled_w, scaled_h] = 
rsx::apply_resolution_scale(attr.width, attr.height, surface_width, surface_height); - const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, attr.width, attr.height, surface_width, surface_height); + const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); attr2.width = scaled_w; attr2.height = scaled_h; attr2.slice_h = scaled_slice_h; @@ -841,7 +843,8 @@ namespace rsx } // If this method was called, there is no easy solution, likely means atlas gather is needed - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(attr2.width, attr2.height); + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(scaling_config, attr2.width, attr2.height); const auto format_class = classify_format(attr2.gcm_format); const auto upload_context = (fbos.empty()) ? 
texture_upload_context::shader_read : texture_upload_context::framebuffer_storage; @@ -892,14 +895,15 @@ namespace rsx return result; } - template + template bool append_mipmap_level( + to_surface_type_converter&& as_surface_type, // Cast function to surface type rsx::simple_array& sections, // Destination list - const sampled_image_descriptor& level, // Descriptor for the image level being checked - const image_section_attributes_t& attr, // Attributes of image level - u8 mipmap_level, // Level index - bool apply_upscaling, // Whether to upscale the results or not - const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level + const sampled_image_descriptor& level, // Descriptor for the image level being checked + const image_section_attributes_t& attr, // Attributes of image level + u8 mipmap_level, // Level index + bool apply_upscaling, // Whether to upscale the results or not + const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level { if (level.image_handle) { @@ -916,7 +920,8 @@ namespace rsx // Calculate transfer dimensions from attr if (level.upload_context == rsx::texture_upload_context::framebuffer_storage) [[likely]] { - std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(attr.width, attr.height); + auto rtv = as_surface_type(mip); + std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(rtv->resolution_scaling_config, attr.width, attr.height); } else { @@ -964,7 +969,9 @@ namespace rsx if (apply_upscaling) { auto& mip = sections.back(); - std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale(mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); + std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale( + as_surface_type(mip)->resolution_scaling_config, + mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); } return true; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp index e263a945ef..f86c6dea04 100644 --- 
a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp @@ -49,7 +49,7 @@ namespace rsx } } - std::string framebuffer_statistics_t::to_string(bool squash) const + std::string framebuffer_statistics_t::to_string(const surface_scaling_config_t& scaling_config, bool squash) const { // Format is sorted by sample count struct sorted_message_t @@ -70,7 +70,7 @@ namespace rsx for (const auto& [aa_mode, stat] : data) { auto real_stat = stat; - std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(stat.width, stat.height); + std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(scaling_config, stat.width, stat.height); real_stats.push_back(real_stat); sorted_message_t msg; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.h b/rpcs3/Emu/RSX/Core/RSXDisplay.h index 77407f2f19..6a04374e1b 100644 --- a/rpcs3/Emu/RSX/Core/RSXDisplay.h +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.h @@ -12,6 +12,8 @@ namespace rsx { enum class surface_antialiasing : u8; + struct surface_scaling_config_t; + struct framebuffer_dimensions_t { u16 width; @@ -42,7 +44,7 @@ namespace rsx void add(u16 width, u16 height, rsx::surface_antialiasing aa); // Returns a formatted string representing the statistics collected over the frame. - std::string to_string(bool squash) const; + std::string to_string(const surface_scaling_config_t& scaling_config, bool squash) const; }; struct frame_statistics_t diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 53f6ce31e7..8ef45e1f06 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -699,7 +699,9 @@ namespace rsx const auto window_origin = REGS(m_ctx)->shader_window_origin(); const u32 window_height = REGS(m_ctx)->shader_window_height(); const auto pixel_center = REGS(m_ctx)->pixel_center(); - const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 
1.f : rsx::get_resolution_scale(); + const f32 resolution_scale = (window_height <= RSX(m_ctx)->resolution_scaling_config.min_scalable_dimension) + ? 1.f + : RSX(m_ctx)->resolution_scaling_config.scale_factor(); payload.wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); payload.wpos_bias[0] = 0.f; diff --git a/rpcs3/Emu/RSX/GL/GLCompute.cpp b/rpcs3/Emu/RSX/GL/GLCompute.cpp index 5607c149ed..12d10d2368 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.cpp +++ b/rpcs3/Emu/RSX/GL/GLCompute.cpp @@ -340,7 +340,8 @@ namespace gl void cs_d24x8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout) { - const auto row_pitch = region.width; + const auto row_pitch = layout.row_length ? layout.row_length : region.width; + ensure(row_pitch >= region.width); m_program.uniforms["swap_bytes"] = layout.swap_bytes; m_program.uniforms["output_pitch"] = row_pitch; @@ -390,14 +391,15 @@ namespace gl void cs_rgba8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout) { - const auto row_pitch = region.width; + const auto row_pitch = layout.row_length ? 
layout.row_length : region.width; + ensure(row_pitch >= region.width); m_program.uniforms["swap_bytes"] = layout.swap_bytes; m_program.uniforms["output_pitch"] = row_pitch; m_program.uniforms["region_offset"] = color2i(region.x, region.y); m_program.uniforms["region_size"] = color2i(region.width, region.height); m_program.uniforms["is_bgra"] = (layout.format == static_cast(gl::texture::format::bgra)); - m_program.uniforms["block_width"] = static_cast(layout.size); + m_program.uniforms["block_width"] = static_cast(layout.block_size); auto data_view = src->get_view(rsx::default_remap_vector.with_encoding(GL_REMAP_IDENTITY), gl::image_aspect::color); @@ -441,6 +443,7 @@ namespace gl { const u32 bpp = dst->image()->pitch() / dst->image()->width(); const u32 row_length = utils::align(dst_region.width * bpp, std::max(layout.alignment, 1)) / bpp; + ensure(row_length >= dst_region.width); m_program.uniforms["swap_bytes"] = layout.swap_bytes; m_program.uniforms["src_pitch"] = row_length; diff --git a/rpcs3/Emu/RSX/GL/GLDMA.cpp b/rpcs3/Emu/RSX/GL/GLDMA.cpp index 3c12009f67..5e60525a5c 100644 --- a/rpcs3/Emu/RSX/GL/GLDMA.cpp +++ b/rpcs3/Emu/RSX/GL/GLDMA.cpp @@ -22,7 +22,7 @@ namespace gl void* userptr = vm::get_super_ptr(base_address); m_data = std::make_unique(); - m_data->create(buffer::target::array, block_size, userptr, buffer::memory_type::userptr, 0); + m_data->create(buffer::target::copy_dst, block_size, userptr, buffer::memory_type::userptr, 0); m_base_address = base_address; // Some drivers may reject userptr input for whatever reason. Check that the state is still valid. 
@@ -77,7 +77,7 @@ namespace gl { const auto start_block_address = start & s_dma_block_mask; const auto end_block_address = (start + length + s_dma_block_size - 1) & s_dma_block_mask; - return utils::address_range32::start_end(start_block_address, end_block_address); + return utils::address_range32::start_length(start_block_address, end_block_address - start_block_address); } const dma_block& get_block(u32 start, u32 length) diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index e8ea4bc714..0abf0111e6 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -2,6 +2,7 @@ #include "GLGSRender.h" #include "../rsx_methods.h" #include "../Common/BufferUtils.h" +#include "../Program/GLSLCommon.h" #include "Emu/RSX/NV47/HW/context_accessors.define.h" @@ -226,7 +227,7 @@ void GLGSRender::update_draw_state() case rsx::primitive_type::lines: case rsx::primitive_type::line_loop: case rsx::primitive_type::line_strip: - gl_state.line_width(rsx::method_registers.line_width() * rsx::get_resolution_scale()); + gl_state.line_width(rsx::method_registers.line_width() * resolution_scaling_config.scale_factor()); gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH); break; default: @@ -298,88 +299,169 @@ void GLGSRender::load_texture_env() for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { if (!(textures_ref & 1)) + { continue; + } if (!fs_sampler_state[i]) + { fs_sampler_state[i] = std::make_unique(); + } auto sampler_state = static_cast(fs_sampler_state[i].get()); const auto& tex = rsx::method_registers.fragment_textures[i]; const auto previous_format_class = sampler_state->format_class; - if (m_samplers_dirty || m_textures_dirty[i] || m_gl_texture_cache.test_if_descriptor_expired(cmd, m_rtts, sampler_state, tex)) + if (!m_samplers_dirty && + !m_textures_dirty[i] && + !m_gl_texture_cache.test_if_descriptor_expired(cmd, m_rtts, sampler_state, tex)) { - 
if (tex.enabled()) - { - *sampler_state = m_gl_texture_cache.upload_texture(cmd, tex, m_rtts); + continue; + } - if (sampler_state->validate()) - { - if (m_textures_dirty[i]) - { - m_fs_sampler_states[i].apply(tex, fs_sampler_state[i].get()); - } - else if (sampler_state->format_class != previous_format_class) - { - m_graphics_state |= rsx::fragment_program_state_dirty; - } + const bool is_sampler_dirty = m_textures_dirty[i]; + m_textures_dirty[i] = false; - if (const auto texture_format = tex.format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - sampler_state->format_class != rsx::classify_format(texture_format) && - (texture_format == CELL_GCM_TEXTURE_A8R8G8B8 || texture_format == CELL_GCM_TEXTURE_D8R8G8B8)) - { - // Depth format redirected to BGRA8 resample stage. Do not filter to avoid bits leaking. - // If accurate graphics are desired, force a bitcast to COLOR as a workaround. - m_fs_sampler_states[i].set_parameteri(GL_TEXTURE_MIN_FILTER, GL_NEAREST); - m_fs_sampler_states[i].set_parameteri(GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - } - } - else + if (!tex.enabled()) + { + *sampler_state = {}; + continue; + } + + *sampler_state = m_gl_texture_cache.upload_texture(cmd, tex, m_rtts); + if (!sampler_state->validate()) + { + continue; + } + + if (!is_sampler_dirty) + { + if (sampler_state->format_class != previous_format_class) { - *sampler_state = {}; + // Host details changed but RSX is not aware + m_graphics_state |= rsx::fragment_program_state_dirty; } - m_textures_dirty[i] = false; + if (sampler_state->format_ex) + { + // Nothing to change, use cached sampler + continue; + } + } + + sampler_state->format_ex = tex.format_ex(); + + if (sampler_state->format_ex.texel_remap_control && + sampler_state->image_handle && + sampler_state->upload_context == rsx::texture_upload_context::shader_read && + (current_fp_metadata.bx2_texture_reads_mask & (1u << i)) == 0 && + !g_cfg.video.disable_hardware_texel_remapping) [[ unlikely ]] + { + // Check if we need to 
override the view format + const auto gl_format = sampler_state->image_handle->view_format(); + GLenum format_override = gl_format; + rsx::flags32_t flags_to_erase = 0u; + rsx::flags32_t host_flags_to_set = 0u; + + if (sampler_state->format_ex.hw_SNORM_possible()) + { + format_override = gl::get_compatible_snorm_format(gl_format); + flags_to_erase = rsx::texture_control_bits::SEXT_MASK; + host_flags_to_set = rsx::RSX_HOST_FORMAT_FEATURE_SNORM; + } + else if (sampler_state->format_ex.hw_SRGB_possible()) + { + format_override = gl::get_compatible_srgb_format(gl_format); + flags_to_erase = rsx::texture_control_bits::GAMMA_CTRL_MASK; + host_flags_to_set = rsx::RSX_HOST_FORMAT_FEATURE_SRGB; + } + + if (format_override != GL_NONE && format_override != gl_format) + { + sampler_state->image_handle = sampler_state->image_handle->as(format_override); + sampler_state->format_ex.texel_remap_control &= (~flags_to_erase); + sampler_state->format_ex.host_features |= host_flags_to_set; + } + } + + u32 actual_mipcount = 1; + if (sampler_state->upload_context == rsx::texture_upload_context::shader_read) + { + actual_mipcount = tex.get_exact_mipmap_count(); + } + else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather) + { + actual_mipcount = sampler_state->external_subresource_desc.sections_to_copy.size(); + } + else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::cubemap_unwrap) + { + actual_mipcount = sampler_state->external_subresource_desc.mipmaps; + } + + m_fs_sampler_states[i].apply(tex, fs_sampler_state[i].get(), actual_mipcount > 1); + + const auto texture_format = sampler_state->format_ex.format(); + // Depth format redirected to BGRA8 resample stage. Do not filter to avoid bits leaking. + // If accurate graphics are desired, force a bitcast to COLOR as a workaround. 
+ const bool is_depth_reconstructed = sampler_state->format_class != rsx::classify_format(texture_format) && + (texture_format == CELL_GCM_TEXTURE_A8R8G8B8 || texture_format == CELL_GCM_TEXTURE_D8R8G8B8); + // SNORM conversion required in shader. Do not interpolate to avoid introducing discontinuities due to how negative numbers work + const bool is_snorm = (sampler_state->format_ex.texel_remap_control & rsx::texture_control_bits::SEXT_MASK) != 0; + + if (is_depth_reconstructed || is_snorm) + { + // Depth format redirected to BGRA8 resample stage. Do not filter to avoid bits leaking. + m_fs_sampler_states[i].set_parameteri(GL_TEXTURE_MIN_FILTER, GL_NEAREST); + m_fs_sampler_states[i].set_parameteri(GL_TEXTURE_MAG_FILTER, GL_NEAREST); } } for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { if (!(textures_ref & 1)) + { continue; + } if (!vs_sampler_state[i]) + { vs_sampler_state[i] = std::make_unique(); + } auto sampler_state = static_cast(vs_sampler_state[i].get()); const auto& tex = rsx::method_registers.vertex_textures[i]; const auto previous_format_class = sampler_state->format_class; - if (m_samplers_dirty || m_vertex_textures_dirty[i] || m_gl_texture_cache.test_if_descriptor_expired(cmd, m_rtts, sampler_state, tex)) + if (!m_samplers_dirty && + !m_vertex_textures_dirty[i] && + !m_gl_texture_cache.test_if_descriptor_expired(cmd, m_rtts, sampler_state, tex)) { - if (rsx::method_registers.vertex_textures[i].enabled()) - { - *sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts); + continue; + } - if (sampler_state->validate()) - { - if (m_vertex_textures_dirty[i]) - { - m_vs_sampler_states[i].apply(tex, vs_sampler_state[i].get()); - } - else if (sampler_state->format_class != previous_format_class) - { - m_graphics_state |= rsx::vertex_program_state_dirty; - } - } - } - else - { - *sampler_state = {}; - } + const bool is_sampler_dirty = 
m_vertex_textures_dirty[i]; + m_vertex_textures_dirty[i] = false; - m_vertex_textures_dirty[i] = false; + if (!tex.enabled()) + { + *sampler_state = {}; + continue; + } + + *sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts); + + if (!sampler_state->validate()) + { + continue; + } + + if (is_sampler_dirty) + { + m_vs_sampler_states[i].apply(tex, vs_sampler_state[i].get()); + } + else if (sampler_state->format_class != previous_format_class) + { + m_graphics_state |= rsx::vertex_program_state_dirty; } } @@ -394,7 +476,9 @@ void GLGSRender::bind_texture_env() for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { if (!(textures_ref & 1)) + { continue; + } gl::texture_view* view = nullptr; auto sampler_state = static_cast(fs_sampler_state[i].get()); @@ -434,22 +518,26 @@ void GLGSRender::bind_texture_env() for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { if (!(textures_ref & 1)) + { continue; + } auto sampler_state = static_cast(vs_sampler_state[i].get()); + gl::texture_view* view = nullptr; if (rsx::method_registers.vertex_textures[i].enabled() && sampler_state->validate()) { - if (sampler_state->image_handle) [[likely]] + if (view = sampler_state->image_handle; !view) { - sampler_state->image_handle->bind(cmd, GL_VERTEX_TEXTURES_START + i); - } - else - { - m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(cmd, GL_VERTEX_TEXTURES_START + i); + view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); } } + + if (view) [[likely]] + { + view->bind(cmd, GL_VERTEX_TEXTURES_START + i); + } else { cmd->bind_texture(GL_VERTEX_TEXTURES_START + i, GL_TEXTURE_2D, GL_NONE); diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index f8cc046569..5271390af9 100644 
--- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -237,6 +237,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_tex3D_ops = properties.has_tex3D; m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj; m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); + m_shader_props.require_color_format_convert = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT); glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index ebff202303..c1acabd601 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -73,6 +73,7 @@ void GLGSRender::set_viewport() { // NOTE: scale offset matrix already contains the viewport transformation const auto [clip_width, clip_height] = rsx::apply_resolution_scale( + resolution_scaling_config, rsx::method_registers.surface_clip_width(), rsx::method_registers.surface_clip_height()); glViewport(0, 0, clip_width, clip_height); @@ -138,8 +139,7 @@ void GLGSRender::on_init_thread() gl::init(); gl::set_command_context(gl_state); - // Enable adaptive vsync if vsync is requested - gl::set_swapinterval(g_cfg.video.vsync ? 
-1 : 0); + update_swap_interval(); if (g_cfg.video.debug_output) gl::enable_debugging(); @@ -249,22 +249,23 @@ void GLGSRender::on_init_thread() // Fallback null texture instead of relying on texture0 { std::array pixeldata = { 0, 0, 0, 0, 0, 0, 0, 0 }; + const rsx::io_buffer src_buf = std::span(pixeldata); // 1D auto tex1D = std::make_unique(GL_TEXTURE_1D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); - tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + tex1D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 2D auto tex2D = std::make_unique(GL_TEXTURE_2D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); - tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + tex2D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // 3D auto tex3D = std::make_unique(GL_TEXTURE_3D, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); - tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + tex3D->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); // CUBE auto texCUBE = std::make_unique(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); - texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + texCUBE->copy_from(src_buf, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); m_null_textures[GL_TEXTURE_1D] = std::move(tex1D); m_null_textures[GL_TEXTURE_2D] = std::move(tex2D); @@ -397,6 +398,7 @@ void GLGSRender::on_init_thread() m_ui_renderer.create(); m_video_output_pass.create(); + gl::init_global_texture_resources(); m_gl_texture_cache.initialize(); m_prog_buffer.initialize @@ -578,6 +580,33 @@ void GLGSRender::on_exit() gl::set_primary_context_thread(false); } +void GLGSRender::update_swap_interval() +{ + const vsync_mode current_mode = g_cfg.video.vsync; 
+ if (current_mode == m_vsync_mode) + { + return; + } + + // Enable adaptive vsync if vsync is requested + int swap_interval = 0; + switch (current_mode) + { + default: + case vsync_mode::off: + break; + case vsync_mode::adaptive: + swap_interval = -1; + break; + case vsync_mode::full: + swap_interval = 1; + break; + } + + gl::set_swapinterval(swap_interval); + m_vsync_mode = current_mode; +} + void GLGSRender::clear_surface(u32 arg) { if (skip_current_frame) return; @@ -908,7 +937,7 @@ void GLGSRender::load_program_env() m_draw_processor.fill_scale_offset_data(buf, false); m_draw_processor.fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 68)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * rsx::get_resolution_scale(); + *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * resolution_scaling_config.scale_factor(); *(reinterpret_cast(buf + 76)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 80)) = rsx::method_registers.clip_max(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index a05eb0bf3d..779519fee7 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -186,6 +186,8 @@ private: gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf& avconfig); + void update_swap_interval(); + public: void set_viewport(); void set_scissor(bool clip_viewport); diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 7d36e5598b..a758804e4f 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -220,10 +220,10 @@ namespace gl m_input_filter = gl::filter::linear; } - gl::texture_view* ui_overlay_renderer::load_simple_image(rsx::overlays::image_info_base* desc, bool temp_resource, u32 owner_uid) + gl::texture_view* ui_overlay_renderer::load_simple_image(const rsx::overlays::image_info_base* desc, bool temp_resource, u32 owner_uid) { 
auto tex = std::make_unique(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, 1, GL_RGBA8, RSX_FORMAT_CLASS_COLOR); - tex->copy_from(desc->get_data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + tex->copy_from(desc->as_span(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); const GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; auto view = std::make_unique(tex.get(), remap); @@ -236,7 +236,7 @@ namespace gl } else { - const u64 key = reinterpret_cast(desc); + const u64 key = reinterpret_cast(desc); temp_image_cache[key] = std::make_pair(owner_uid, std::move(tex)); temp_view_cache[key] = std::move(view); } @@ -287,7 +287,7 @@ namespace gl } } - gl::texture_view* ui_overlay_renderer::find_font(rsx::overlays::font* font) + gl::texture_view* ui_overlay_renderer::find_font(const rsx::overlays::font* font) { const auto font_size = font->get_glyph_data_dimensions(); @@ -308,7 +308,7 @@ namespace gl const std::vector& glyph_data = font->get_glyph_data(); auto tex = std::make_unique(GL_TEXTURE_2D_ARRAY, font_size.width, font_size.height, font_size.depth, 1, 1, GL_R8, RSX_FORMAT_CLASS_COLOR); - tex->copy_from(glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte, {}); + tex->copy_from(std::span(glyph_data), gl::texture::format::r, gl::texture::type::ubyte, {}); GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED }; auto view = std::make_unique(tex.get(), remap); @@ -320,7 +320,7 @@ namespace gl return result; } - gl::texture_view* ui_overlay_renderer::find_temp_image(rsx::overlays::image_info_base* desc, u32 owner_uid) + gl::texture_view* ui_overlay_renderer::find_temp_image(const rsx::overlays::image_info_base* desc, u32 owner_uid) { const bool dirty = std::exchange(desc->dirty, false); const u64 key = reinterpret_cast(desc); @@ -332,7 +332,7 @@ namespace gl if (dirty) { - view->image()->copy_from(desc->get_data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); + view->image()->copy_from(desc->as_span(), 
gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); } return view; @@ -399,8 +399,15 @@ namespace gl void ui_overlay_renderer::run(gl::command_context& cmd_, const areau& viewport, GLuint target, rsx::overlays::overlay& ui, bool flip_vertically) { - program_handle.uniforms["viewport"] = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); - program_handle.uniforms["ui_scale"] = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + ui.set_render_viewport( + static_cast(std::min(viewport.width(), std::numeric_limits::max())), + static_cast(std::min(viewport.height(), std::numeric_limits::max())) + ); + const auto ui_scale = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + const auto ui_viewport = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); + + program_handle.uniforms["viewport"] = ui_viewport; + program_handle.uniforms["ui_scale"] = ui_scale; saved_sampler_state save_30(30, m_sampler); saved_sampler_state save_31(31, m_sampler); @@ -431,7 +438,7 @@ namespace gl } case rsx::overlays::image_resource_id::raw_image: { - cmd_->bind_texture(31, GL_TEXTURE_2D, find_temp_image(static_cast(cmd.config.external_data_ref), ui.uid)->id()); + cmd_->bind_texture(31, GL_TEXTURE_2D, find_temp_image(static_cast(cmd.config.external_data_ref), ui.uid)->id()); break; } case rsx::overlays::image_resource_id::font_file: @@ -458,12 +465,24 @@ namespace gl .texture_mode(texture_mode) .clip_fragments(cmd.config.clip_region) .pulse_glow(cmd.config.pulse_glow) + .set_sdf(cmd.config.sdf_config.func) .get(); program_handle.uniforms["timestamp"] = cmd.config.get_sinus_value(); program_handle.uniforms["albedo"] = cmd.config.color; program_handle.uniforms["clip_bounds"] = cmd.config.clip_rect; program_handle.uniforms["blur_intensity"] = static_cast(cmd.config.blur_strength); 
+ + if (cmd.config.sdf_config.func != rsx::overlays::sdf_function::none) + { + auto sdf_config = cmd.config.sdf_config; + sdf_config.transform(static_cast(viewport).flipped_vertical(), {ui_scale.x, ui_scale.y}); + + program_handle.uniforms["sdf_params"] = color4f(sdf_config.hx, sdf_config.hy, sdf_config.br, sdf_config.bw); + program_handle.uniforms["sdf_origin"] = color2f(sdf_config.cx, sdf_config.cy); + program_handle.uniforms["sdf_border_color"] = sdf_config.border_color; + } + overlay_pass::run(cmd_, viewport, target, gl::image_aspect::color, true); } @@ -551,7 +570,8 @@ namespace gl const pixel_buffer_layout& layout) { const u32 bpp = dst->image()->pitch() / dst->image()->width(); - const u32 row_length = utils::align(dst_region.width * bpp, std::max(layout.alignment, 1)) / bpp; + const u32 aligned_width = utils::align(dst_region.width * bpp, std::max(layout.alignment, 1)) / bpp; + const u32 row_length = layout.row_length ? layout.row_length : aligned_width; program_handle.uniforms["src_pitch"] = row_length; program_handle.uniforms["swap_bytes"] = layout.swap_bytes; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 96478a161e..8ccfd67305 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -75,16 +75,16 @@ namespace gl ui_overlay_renderer(); - gl::texture_view* load_simple_image(rsx::overlays::image_info_base* desc, bool temp_resource, u32 owner_uid); + gl::texture_view* load_simple_image(const rsx::overlays::image_info_base* desc, bool temp_resource, u32 owner_uid); void create(); void destroy(); void remove_temp_resources(u64 key); - gl::texture_view* find_font(rsx::overlays::font* font); + gl::texture_view* find_font(const rsx::overlays::font* font); - gl::texture_view* find_temp_image(rsx::overlays::image_info_base* desc, u32 owner_uid); + gl::texture_view* find_temp_image(const rsx::overlays::image_info_base* desc, u32 owner_uid); void set_primitive_type(rsx::overlays::primitive_type type); diff 
--git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index e7c03dfcf5..2aa11868ee 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -95,6 +95,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons image = section.surface->get_surface(rsx::surface_access::transfer_read); std::tie(info->width, info->height) = rsx::apply_resolution_scale( + resolution_scaling_config, std::min(surface_width, info->width), std::min(surface_height, info->height)); } @@ -132,7 +133,8 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons const auto range = utils::address_range32::start_length(info->address, info->pitch * info->height); m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read); - flip_image->copy_from(vm::base(info->address), static_cast(expected_format), gl::texture::type::uint_8_8_8_8, unpack_settings); + const rsx::io_buffer read_buf = { vm::base(info->address), range.length() }; + flip_image->copy_from(read_buf, static_cast(expected_format), gl::texture::type::uint_8_8_8_8, unpack_settings); image = flip_image.get(); } else if (image->get_internal_format() != static_cast(expected_format)) @@ -224,7 +226,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) if (avconfig.stereo_enabled) [[unlikely]] { - const auto [unused, min_expected_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); + const auto [unused, min_expected_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); if (image_to_flip->height() < min_expected_height) { // Get image for second eye @@ -239,7 +241,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) else { // Account for possible insets - const auto [unused2, scaled_buffer_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height); + const auto [unused2, 
scaled_buffer_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height); buffer_height = std::min(image_to_flip->height() - min_expected_height, scaled_buffer_height); } } @@ -251,6 +253,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) if (info.emu_flip) { evaluate_cpu_usage_reduction_limits(); + update_swap_interval(); } // Get window state @@ -314,9 +317,11 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) // Lock to avoid modification during run-update chain std::lock_guard lock(*m_overlay_manager); + const areau display_area = {0, 0, static_cast(m_frame->client_width()), static_cast(m_frame->client_height())}; for (const auto& view : m_overlay_manager->get_views()) { - m_ui_renderer.run(cmd, aspect_ratio, target, *view.get(), flip_vertically); + const areau render_area = view->use_window_space ? display_area : aspect_ratio; + m_ui_renderer.run(cmd, render_area, target, *view.get(), flip_vertically); } } }; @@ -368,7 +373,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) std::vector sshot_frame(buffer_height * buffer_width * 4); glGetError(); - tex->copy_to(sshot_frame.data(), gl::texture::format::rgba, gl::texture::type::ubyte, pack_settings); + tex->copy_to(std::span(sshot_frame), gl::texture::format::rgba, gl::texture::type::ubyte, pack_settings); m_sshot_tex.reset(); @@ -475,7 +480,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) "Texture uploads: %11u (%u from CPU - %02u%%, %u copies avoided)\n" "Vertex cache hits: %9u/%u (%u%%)\n" "Program cache lookup ellision: %u/%u (%u%%)", - info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa), + info.stats.framebuffer_stats.to_string(resolution_scaling_config, !backend_config.supports_hw_msaa), get_load(), info.stats.draw_calls, info.stats.setup_time, info.stats.vertex_upload_time, info.stats.textures_upload_time, info.stats.draw_exec_time, num_dirty_textures, texture_memory_size, 
num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate, @@ -512,6 +517,19 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) m_frame->flip(m_context); rsx::thread::flip(info); + // Data sync + const rsx::surface_scaling_config_t active_res_scaling_config = + { + .scale_percent = static_cast(g_cfg.video.resolution_scale_percent), + .min_scalable_dimension = static_cast(g_cfg.video.min_scalable_dimension), + }; + + if (active_res_scaling_config != this->resolution_scaling_config) + { + m_rtts.sync_scaling_config(cmd, active_res_scaling_config); + this->resolution_scaling_config = active_res_scaling_config; + } + // Cleanup m_gl_texture_cache.on_frame_end(); m_vertex_cache->purge(); diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 8f42995a9d..ad8943bd4f 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -218,6 +218,9 @@ OPENGL_PROC(PFNGLNAMEDBUFFERDATAEXTPROC, NamedBufferDataEXT); OPENGL_PROC(PFNGLNAMEDBUFFERSUBDATAPROC, NamedBufferSubData); OPENGL_PROC(PFNGLNAMEDBUFFERSUBDATAEXTPROC, NamedBufferSubDataEXT); +OPENGL_PROC(PFNGLCLEARNAMEDBUFFERSUBDATAPROC, ClearNamedBufferSubData); +OPENGL_PROC(PFNGLCLEARNAMEDBUFFERSUBDATAEXTPROC, ClearNamedBufferSubDataEXT); + // ARB_shader_image_load_store OPENGL_PROC(PFNGLBINDIMAGETEXTUREPROC, BindImageTexture); @@ -256,6 +259,7 @@ OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync); // KHR_debug OPENGL_PROC(PFNGLDEBUGMESSAGECALLBACKPROC, DebugMessageCallback); +OPENGL_PROC(PFNGLOBJECTLABELPROC, ObjectLabel); // Immutable textures OPENGL_PROC(PFNGLTEXSTORAGE1DPROC, TexStorage1D); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 18058842aa..34c2ca72d3 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -141,7 +141,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /* m_framebuffer_layout.width, 
m_framebuffer_layout.height, m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, m_framebuffer_layout.raster_type, m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address, - m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch); + m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch, + resolution_scaling_config); std::array color_targets; GLuint depth_stencil_target; @@ -448,7 +449,7 @@ void gl::render_target::load_memory(gl::command_context& cmd) subres.data = { vm::get_super_ptr(base_addr), static_cast::size_type>(rsx_pitch * surface_height * samples_y) }; // TODO: MSAA support - if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] + if (resolution_scaling_config.scale_percent == 100 && spp == 1) [[likely]] { gl::upload_texture(cmd, this, get_gcm_format(), is_swizzled, { subres }); } @@ -689,6 +690,8 @@ gl::viewable_image* gl::render_target::get_resolve_target_safe(gl::command_conte static_cast(get_internal_format()), format_class() )); + + resolve_surface->set_name(fmt::format("MSAA_Resolve_%u@0x%x", resolve_surface->id(), base_addr)); } return static_cast(resolve_surface.get()); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 9b01746a87..f5e2252b92 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -98,7 +98,7 @@ namespace gl bool matches_dimensions(u16 _width, u16 _height) const { //Use forward scaling to account for rounding and clamping errors - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(_width, _height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(resolution_scaling_config, _width, _height); return (scaled_w == width()) && (scaled_h == height()); } @@ -138,11 +138,12 @@ struct gl_render_target_traits u32 address, rsx::surface_color_format surface_color_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + 
rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& resolution_scaling_config ) { auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); - const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -160,7 +161,9 @@ struct gl_render_target_traits std::unique_ptr result(new gl::render_target(width_, height_, samples, static_cast(format.internal_format), RSX_FORMAT_CLASS_COLOR)); + result->set_name(fmt::format("RTV_%u@0x%x", result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_color_format) * result->samples_x); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_color_format); @@ -181,11 +184,12 @@ struct gl_render_target_traits u32 address, rsx::surface_depth_format2 surface_depth_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& resolution_scaling_config ) { auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); - const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -203,7 +207,9 @@ struct gl_render_target_traits std::unique_ptr result(new gl::render_target(width_, height_, samples, static_cast(format.internal_format), rsx::classify_format(surface_depth_format))); + result->set_name(fmt::format("DSV_%u@0x%x", 
result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_depth_format); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_depth_format) * result->samples_x); @@ -223,13 +229,17 @@ struct gl_render_target_traits void clone_surface( gl::command_context& cmd, std::unique_ptr& sink, gl::render_target* ref, - u32 address, barrier_descriptor_t& prev) + u32 address, barrier_descriptor_t& prev, + const rsx::surface_scaling_config_t& scaling_config) { if (!sink) { auto internal_format = static_cast(ref->get_internal_format()); - const auto [new_w, new_h] = rsx::apply_resolution_scale(prev.width, prev.height, - ref->get_surface_width(), ref->get_surface_height()); + const auto [new_w, new_h] = rsx::apply_resolution_scale( + scaling_config, + prev.width, prev.height, + ref->get_surface_width(), + ref->get_surface_height()); sink = std::make_unique(new_w, new_h, ref->samples(), internal_format, ref->format_class()); sink->add_ref(); @@ -238,6 +248,10 @@ struct gl_render_target_traits sink->state_flags = rsx::surface_state_flags::erase_bkgnd; sink->format_info = ref->format_info; + sink->sample_layout = ref->sample_layout; + sink->resolution_scaling_config = scaling_config; + + sink->set_name(fmt::format("SINK_%u@0x%x", sink->id(), address)); sink->set_spp(ref->get_spp()); sink->set_native_pitch(static_cast(prev.width) * ref->get_bpp() * ref->samples_x); sink->set_rsx_pitch(ref->get_rsx_pitch()); @@ -325,6 +339,7 @@ struct gl_render_target_traits std::array native_layout = { static_cast(fmt.swizzle.a), static_cast(fmt.swizzle.r), static_cast(fmt.swizzle.g), static_cast(fmt.swizzle.b) }; surface->set_native_component_layout(native_layout); surface->set_format(format); + surface->set_name(fmt::format("RTV_%u@0x%x", surface->id(), address)); 
int_invalidate_surface_contents(cmd, surface, address, pitch); } @@ -338,6 +353,7 @@ struct gl_render_target_traits usz pitch) { surface->set_format(format); + surface->set_name(fmt::format("DSV_%u@0x%x", surface->id(), address)); int_invalidate_surface_contents(cmd, surface, address, pitch); } @@ -370,6 +386,7 @@ struct gl_render_target_traits gl::texture::internal_format format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { if (check_refs && surface->has_refs()) @@ -377,7 +394,8 @@ struct gl_render_target_traits return surface->get_internal_format() == format && surface->get_spp() == get_format_sample_count(antialias) && - surface->matches_dimensions(static_cast(width), static_cast(height)); + surface->matches_dimensions(static_cast(width), static_cast(height)) && + surface->resolution_scaling_config == scaling_config; } static @@ -386,10 +404,11 @@ struct gl_render_target_traits rsx::surface_color_format format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs=false) { const auto internal_fmt = rsx::internals::surface_color_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, scaling_config, check_refs); } static @@ -398,10 +417,11 @@ struct gl_render_target_traits rsx::surface_depth_format2 format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { const auto internal_fmt = rsx::internals::surface_depth_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, 
antialias, scaling_config, check_refs); } static diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index c7f9ec2622..fa5b3627c4 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -300,6 +300,7 @@ namespace gl } builder << "\n" + "#undef TEX_PARAM\n" "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n" "#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n" "#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n" diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 9a439177f4..6f566b9a82 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -22,6 +22,13 @@ namespace gl legacy_ring_buffer g_upload_transfer_buffer; scratch_ring_buffer g_compute_decode_buffer; scratch_ring_buffer g_deswizzle_scratch_buffer; + blitter g_blitter; + + void init_global_texture_resources() + { + g_blitter.init(); + g_hw_blitter = &g_blitter; + } void destroy_global_texture_resources() { @@ -29,6 +36,8 @@ namespace gl g_upload_transfer_buffer.remove(); g_compute_decode_buffer.remove(); g_deswizzle_scratch_buffer.remove(); + g_blitter.destroy(); + g_hw_blitter = nullptr; } template @@ -157,42 +166,42 @@ namespace gl case texture::internal_format::compressed_rgba_s3tc_dxt1: case texture::internal_format::compressed_rgba_s3tc_dxt3: case texture::internal_format::compressed_rgba_s3tc_dxt5: - return { GL_RGBA, GL_UNSIGNED_BYTE, 1, false }; + return { .format = GL_RGBA, .type = GL_UNSIGNED_BYTE, .block_size = 1, .swap_bytes = false }; case texture::internal_format::r8: - return { GL_RED, GL_UNSIGNED_BYTE, 1, false }; + return { .format = GL_RED, .type = GL_UNSIGNED_BYTE, .block_size = 1, .swap_bytes = false }; case texture::internal_format::r16: - return { GL_RED, GL_UNSIGNED_SHORT, 2, true }; + return { .format = GL_RED, .type = GL_UNSIGNED_SHORT, .block_size = 2, .swap_bytes = true }; case 
texture::internal_format::r32f: - return { GL_RED, GL_FLOAT, 4, true }; + return { .format = GL_RED, .type = GL_FLOAT, .block_size = 4, .swap_bytes = true }; case texture::internal_format::rg8: - return { GL_RG, GL_UNSIGNED_SHORT, 2, true }; + return { .format = GL_RG, .type = GL_UNSIGNED_SHORT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rg16: - return { GL_RG, GL_UNSIGNED_SHORT, 2, true }; + return { .format = GL_RG, .type = GL_UNSIGNED_SHORT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rg16f: - return { GL_RG, GL_HALF_FLOAT, 2, true }; + return { .format = GL_RG, .type = GL_HALF_FLOAT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rgb565: - return { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 2, true }; + return { .format = GL_RGB, .type = GL_UNSIGNED_SHORT_5_6_5, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rgb5a1: - return { GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, 2, true }; + return { .format = GL_RGB, .type = GL_UNSIGNED_SHORT_5_5_5_1, .block_size = 2, .swap_bytes = true }; case texture::internal_format::bgr5a1: - return { GL_RGB, GL_UNSIGNED_SHORT_1_5_5_5_REV, 2, true }; + return { .format = GL_RGB, .type = GL_UNSIGNED_SHORT_1_5_5_5_REV, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rgba4: - return { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4, 2, false }; + return { .format = GL_BGRA, .type = GL_UNSIGNED_SHORT_4_4_4_4, .block_size = 2, .swap_bytes = false }; case texture::internal_format::rgba8: - return { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 4, true }; + return { .format = GL_RGBA, .type = GL_UNSIGNED_INT_8_8_8_8_REV, .block_size = 4, .swap_bytes = true }; case texture::internal_format::bgra8: - return { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 4, true }; + return { .format = GL_BGRA, .type = GL_UNSIGNED_INT_8_8_8_8_REV, .block_size = 4, .swap_bytes = true }; case texture::internal_format::rgba16f: - return { GL_RGBA, GL_HALF_FLOAT, 2, true }; + return { 
.format = GL_RGBA, .type = GL_HALF_FLOAT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::rgba32f: - return { GL_RGBA, GL_FLOAT, 4, true }; + return { .format = GL_RGBA, .type = GL_FLOAT, .block_size = 4, .swap_bytes = true }; case texture::internal_format::depth16: - return { GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 2, true }; + return { .format = GL_DEPTH_COMPONENT, .type = GL_UNSIGNED_SHORT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::depth32f: - return { GL_DEPTH_COMPONENT, GL_FLOAT, 2, true }; + return { .format = GL_DEPTH_COMPONENT, .type = GL_FLOAT, .block_size = 2, .swap_bytes = true }; case texture::internal_format::depth24_stencil8: case texture::internal_format::depth32f_stencil8: - return { GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 4, true }; + return { .format = GL_DEPTH_STENCIL, .type = GL_UNSIGNED_INT_24_8, .block_size = 4, .swap_bytes = true }; default: fmt::throw_exception("Unexpected internal format 0x%X", static_cast(format)); } @@ -266,6 +275,44 @@ namespace gl fmt::throw_exception("Unknown format 0x%x", texture_format); } + GLenum get_compatible_snorm_format(GLenum base_format) + { + switch (base_format) + { + case GL_R8: + return GL_R8_SNORM; + case GL_RG8: + return GL_RG8_SNORM; + case GL_RGBA8: + return GL_RGBA8_SNORM; + case GL_R16: + return GL_R16_SNORM; + case GL_RG16: + return GL_RG16_SNORM; + case GL_RGBA16: + return GL_RGBA16_SNORM; + default: + return GL_NONE; + } + } + + GLenum get_compatible_srgb_format(GLenum base_format) + { + switch (base_format) + { + case GL_RGBA8: + return GL_SRGB8_ALPHA8_EXT; + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; + default: + return GL_NONE; + } + } + cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info) { if 
(!pack_info.swap_bytes) @@ -273,7 +320,7 @@ namespace gl return nullptr; } - switch (pack_info.size) + switch (pack_info.block_size) { case 1: return nullptr; @@ -325,8 +372,10 @@ namespace gl } } - dst->bind(buffer::target::pixel_pack); - src->copy_to(reinterpret_cast(static_cast(dst_offset)), static_cast(pack_info.format), static_cast(pack_info.type), src_level, src_region, {}); + pixel_pack_settings pack_settings{}; + if (pack_info.alignment) pack_settings.alignment(pack_info.alignment); + if (pack_info.row_length) pack_settings.row_length(pack_info.row_length); + src->copy_to(*dst, dst_offset, static_cast(pack_info.format), static_cast(pack_info.type), src_level, src_region, pack_settings); return false; }; @@ -573,10 +622,12 @@ namespace gl } glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE); - transfer_buf->bind(buffer::target::pixel_unpack); - dst->copy_from(reinterpret_cast(u64(out_offset)), static_cast(unpack_info.format), - static_cast(unpack_info.type), dst_level, dst_region, {}); + pixel_unpack_settings unpack_settings{}; + if (unpack_info.alignment) unpack_settings.alignment(unpack_info.alignment); + if (unpack_info.format) unpack_settings.row_length(unpack_info.row_length); + dst->copy_from(*transfer_buf, out_offset, static_cast(unpack_info.format), + static_cast(unpack_info.type), dst_level, dst_region, unpack_settings); } } @@ -666,172 +717,168 @@ namespace gl } } } + + return; } - else + + std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; + image_memory_requirements mem_info; + pixel_buffer_layout mem_layout; + + std::span dst_buffer = staging_buffer; + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); + u64 image_linear_size = staging_buffer.size(); + + const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); + + if (driver_caps.ARB_compute_shader_supported) { - std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; - image_memory_requirements mem_info; - 
pixel_buffer_layout mem_layout; - - std::span dst_buffer = staging_buffer; - void* out_pointer = staging_buffer.data(); - u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - u64 image_linear_size = staging_buffer.size(); - - const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); - - if (driver_caps.ARB_compute_shader_supported) + if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) { - if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) - { - g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); - } - - if (g_compute_decode_buffer.size() < min_required_buffer_size) - { - g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } - - out_pointer = nullptr; + g_upload_transfer_buffer.remove(); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); } - for (const rsx::subresource_layout& layout : input_layouts) + if (g_compute_decode_buffer.size() < min_required_buffer_size) { - if (driver_caps.ARB_compute_shader_supported) + g_compute_decode_buffer.remove(); + g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + } + + for (const rsx::subresource_layout& layout : input_layouts) + { + if (driver_caps.ARB_compute_shader_supported) + { + u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + + // We're in the "else" branch, so "is_compressed_host_format()" is always false. + // Handle emulated compressed formats with host unpack (R8G8 compressed) + row_pitch = std::max(row_pitch, dst->pitch()); + + // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. 
+ image_linear_size = row_pitch * layout.height_in_texel * layout.depth; + + compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; + compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); + + g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); + upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); + dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + } + + rsx::io_buffer io_buf = dst_buffer; + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); + auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + + // Define upload region + coord3u region; + region.x = 0; + region.y = 0; + region.z = layout.layer; + region.width = layout.width_in_texel; + region.height = layout.height_in_texel; + region.depth = layout.depth; + + if (!driver_caps.ARB_compute_shader_supported) + { + unpack_settings.swap_bytes(op.require_swap); + dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + continue; + } + + // 0. Preconf + mem_layout.alignment = static_cast(caps.alignment); + mem_layout.swap_bytes = op.require_swap; + mem_layout.format = gl_format; + mem_layout.type = gl_type; + mem_layout.block_size = block_size_in_bytes; + + // 2. 
Upload memory to GPU + if (!op.require_deswizzle) + { + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + } + else + { + // 2.1 Copy data to deswizzle buf + if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) { - u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); - - // We're in the "else" branch, so "is_compressed_host_format()" is always false. - // Handle emulated compressed formats with host unpack (R8G8 compressed) - row_pitch = std::max(row_pitch, dst->pitch()); - - // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. - image_linear_size = row_pitch * layout.height_in_texel * layout.depth; - - compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; - compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); - - g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); - upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); - dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + g_deswizzle_scratch_buffer.remove(); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); } - rsx::io_buffer io_buf = dst_buffer; - caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); - auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - // Define upload region - coord3u 
region; - region.x = 0; - region.y = 0; - region.z = layout.layer; - region.width = layout.width_in_texel; - region.height = layout.height_in_texel; - region.depth = layout.depth; + // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem + const auto block_size = op.element_size * op.block_length; - if (driver_caps.ARB_compute_shader_supported) + if (op.require_swap) { - // 0. Preconf - mem_layout.alignment = static_cast(caps.alignment); - mem_layout.swap_bytes = op.require_swap; - mem_layout.format = gl_format; - mem_layout.type = gl_type; - mem_layout.size = block_size_in_bytes; + mem_layout.swap_bytes = false; - // 2. Upload memory to GPU - if (!op.require_deswizzle) + switch (op.element_size) { - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } - else - { - // 2.1 Copy data to deswizzle buf - if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) - { - g_deswizzle_scratch_buffer.remove(); - 
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } - - u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - - // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem - const auto block_size = op.element_size * op.block_length; - - if (op.require_swap) - { - mem_layout.swap_bytes = false; - - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - else - { - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, 
&g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - - // Barrier - g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); - } - - // 3. Update configuration - mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; - mem_info.image_size_in_bytes = image_linear_size; - mem_info.memory_required = 0; - - // 4. Dispatch compute routines - copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); - - // Barrier - g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } else { - unpack_settings.swap_bytes(op.require_swap); - dst->copy_from(out_pointer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + switch (op.element_size) + { + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), 
compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); + } } + + // Barrier + g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); } + + // 3. Update configuration + mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; + mem_info.image_size_in_bytes = image_linear_size; + mem_info.memory_required = 0; + + // 4. Dispatch compute routines + copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); + + // Barrier + g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } } @@ -1019,7 +1066,7 @@ namespace gl skip_transform = (pack_info.format == unpack_info.format && pack_info.type == unpack_info.type && pack_info.swap_bytes == unpack_info.swap_bytes && - pack_info.size == unpack_info.size); + pack_info.block_size == unpack_info.block_size); } if (skip_transform) [[likely]] @@ -1100,7 +1147,7 @@ namespace gl if (src->aspect() & image_aspect::depth) { // Source is depth, modify unpack rule - if (pack_info.size == 4 && unpack_info.size == 4) + if (pack_info.block_size == 4 && unpack_info.block_size == 4) { unpack_info.swap_bytes = !unpack_info.swap_bytes; } @@ -1108,7 +1155,7 @@ namespace gl else { // Dest is depth, modify pack rule - if (pack_info.size == 4 && unpack_info.size == 4) + if (pack_info.block_size == 4 && unpack_info.block_size == 4) { pack_info.swap_bytes = !pack_info.swap_bytes; } @@ -1118,9 +1165,7 @@ namespace gl // Start pack operation pixel_pack_settings pack_settings{}; pack_settings.swap_bytes(pack_info.swap_bytes); - - g_typeless_transfer_buffer.get().bind(buffer::target::pixel_pack); - src->copy_to(nullptr, static_cast(pack_info.format), 
static_cast(pack_info.type), 0, src_region, pack_settings); + src->copy_to(g_typeless_transfer_buffer.get(), 0, static_cast(pack_info.format), static_cast(pack_info.type), 0, src_region, pack_settings); glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); @@ -1128,8 +1173,7 @@ namespace gl pixel_unpack_settings unpack_settings{}; unpack_settings.swap_bytes(unpack_info.swap_bytes); - g_typeless_transfer_buffer.get().bind(buffer::target::pixel_unpack); - dst->copy_from(nullptr, static_cast(unpack_info.format), static_cast(unpack_info.type), 0, dst_region, unpack_settings); + dst->copy_from(g_typeless_transfer_buffer.get(), 0, static_cast(unpack_info.format), static_cast(unpack_info.type), 0, dst_region, unpack_settings); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index dc6d90098a..6d0bbb18a3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -14,18 +14,20 @@ namespace gl { struct pixel_buffer_layout { - GLenum format; - GLenum type; - u8 size; - bool swap_bytes; - u8 alignment; + GLenum format = GL_RGBA; + GLenum type = GL_UNSIGNED_BYTE; + u32 row_length = 0; + u8 block_size = 0; + bool swap_bytes = false; + u8 alignment = 0; + u8 reserved; }; struct image_memory_requirements { - u64 image_size_in_texels; - u64 image_size_in_bytes; - u64 memory_required; + u64 image_size_in_texels = 0; + u64 image_size_in_bytes = 0; + u64 memory_required = 0; }; struct clear_cmd_info @@ -62,6 +64,8 @@ namespace gl std::tuple get_format_type(u32 texture_format); pixel_buffer_layout get_format_type(texture::internal_format format); std::array get_swizzle_remap(u32 texture_format); + GLenum get_compatible_snorm_format(GLenum base_format); + GLenum get_compatible_srgb_format(GLenum base_format); viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type); @@ -84,5 +88,6 @@ namespace gl extern std::unique_ptr 
g_vis_texture; } + void init_global_texture_resources(); void destroy_global_texture_resources(); } diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp index c2b0e3c252..f8aa9d95df 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp @@ -152,6 +152,8 @@ namespace gl dst = data.get(); dst->properties_encoding = match_key; m_temporary_surfaces.emplace_back(std::move(data)); + + dst->set_name(fmt::format("[Temp View] id=%u, fmt=0x%x", dst->id(), gcm_format)); } dst->add_ref(); @@ -179,6 +181,10 @@ namespace gl auto components = get_component_mapping(gcm_format, rsx::component_order::default_); dst->set_native_component_layout(components); } + else + { + dst->set_native_component_layout(src->get_native_component_layout()); + } return dst->get_view(remap); } diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index ae3b8e37e9..93c0ba2f5c 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -48,7 +48,7 @@ namespace gl void init_buffer(const gl::texture* src) { - const u32 vram_size = src->pitch() * src->height(); + const u32 vram_size = std::max(src->pitch() * src->height(), get_section_size()); const u32 buffer_size = utils::align(vram_size, 4096); if (pbo) @@ -148,7 +148,7 @@ namespace gl } } - void dma_transfer(gl::command_context& cmd, gl::texture* src, const areai& /*src_area*/, const utils::address_range32& /*valid_range*/, u32 pitch) + void dma_transfer(gl::command_context& cmd, gl::texture* src, const areai& src_area, const utils::address_range32& valid_range, u32 pitch) { init_buffer(src); glGetError(); @@ -165,6 +165,20 @@ namespace gl real_pitch = src->pitch(); rsx_pitch = pitch; + const coord3u src_rgn = + { + { static_cast(src_area.x1), static_cast(src_area.y1), 0 }, + { static_cast(src_area.width()), static_cast(src_area.height()), 1 } + }; + + u32 pbo_offset = 0; + if (valid_range.valid()) + { + const u32 section_base 
= get_section_base(); + pbo_offset = valid_range.start - section_base; + ensure(valid_range.start >= section_base && pbo_offset <= pbo.size()); + } + bool use_driver_pixel_transform = true; if (get_driver_caps().ARB_compute_shader_supported) [[likely]] { @@ -180,11 +194,12 @@ namespace gl pack_info.format = static_cast(format); pack_info.type = static_cast(type); - pack_info.size = (src->aspect() & image_aspect::stencil) ? 4 : 2; + pack_info.block_size = (src->aspect() & image_aspect::stencil) ? 4 : 2; pack_info.swap_bytes = true; + pack_info.row_length = rsx_pitch / pack_info.block_size; - mem_info.image_size_in_texels = src->width() * src->height(); - mem_info.image_size_in_bytes = src->pitch() * src->height(); + mem_info.image_size_in_texels = pack_info.row_length * src_area.height(); + mem_info.image_size_in_bytes = rsx_pitch * src_area.height(); mem_info.memory_required = 0; if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) @@ -193,14 +208,16 @@ namespace gl mem_info.image_size_in_bytes *= 2; } - void* out_offset = copy_image_to_buffer(cmd, pack_info, src, &scratch_mem, 0, 0, { {}, src->size3D() }, &mem_info); + void* out_offset = copy_image_to_buffer(cmd, pack_info, src, &scratch_mem, 0, 0, src_rgn, &mem_info); + real_pitch = rsx_pitch; glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - real_pitch = pack_info.size * src->width(); - const u64 data_length = pack_info.size * mem_info.image_size_in_texels; - scratch_mem.copy_to(&pbo, reinterpret_cast(out_offset), 0, data_length); + const u64 data_length = mem_info.image_size_in_bytes - rsx_pitch + (src_area.width() * pack_info.block_size); + ensure(data_length + pbo_offset <= static_cast(pbo.size()), "Memory allocation cannot fit image contents. 
Report to developers."); + + scratch_mem.copy_to(&pbo, reinterpret_cast(out_offset), pbo_offset, data_length); } else { @@ -219,13 +236,16 @@ namespace gl pack_unpack_swap_bytes = false; } - pbo.bind(buffer::target::pixel_pack); + const auto bpp = src->pitch() / src->width(); + real_pitch = rsx_pitch; + ensure((real_pitch % bpp) == 0); pixel_pack_settings pack_settings; pack_settings.alignment(1); pack_settings.swap_bytes(pack_unpack_swap_bytes); + pack_settings.row_length(rsx_pitch / bpp); - src->copy_to(nullptr, format, type, pack_settings); + src->copy_to(pbo, pbo_offset, format, type, 0, src_rgn, pack_settings); } if (auto error = glGetError()) @@ -266,6 +286,8 @@ namespace gl gl::texture* target_texture = vram_texture; u32 transfer_width = width; u32 transfer_height = height; + u32 transfer_x = 0, transfer_y = 0; + u16 resolution_scale_percent = 100; if (context == rsx::texture_upload_context::framebuffer_storage) { @@ -274,9 +296,10 @@ namespace gl target_texture = surface->get_surface(rsx::surface_access::transfer_read); transfer_width *= surface->samples_x; transfer_height *= surface->samples_y; + resolution_scale_percent = surface->resolution_scaling_config.scale_percent; } - if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) || + if ((resolution_scale_percent != 100 && context == rsx::texture_upload_context::framebuffer_storage) || (vram_texture->pitch() != rsx_pitch)) { areai src_area = { 0, 0, 0, 0 }; @@ -311,7 +334,35 @@ namespace gl } } - dma_transfer(cmd, target_texture, {}, {}, rsx_pitch); + const auto valid_range = get_confirmed_range(); + if (const auto section_range = get_section_range(); section_range != valid_range) + { + if (const auto offset = (valid_range.start - get_section_base())) + { + transfer_y = offset / rsx_pitch; + transfer_x = (offset % rsx_pitch) / rsx::get_format_block_size_in_bytes(gcm_format); + + ensure(transfer_width >= transfer_x); + ensure(transfer_height >= 
transfer_y); + transfer_width -= transfer_x; + transfer_height -= transfer_y; + } + + if (const auto tail = (section_range.end - valid_range.end)) + { + const auto row_count = tail / rsx_pitch; + + ensure(transfer_height >= row_count); + transfer_height -= row_count; + } + } + + areai src_area; + src_area.x1 = static_cast(transfer_x); + src_area.y1 = static_cast(transfer_y); + src_area.x2 = s32(transfer_x + transfer_width); + src_area.y2 = s32(transfer_y + transfer_height); + dma_transfer(cmd, target_texture, src_area, valid_range, rsx_pitch); } /** @@ -427,9 +478,7 @@ namespace gl using gl::viewable_image::viewable_image; }; - blitter m_hw_blitter; std::vector> m_temporary_surfaces; - const u32 max_cached_image_pool_size = 256; private: @@ -537,7 +586,8 @@ namespace gl gl::texture_view* generate_cubemap_from_images(gl::command_context& cmd, u32 gcm_format, u16 size, const rsx::simple_array& sources, const rsx::texture_channel_remap_t& remap_vector) override { auto _template = get_template_from_collection_impl(sources); - auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_CUBE_MAP, gcm_format, 0, 0, size, size, 1, 1, remap_vector, false); + const u8 mip_count = 1 + sources.reduce(0, FN(std::max(x, y.level))); + auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_CUBE_MAP, gcm_format, 0, 0, size, size, 1, mip_count, remap_vector, false); copy_transfer_regions_impl(cmd, result->image(), sources); return result; @@ -733,6 +783,7 @@ namespace gl gl::upload_texture(cmd, section->get_raw_texture(), gcm_format, input_swizzled, subresource_layout); + section->get_raw_texture()->set_name(fmt::format("Raw Texture @0x%x", rsx_range.start)); section->last_write_tag = rsx::get_shared_tag(); return section; } @@ -810,16 +861,11 @@ namespace gl using baseclass::texture_cache; void initialize() - { - m_hw_blitter.init(); - g_hw_blitter = &m_hw_blitter; - } + {} void destroy() override { clear(); - g_hw_blitter = 
nullptr; - m_hw_blitter.destroy(); } bool is_depth_texture(u32 rsx_address, u32 rsx_size) override @@ -865,7 +911,7 @@ namespace gl bool blit(gl::command_context& cmd, const rsx::blit_src_info& src, const rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) { - auto result = upload_scaled_image(src, dst, linear_interpolate, cmd, m_rtts, m_hw_blitter); + auto result = upload_scaled_image(src, dst, linear_interpolate, cmd, m_rtts, *g_hw_blitter); if (result.succeeded) { diff --git a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp index 57998b761d..47c7d7b1ca 100644 --- a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp @@ -8,6 +8,18 @@ namespace gl { blitter* g_hw_blitter = nullptr; + void blitter::init() + { + blit_src.create(); + blit_dst.create(); + } + + void blitter::destroy() + { + blit_dst.remove(); + blit_src.remove(); + } + void blitter::copy_image(gl::command_context&, const texture* src, const texture* dst, int src_level, int dst_level, const position3i& src_offset, const position3i& dst_offset, const size3i& size) const { ensure(src_level == 0); @@ -147,6 +159,9 @@ namespace gl gl::fbo::attachment dst_att{ blit_dst, static_cast(attachment) }; dst_att = *real_dst; + blit_src.check(); + blit_dst.check(); + blit_src.blit(blit_dst, src_rect, dst_rect, target, interp); // Release the attachments explicitly (not doing so causes glitches, e.g Journey Menu) diff --git a/rpcs3/Emu/RSX/GL/glutils/blitter.h b/rpcs3/Emu/RSX/GL/glutils/blitter.h index d7adc1dd14..d56754fcae 100644 --- a/rpcs3/Emu/RSX/GL/glutils/blitter.h +++ b/rpcs3/Emu/RSX/GL/glutils/blitter.h @@ -30,17 +30,9 @@ namespace gl public: - void init() - { - blit_src.create(); - blit_dst.create(); - } + void init(); - void destroy() - { - blit_dst.remove(); - blit_src.remove(); - } + void destroy(); void scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool 
linear_interpolation, const rsx::typeless_xfer& xfer_info); diff --git a/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp b/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp index 5c1f0d0447..f3849718d0 100644 --- a/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/buffer_object.cpp @@ -131,6 +131,11 @@ namespace gl DSA_CALL2(NamedBufferSubData, m_id, offset, length, data); } + void buffer::fill(GLsizeiptr offset, GLsizeiptr length, GLuint pattern) + { + DSA_CALL2(ClearNamedBufferSubData, m_id, GL_R32UI, offset, length, GL_RED, GL_UNSIGNED_INT, &pattern); + } + GLubyte* buffer::map(GLsizeiptr offset, GLsizeiptr length, access access_) { ensure(m_memory_type == memory_type::host_visible); diff --git a/rpcs3/Emu/RSX/GL/glutils/buffer_object.h b/rpcs3/Emu/RSX/GL/glutils/buffer_object.h index dccb2a314e..28730bdd0d 100644 --- a/rpcs3/Emu/RSX/GL/glutils/buffer_object.h +++ b/rpcs3/Emu/RSX/GL/glutils/buffer_object.h @@ -15,7 +15,9 @@ namespace gl element_array = GL_ELEMENT_ARRAY_BUFFER, uniform = GL_UNIFORM_BUFFER, texture = GL_TEXTURE_BUFFER, - ssbo = GL_SHADER_STORAGE_BUFFER + ssbo = GL_SHADER_STORAGE_BUFFER, + copy_src = GL_COPY_READ_BUFFER, + copy_dst = GL_COPY_WRITE_BUFFER }; enum class access @@ -65,6 +67,8 @@ namespace gl case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break; case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break; case target::ssbo: pname = GL_SHADER_STORAGE_BUFFER_BINDING; break; + case target::copy_src: pname = GL_COPY_READ_BUFFER_BINDING; break; + case target::copy_dst: pname = GL_COPY_WRITE_BUFFER_BINDING; break; default: fmt::throw_exception("Invalid binding state target (0x%x)", static_cast(target_)); } @@ -113,6 +117,7 @@ namespace gl void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW); void sub_data(GLsizeiptr offset, GLsizeiptr length, const GLvoid* data); + void fill(GLsizeiptr offset, GLsizeiptr length, GLuint pattern); GLubyte* map(GLsizeiptr offset, GLsizeiptr length, 
access access_); void unmap(); diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp index d6a51beb09..a6999d6f9c 100644 --- a/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.cpp @@ -2,6 +2,7 @@ #include "capabilities.h" #include "Utilities/StrUtil.h" +#include "Emu/system_config.h" #include @@ -43,6 +44,8 @@ namespace gl all_extensions.emplace(reinterpret_cast(glGetStringi(GL_EXTENSIONS, i))); } + RENDERDOC_debug = !!g_cfg.video.renderdoc_compatiblity; + #define CHECK_EXTENSION_SUPPORT(extension_short_name)\ do {\ if (all_extensions.contains("GL_"#extension_short_name)) {\ diff --git a/rpcs3/Emu/RSX/GL/glutils/capabilities.h b/rpcs3/Emu/RSX/GL/glutils/capabilities.h index 756250430b..cfe104ffbe 100644 --- a/rpcs3/Emu/RSX/GL/glutils/capabilities.h +++ b/rpcs3/Emu/RSX/GL/glutils/capabilities.h @@ -23,6 +23,7 @@ namespace gl bool initialized = false; version_info glsl_version; + bool RENDERDOC_debug = false; bool EXT_direct_state_access_supported = false; bool EXT_depth_bounds_test_supported = false; bool AMD_pinned_memory_supported = false; diff --git a/rpcs3/Emu/RSX/GL/glutils/common.h b/rpcs3/Emu/RSX/GL/glutils/common.h index f99c3590a6..24e62a9189 100644 --- a/rpcs3/Emu/RSX/GL/glutils/common.h +++ b/rpcs3/Emu/RSX/GL/glutils/common.h @@ -76,10 +76,30 @@ namespace gl } }; - // Very useful util when capturing traces with RenderDoc - static inline void push_debug_label(const char* label) + template + struct named_object { - glInsertEventMarkerEXT(static_cast(strlen(label)), label); + protected: + GLuint m_id = GL_NONE; + std::string m_name = "Unnamed"; + + public: + void set_name(std::string_view name) + { + m_name = name.data(); + glObjectLabel(Ns, m_id, static_cast(name.length()), name.data()); + } + + std::string_view name() const + { + return m_name; + } + }; + + // Very useful util when capturing traces with RenderDoc + static inline void push_debug_label(std::string_view label) + 
{ + glInsertEventMarkerEXT(static_cast(label.size()), label.data()); } // Checks if GL state is still valid diff --git a/rpcs3/Emu/RSX/GL/glutils/image.cpp b/rpcs3/Emu/RSX/GL/glutils/image.cpp index bb6439cc05..e2f02afdfa 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/image.cpp @@ -19,6 +19,54 @@ namespace gl } } + static const char* gl_type_to_str(texture::type type) + { + switch (type) + { + case texture::type::ubyte: return "GL_UNSIGNED_BYTE"; + case texture::type::ushort: return "GL_UNSIGNED_SHORT"; + case texture::type::uint: return "GL_UNSIGNED_INT"; + case texture::type::ubyte_3_3_2: return "GL_UNSIGNED_BYTE_3_3_2"; + case texture::type::ubyte_2_3_3_rev: return "GL_UNSIGNED_BYTE_2_3_3_REV"; + case texture::type::ushort_5_6_5: return "GL_UNSIGNED_SHORT_5_6_5"; + case texture::type::ushort_5_6_5_rev: return "GL_UNSIGNED_SHORT_5_6_5_REV"; + case texture::type::ushort_4_4_4_4: return "GL_UNSIGNED_SHORT_4_4_4_4"; + case texture::type::ushort_4_4_4_4_rev: return "GL_UNSIGNED_SHORT_4_4_4_4_REV"; + case texture::type::ushort_5_5_5_1: return "GL_UNSIGNED_SHORT_5_5_5_1"; + case texture::type::ushort_1_5_5_5_rev: return "GL_UNSIGNED_SHORT_1_5_5_5_REV"; + case texture::type::uint_8_8_8_8: return "GL_UNSIGNED_INT_8_8_8_8"; + case texture::type::uint_8_8_8_8_rev: return "GL_UNSIGNED_INT_8_8_8_8_REV"; + case texture::type::uint_10_10_10_2: return "GL_UNSIGNED_INT_10_10_10_2"; + case texture::type::uint_2_10_10_10_rev: return "GL_UNSIGNED_INT_2_10_10_10_REV"; + case texture::type::uint_24_8: return "GL_UNSIGNED_INT_24_8"; + case texture::type::float32_uint8: return "GL_FLOAT_32_UNSIGNED_INT_24_8_REV"; + case texture::type::sbyte: return "GL_BYTE"; + case texture::type::sshort: return "GL_SHORT"; + case texture::type::sint: return "GL_INT"; + case texture::type::f16: return "GL_HALF_FLOAT"; + case texture::type::f32: return "GL_FLOAT"; + case texture::type::f64: return "GL_DOUBLE"; + default: return "UNKNOWN"; + } + } + + static const char* 
gl_format_to_str(texture::format format) + { + switch (format) + { + case texture::format::r: return "GL_RED"; + case texture::format::rg: return "GL_RG"; + case texture::format::rgb: return "GL_RGB"; + case texture::format::rgba: return "GL_RGBA"; + case texture::format::bgr: return "GL_BGR"; + case texture::format::bgra: return "GL_BGRA"; + case texture::format::stencil: return "GL_STENCIL_INDEX"; + case texture::format::depth: return "GL_DEPTH_COMPONENT"; + case texture::format::depth_stencil: return "GL_DEPTH_STENCIL"; + default: return "UNKNOWN"; + } + } + texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLubyte samples, GLenum sized_format, rsx::format_class format_class) { // Upgrade targets for MSAA @@ -175,7 +223,7 @@ namespace gl m_id = GL_NONE; } - void texture::copy_from(const void* src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings) + void texture::copy_from(const rsx::io_buffer& src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings) { ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); @@ -185,30 +233,30 @@ namespace gl { case GL_TEXTURE_1D: { - DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, level, region.x, region.width, static_cast(format), static_cast(type), src); + DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, level, region.x, region.width, static_cast(format), static_cast(type), src.data()); break; } case GL_TEXTURE_2D: { - DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, level, region.x, region.y, region.width, region.height, static_cast(format), static_cast(type), src); + DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, level, region.x, region.y, region.width, region.height, static_cast(format), static_cast(type), src.data()); break; } case GL_TEXTURE_3D: case GL_TEXTURE_2D_ARRAY: { - DSA_CALL(TextureSubImage3D, m_id, 
target_, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast(format), static_cast(type), src); + DSA_CALL(TextureSubImage3D, m_id, target_, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast(format), static_cast(type), src.data()); break; } case GL_TEXTURE_CUBE_MAP: { if (get_driver_caps().ARB_direct_state_access_supported) { - glTextureSubImage3D(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast(format), static_cast(type), src); + glTextureSubImage3D(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast(format), static_cast(type), src.data()); } else { rsx_log.warning("Cubemap upload via texture::copy_from is halfplemented!"); - auto ptr = static_cast(src); + auto ptr = static_cast(src.data()); const auto end = std::min(6u, region.z + region.depth); for (unsigned face = region.z; face < end; ++face) { @@ -221,40 +269,51 @@ namespace gl } } - void texture::copy_from(buffer& buf, u32 gl_format_type, u32 offset, u32 length) + void texture::copy_from(buffer& buf, GLsizeiptr offset, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings) { ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); - if (get_target() != target::textureBuffer) - fmt::throw_exception("OpenGL error: texture cannot copy from buffer"); + buf.bind(buffer::target::pixel_unpack); - DSA_CALL(TextureBufferRange, m_id, GL_TEXTURE_BUFFER, gl_format_type, buf.id(), offset, length); + const rsx::io_buffer src{ reinterpret_cast(static_cast(offset)), buf.size() - offset }; + copy_from(src, format, type, level, region, pixel_settings); } void texture::copy_from(buffer_view& view) { - copy_from(*view.value(), view.format(), view.offset(), view.range()); + if (get_target() != target::textureBuffer) + fmt::throw_exception("OpenGL error: 
texture cannot copy from buffer"); + + DSA_CALL(TextureBufferRange, m_id, GL_TEXTURE_BUFFER, view.format(), view.value()->id(), view.offset(), view.range()); } - void texture::copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const + void texture::copy_to(const rsx::io_buffer& dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const { ensure(m_samples <= 1, "Transfer operations are unsupported on multisampled textures."); pixel_settings.apply(); const auto& caps = get_driver_caps(); + if (caps.RENDERDOC_debug) + { + const auto msg = fmt::format("glGetTextureSubImage('[%u] %s', %u, %u, %u, %u, %u, %u, %u, %s, %s, %d, %p)", + m_id, m_name.c_str(), level, region.x, region.y, region.z, region.width, region.height, region.depth, + gl_format_to_str(format), gl_type_to_str(type), s32{ smax }, dst.data()); + push_debug_label(msg); + } + if (!region.x && !region.y && !region.z && region.width == m_width && region.height == m_height && region.depth == m_depth) { if (caps.ARB_direct_state_access_supported) - glGetTextureImage(m_id, level, static_cast(format), static_cast(type), s32{ smax }, dst); + glGetTextureImage(m_id, level, static_cast(format), static_cast(type), dst.size(), dst.data()); else - glGetTextureImageEXT(m_id, static_cast(m_target), level, static_cast(format), static_cast(type), dst); + glGetTextureImageEXT(m_id, static_cast(m_target), level, static_cast(format), static_cast(type), dst.data()); } else if (caps.ARB_direct_state_access_supported) { glGetTextureSubImage(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth, - static_cast(format), static_cast(type), s32{ smax }, dst); + static_cast(format), static_cast(type), s32{ smax }, dst.data()); } else { @@ -269,6 +328,16 @@ namespace gl } } + void texture::copy_to(buffer& buf, GLsizeiptr offset, texture::format 
format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const + { + ensure(offset < buf.size(), "PBO write is out of range"); + + buf.bind(buffer::target::pixel_pack); + + const rsx::io_buffer dst{ reinterpret_cast(static_cast(offset)), buf.size() - offset }; + copy_to(dst, format, type, level, region, pixel_settings); + } + void texture_view::create(texture* data, GLenum target, GLenum sized_format, const subresource_range& range, const GLenum* argb_swizzle) { m_target = target; @@ -318,6 +387,34 @@ namespace gl } } + texture_view* texture_view::as(GLenum format) + { + if (format == this->m_view_format) + { + return this; + } + + auto self = m_root_view ? m_root_view : this; + if (auto found = self->m_subviews.find(format); + found != self->m_subviews.end()) + { + return found->second.get(); + } + + GLenum swizzle_argb[4] = + { + component_swizzle[3], + component_swizzle[0], + component_swizzle[1], + component_swizzle[2], + }; + + auto view = std::make_unique(m_image_data, m_target, format, swizzle_argb, m_aspect_flags); + auto ret = view.get(); + self->m_subviews.emplace(format, std::move(view)); + return ret; + } + void texture_view::bind(gl::command_context& cmd, GLuint layer) const { cmd->bind_texture(layer, m_target, m_id); @@ -369,6 +466,8 @@ namespace gl auto view = std::make_unique(this, swizzle, aspect_flags); auto result = view.get(); views.emplace(key, std::move(view)); + + result->set_name(fmt::format("%s, remap=%x", name(), remap.encoded)); return result; } diff --git a/rpcs3/Emu/RSX/GL/glutils/image.h b/rpcs3/Emu/RSX/GL/glutils/image.h index 6617caa54c..bd974c226a 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.h +++ b/rpcs3/Emu/RSX/GL/glutils/image.h @@ -4,6 +4,7 @@ #include "Utilities/geometry.h" #include "Emu/RSX/Common/TextureUtils.h" +#include "Emu/RSX/Common/io_buffer.h" //using enum rsx::format_class; using namespace ::rsx::format_class_; @@ -58,7 +59,7 @@ namespace gl GLuint num_layers; }; - class 
texture + class texture : public named_object { friend class texture_view; @@ -180,7 +181,6 @@ namespace gl }; protected: - GLuint m_id = GL_NONE; GLuint m_width = 0; GLuint m_height = 0; GLuint m_depth = 0; @@ -321,39 +321,43 @@ namespace gl } // Data management - void copy_from(const void* src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings); + void copy_from(const rsx::io_buffer& src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings); - void copy_from(buffer& buf, u32 gl_format_type, u32 offset, u32 length); + void copy_from(buffer& buf, GLsizeiptr offset, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings); void copy_from(buffer_view& view); - void copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const; + void copy_to(const rsx::io_buffer& dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const; + + void copy_to(buffer& buf, GLsizeiptr offset, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const; // Convenience wrappers - void copy_from(const void* src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings) + void copy_from(const rsx::io_buffer& src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings) { const coord3u region = { {}, size3D() }; copy_from(src, format, type, 0, region, pixel_settings); } - void copy_to(void* dst, texture::format format, texture::type type, const pixel_pack_settings& pixel_settings) const + void copy_to(const rsx::io_buffer& dst, texture::format format, texture::type type, const pixel_pack_settings& 
pixel_settings) const { const coord3u region = { {}, size3D() }; copy_to(dst, format, type, 0, region, pixel_settings); } }; - class texture_view + class texture_view : public named_object { protected: - GLuint m_id = GL_NONE; GLenum m_target = 0; GLenum m_format = 0; GLenum m_view_format = 0; GLenum m_aspect_flags = 0; texture* m_image_data = nullptr; - GLenum component_swizzle[4] {}; + GLenum component_swizzle[4]{}; + + std::unordered_map> m_subviews; + texture_view* m_root_view = nullptr; texture_view() = default; @@ -395,6 +399,8 @@ namespace gl virtual ~texture_view(); + texture_view* as(GLenum format); + GLuint id() const { return m_id; @@ -457,6 +463,7 @@ namespace gl class viewable_image : public texture { + protected: std::unordered_map> views; public: diff --git a/rpcs3/Emu/RSX/GL/glutils/sampler.cpp b/rpcs3/Emu/RSX/GL/glutils/sampler.cpp index 580caf0dc2..4b1b603fc6 100644 --- a/rpcs3/Emu/RSX/GL/glutils/sampler.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/sampler.cpp @@ -72,7 +72,7 @@ namespace gl } // Apply sampler state settings - void sampler_state::apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image) + void sampler_state::apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image, bool allow_mipmaps) { set_parameteri(GL_TEXTURE_WRAP_S, wrap_mode(tex.wrap_s())); set_parameteri(GL_TEXTURE_WRAP_T, wrap_mode(tex.wrap_t())); @@ -82,17 +82,39 @@ namespace gl { // NOTE: In OpenGL, the border texels are processed by the pipeline and will be swizzled by the texture view. // Therefore, we pass the raw value here, and the texture view will handle the rest for us. 
- const auto encoded_color = tex.border_color(); - if (get_parameteri(GL_TEXTURE_BORDER_COLOR) != encoded_color) + const bool sext_conv_required = (sampled_image->format_ex.texel_remap_control & rsx::SEXT_MASK) != 0; + const auto encoded_color = tex.border_color(sext_conv_required); + const auto host_features = sampled_image->format_ex.host_features; + + if (get_parameteri(GL_TEXTURE_BORDER_COLOR) != encoded_color || + get_parameteri(GL_TEXTURE_BORDER_VALUES_NV) != host_features) { m_propertiesi[GL_TEXTURE_BORDER_COLOR] = encoded_color; - const auto border_color = rsx::decode_border_color(encoded_color); + m_propertiesi[GL_TEXTURE_BORDER_VALUES_NV] = host_features; + + auto border_color = rsx::decode_border_color(encoded_color); + if (sampled_image->format_ex.host_snorm_format_active()) [[ unlikely ]] + { + // Hardware SNORM is active + // Convert the border color in host space (2N - 1) + // HW does the conversion in integer space as (x - 128) / 127 which introduces a biasing error. + const float bias_v = 128.f / 255.f; + const float scale_v = 255.f / 127.f; + + color4f scale{ 1.f }, bias{ 0.f }; + const auto snorm_mask = tex.argb_signed(); + if (snorm_mask & 1) { scale.a = scale_v; bias.a = -bias_v; } + if (snorm_mask & 2) { scale.r = scale_v; bias.r = -bias_v; } + if (snorm_mask & 4) { scale.g = scale_v; bias.g = -bias_v; } + if (snorm_mask & 8) { scale.b = scale_v; bias.b = -bias_v; } + border_color = (border_color + bias) * scale; + } + glSamplerParameterfv(sampler_handle, GL_TEXTURE_BORDER_COLOR, border_color.rgba); } } - if (sampled_image->upload_context != rsx::texture_upload_context::shader_read || - tex.get_exact_mipmap_count() == 1) + if (!allow_mipmaps || tex.get_exact_mipmap_count() == 1) { GLint min_filter = tex_min_filter(tex.min_filter()); diff --git a/rpcs3/Emu/RSX/GL/glutils/sampler.h b/rpcs3/Emu/RSX/GL/glutils/sampler.h index 89200915f8..8e8482f196 100644 --- a/rpcs3/Emu/RSX/GL/glutils/sampler.h +++ b/rpcs3/Emu/RSX/GL/glutils/sampler.h @@ -75,7 
+75,7 @@ namespace gl return (prop == m_propertiesf.end()) ? 0 : prop->second; } - void apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image); + void apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image, bool allow_mipmaps = true); void apply(const rsx::vertex_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image); void apply_defaults(GLenum default_filter = GL_NEAREST); diff --git a/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp index cfd7b1cc08..75e2d3f3db 100644 --- a/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/GL/upscalers/fsr1/fsr_pass.cpp @@ -152,7 +152,7 @@ namespace gl void rcas_pass::configure() { // 0 is actually the sharpest with 2 being the chosen limit. Each progressive unit 'halves' the sharpening intensity. - auto cas_attenuation = 2.f - (g_cfg.video.vk.rcas_sharpening_intensity / 50.f); + auto cas_attenuation = 2.f - (g_cfg.video.rcas_sharpening_intensity / 50.f); FsrRcasCon(&m_constants_buf[0], cas_attenuation); } } diff --git a/rpcs3/Emu/RSX/GSRender.cpp b/rpcs3/Emu/RSX/GSRender.cpp index e9e859952e..f09f4704da 100644 --- a/rpcs3/Emu/RSX/GSRender.cpp +++ b/rpcs3/Emu/RSX/GSRender.cpp @@ -13,6 +13,8 @@ GSRender::GSRender(utils::serial* ar) noexcept : rsx::thread(ar) { m_frame = nullptr; } + + m_vsync_mode = g_cfg.video.vsync; } GSRender::~GSRender() diff --git a/rpcs3/Emu/RSX/GSRender.h b/rpcs3/Emu/RSX/GSRender.h index d2a6fd9c5f..f597b5562b 100644 --- a/rpcs3/Emu/RSX/GSRender.h +++ b/rpcs3/Emu/RSX/GSRender.h @@ -23,6 +23,8 @@ protected: draw_context_t m_context = nullptr; bool m_continuous_mode = false; + vsync_mode m_vsync_mode{}; + public: ~GSRender() override; diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp index 0fe726d417..7b2591f544 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.hpp @@ -33,7 +33,7 @@ 
namespace rsx u32 draw_command_barrier_mask = 0; // Draw-time iterator to the draw_command_barriers struct - mutable rsx::simple_array::iterator current_barrier_it; + mutable rsx::simple_array::iterator current_barrier_it {}; // Subranges memory cache mutable rsx::simple_array subranges_store; diff --git a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp index fcd30c45f8..d929caa488 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv0039.cpp @@ -14,7 +14,7 @@ namespace rsx namespace nv0039 { // Transfer with stride - inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch, u8 src_stride, u8 dst_stride) + inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, s32 src_pitch, s32 dst_pitch, u8 src_stride, u8 dst_stride) { for (u32 row = 0; row < height; ++row) { @@ -33,7 +33,7 @@ namespace rsx } } - inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch) + inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, s32 src_pitch, s32 dst_pitch) { for (u32 i = 0; i < height; ++i) { diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 17ee040f8c..3cc40efed1 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -306,7 +306,7 @@ namespace rsx REGS(ctx)->decode(reg, REGS(ctx)->latch); } - void set_aa_control(context* ctx, u32 reg, u32 arg) + void set_aa_control(context* ctx, u32 /*reg*/, u32 arg) { const auto latch = REGS(ctx)->latch; if (arg == latch) @@ -633,9 +633,17 @@ namespace rsx case 2: break; default: - rsx_log.error("Unknown render mode %d", mode); + { + struct logged_t + { + atomic_t logged_cause[256]{}; + }; + + const auto& is_error = ::at32(g_fxo->get().logged_cause, mode).try_inc(10); + (is_error ? 
rsx_log.error : rsx_log.trace)("Unknown render mode %d", mode); return; } + } const u32 offset = arg & 0xffffff; auto address_ptr = util::get_report_data_impl(ctx, offset); diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.cpp new file mode 100644 index 0000000000..4ef9dbea9a --- /dev/null +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.cpp @@ -0,0 +1,83 @@ +#include "stdafx.h" +#include "overlay_home_icons.h" + +#include "Emu/RSX/Overlays/overlay_controls.h" + +#include + +namespace rsx::overlays::home_menu +{ + std::unordered_map> g_icons_cache; + std::mutex g_icons_cache_lock; + + static const char* fa_icon_to_filename(fa_icon icon) + { + switch (icon) + { + default: + case fa_icon::none: + return ""; + case fa_icon::home: + return "home.png"; + case fa_icon::settings: + return "settings.png"; + case fa_icon::back: + return "circle-left-solid.png"; + case fa_icon::floppy: + return "floppy-disk-solid.png"; + case fa_icon::maximize: + return "maximize-solid.png"; + case fa_icon::play: + return "play-button-arrowhead.png"; + case fa_icon::poweroff: + return "power-off-solid.png"; + case fa_icon::restart: + return "rotate-left-solid.png"; + case fa_icon::screenshot: + return "screenshot.png"; + case fa_icon::video_camera: + return "video-camera.png"; + case fa_icon::friends: + return "user-group-solid.png"; + case fa_icon::trophy: + return "trophy-solid.png"; + case fa_icon::audio: + return "headphones-solid.png"; + case fa_icon::video: + return "display-solid.png"; + case fa_icon::gamepad: + return "gamepad-solid.png"; + case fa_icon::settings_sliders: + return "sliders-solid.png"; + case fa_icon::settings_gauge: + return "gauge-solid.png"; + case fa_icon::bug: + return "bug-solid.png"; + } + } + + void load_icon(fa_icon icon) + { + const std::string image_path = fmt::format("home/32/%s", fa_icon_to_filename(icon)); + g_icons_cache[icon] = 
rsx::overlays::resource_config::load_icon(image_path); + } + + const image_info* get_icon(fa_icon icon) + { + if (icon == fa_icon::none) + { + return nullptr; + } + + std::lock_guard lock(g_icons_cache_lock); + + auto found = g_icons_cache.find(icon); + if (found != g_icons_cache.end()) + { + return found->second.get(); + } + + load_icon(icon); + return g_icons_cache.at(icon).get(); + } +} diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.h b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.h new file mode 100644 index 0000000000..4b05b852e1 --- /dev/null +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_icons.h @@ -0,0 +1,32 @@ +#pragma once + +#include "Emu/RSX/Overlays/overlays.h" +#include "Emu/RSX/Overlays/overlay_controls.h" + +namespace rsx::overlays::home_menu +{ + enum class fa_icon + { + none = 0, + home, + settings, + back, + floppy, + maximize, + play, + poweroff, + restart, + screenshot, + video_camera, + trophy, + friends, + audio, + video, + gamepad, + settings_sliders, + settings_gauge, + bug, + }; + + const image_info* get_icon(fa_icon icon); +} diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp index 497ce12031..1dadaa5e7d 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp @@ -21,7 +21,7 @@ namespace rsx m_allow_input_on_pause = true; m_dim_background.set_size(virtual_width, virtual_height); - m_dim_background.back_color.a = 0.5f; + m_dim_background.back_color.a = 0.85f; m_description.set_font("Arial", 20); m_description.set_pos(20, 37); @@ -56,6 +56,8 @@ namespace rsx m_time_display.auto_resize(); last_time = std::move(new_time); } + + m_main_menu.update(timestamp_us); } void home_menu_dialog::on_button_pressed(pad_button button_press, bool is_auto_repeat) @@ -88,6 +90,11 @@ namespace rsx std::string path = page->title; for (home_menu_page* parent = page->parent; parent; parent = 
parent->parent) { + if (parent->title.empty()) + { + break; + } + path = parent->title + " > " + path; } m_description.set_text(path); diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.cpp index d0ed9dc460..94e70f4c63 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.cpp @@ -5,61 +5,106 @@ namespace rsx { namespace overlays { - home_menu_entry::home_menu_entry(const std::string& text) + home_menu_entry::home_menu_entry(home_menu::fa_icon icon, const std::string& text, u16 width, text_align alignment) { - std::unique_ptr text_stack = std::make_unique(); - std::unique_ptr padding = std::make_unique(); - std::unique_ptr title = std::make_unique