Initial new PPU interpreter implementation

Replaced dangerous offset32 usages with safe OFFSET_OF macro
This commit is contained in:
DH 2025-04-24 13:41:04 +03:00
parent dcc965c2bb
commit 7115851c82
81 changed files with 13791 additions and 5398 deletions

View file

@ -1,7 +1,5 @@
#!/bin/sh -ex
cd rpcs3/ || exit 1
git config --global --add safe.directory '*'
# Pull all the submodules except llvm and opencv
@ -9,33 +7,30 @@ git config --global --add safe.directory '*'
git submodule -q update --init $(awk '/path/ && !/llvm/ && !/opencv/ { print $3 }' .gitmodules)
if [ "$COMPILER" = "gcc" ]; then
# These are set in the dockerfile
export CC="${GCC_BINARY}"
export CXX="${GXX_BINARY}"
export LINKER=gold
export CC=gcc-14
export CXX=g++-14
else
export CC="${CLANG_BINARY}"
export CXX="${CLANGXX_BINARY}"
export LINKER="${LLD_BINARY}"
export CC=clang
export CXX=clang++
export CFLAGS="$CFLAGS -fuse-ld=lld"
fi
export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
export CXXFLAGS="$CXXFLAGS -fuse-ld=${LINKER}"
cmake -B build \
-DCMAKE_INSTALL_PREFIX=/usr \
-DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \
-DCMAKE_C_FLAGS="$CFLAGS" \
-DCMAKE_CXX_FLAGS="$CFLAGS" \
-DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \
-DUSE_SYSTEM_CURL=ON \
-DUSE_SDL=ON \
-DUSE_SYSTEM_SDL=ON \
-DUSE_SDL=OFF \
-DUSE_SYSTEM_FFMPEG=OFF \
-DUSE_SYSTEM_CURL=OFF \
-DUSE_SYSTEM_OPENAL=OFF \
-DUSE_SYSTEM_FFMPEG=OFF \
-DUSE_SYSTEM_OPENCV=ON \
-DUSE_DISCORD_RPC=ON \
-DOpenGL_GL_PREFERENCE=LEGACY \
-DLLVM_DIR=/opt/llvm/lib/cmake/llvm \
-DSTATIC_LINK_LLVM=ON \
-DBUILD_LLVM=on \
-DWITH_RPCSX=off \
-DWITH_RPCS3=on \
-DWITH_RPCS3_QT_UI=on \

View file

@ -1,7 +1,5 @@
#!/bin/sh -ex
cd rpcs3/ || exit 1
git config --global --add safe.directory '*'
# Pull all the submodules except llvm and opencv
@ -11,40 +9,29 @@ git submodule -q update --init $(awk '/path/ && !/llvm/ && !/opencv/ { print $3
if [ "$COMPILER" = "gcc" ]; then
# These are set in the dockerfile
export CC="${GCC_BINARY}"
export CXX="${GXX_BINARY}"
export LINKER=gold
# We need to set the following variables for LTO to link properly
export AR=/usr/bin/gcc-ar-"$GCCVER"
export RANLIB=/usr/bin/gcc-ranlib-"$GCCVER"
export CFLAGS="-fuse-linker-plugin"
export CC=gcc-14
export CXX=g++-14
else
export CC="${CLANG_BINARY}"
export CXX="${CLANGXX_BINARY}"
export LINKER=lld
export AR=/usr/bin/llvm-ar-"$LLVMVER"
export RANLIB=/usr/bin/llvm-ranlib-"$LLVMVER"
export CC=clang
export CXX=clang++
export LD=clang
export CFLAGS="$CFLAGS -fuse-ld=lld"
fi
export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
cmake -B build \
-DCMAKE_INSTALL_PREFIX=/usr \
-DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \
-DCMAKE_C_FLAGS="$CFLAGS" \
-DCMAKE_CXX_FLAGS="$CFLAGS" \
-DCMAKE_AR="$AR" \
-DCMAKE_RANLIB="$RANLIB" \
-DUSE_SYSTEM_CURL=ON \
-DUSE_SDL=ON \
-DUSE_SYSTEM_SDL=ON \
-DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \
-DUSE_SDL=OFF \
-DUSE_SYSTEM_CURL=OFF \
-DUSE_SYSTEM_OPENAL=OFF \
-DUSE_SYSTEM_FFMPEG=OFF \
-DUSE_SYSTEM_OPENCV=ON \
-DUSE_DISCORD_RPC=ON \
-DOpenGL_GL_PREFERENCE=LEGACY \
-DLLVM_DIR=/opt/llvm/lib/cmake/llvm \
-DSTATIC_LINK_LLVM=ON \
-DBUILD_LLVM=on \
-DWITH_RPCSX=off \
-DWITH_RPCS3=on \
-DWITH_RPCS3_QT_UI=on \

View file

@ -7,10 +7,10 @@ CPU_ARCH="${1:-x86_64}"
if [ "$DEPLOY_APPIMAGE" = "true" ]; then
DESTDIR=AppDir ninja install
curl -fsSLo /usr/bin/linuxdeploy "https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-$CPU_ARCH.AppImage"
chmod +x /usr/bin/linuxdeploy
curl -fsSLo /usr/bin/linuxdeploy-plugin-qt "https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-$CPU_ARCH.AppImage"
chmod +x /usr/bin/linuxdeploy-plugin-qt
sudo curl -fsSLo /usr/bin/linuxdeploy "https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-$CPU_ARCH.AppImage"
sudo chmod a+x /usr/bin/linuxdeploy
sudo curl -fsSLo /usr/bin/linuxdeploy-plugin-qt "https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-$CPU_ARCH.AppImage"
sudo chmod a+x /usr/bin/linuxdeploy-plugin-qt
curl -fsSLo linuxdeploy-plugin-checkrt.sh https://github.com/darealshinji/linuxdeploy-plugin-checkrt/releases/download/continuous/linuxdeploy-plugin-checkrt.sh
chmod +x ./linuxdeploy-plugin-checkrt.sh

View file

@ -1,2 +1,3 @@
CompileFlags:
Add: [-Wall, -Wextra, -Wno-missing-designated-field-initializers]
Remove: [ -fno-lifetime-dse ]

View file

@ -29,16 +29,13 @@ jobs:
matrix:
include:
- os: ubuntu-24.04
docker_img: "rpcs3/rpcs3-ci-jammy:1.4"
build_sh: "rpcs3/.ci/build-linux.sh"
build_sh: ".ci/build-linux.sh"
compiler: clang
- os: ubuntu-24.04
docker_img: "rpcs3/rpcs3-ci-jammy:1.4"
build_sh: "rpcs3/.ci/build-linux.sh"
build_sh: ".ci/build-linux.sh"
compiler: gcc
- os: ubuntu-24.04-arm
docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.4"
build_sh: "rpcs3/.ci/build-linux-aarch64.sh"
build_sh: ".ci/build-linux-aarch64.sh"
compiler: clang
name: RPCS3 Qt UI (Legacy) for Linux ${{ matrix.os }} ${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
@ -46,9 +43,8 @@ jobs:
CCACHE_DIR: ${{ github.workspace }}/ccache
CI_HAS_ARTIFACTS: true
DEPLOY_APPIMAGE: true
APPDIR: "/rpcs3/build/appdir"
ARTDIR: "/root/artifacts"
RELEASE_MESSAGE: "/rpcs3/GitHubReleaseMessage.txt"
APPDIR: "./appdir"
ARTDIR: "./artifacts"
COMPILER: ${{ matrix.compiler }}
RX_VERSION: "Unknown"
RX_SHA: "Unknown"
@ -66,16 +62,26 @@ jobs:
restore-keys: |
${{ runner.os }}-ccache-${{ matrix.compiler }}-${{ runner.arch }}-
- name: Docker setup and build
- name: Setup dependencies
run: |
docker pull --quiet ${{ matrix.docker_img }}
docker run \
-v $PWD:/rpcs3 \
--env-file .ci/docker.env \
-v ${{ env.CCACHE_DIR }}:/root/.ccache \
-v ${{ github.workspace }}/artifacts:/root/artifacts \
${{ matrix.docker_img }} \
${{ matrix.build_sh }}
echo "Types: deb" | sudo tee -a /etc/apt/sources.list.d/ubuntu.sources
echo "URIs: ${{ matrix.os == 'ubuntu-24.04-arm' && 'http://ports.ubuntu.com/ubuntu-ports' || 'http://azure.archive.ubuntu.com/ubuntu/' }}" | sudo tee -a /etc/apt/sources.list.d/ubuntu.sources
echo "Suites: plucky plucky-updates plucky-security" | sudo tee -a /etc/apt/sources.list.d/ubuntu.sources
echo "Components: main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/ubuntu.sources
echo "Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg" | sudo tee -a /etc/apt/sources.list.d/ubuntu.sources
sudo apt update
sudo apt install -y cmake build-essential libunwind-dev \
libvulkan-dev vulkan-validationlayers \
libsox-dev ninja-build libasound2-dev libglfw3-dev nasm libudev-dev \
libpulse-dev libopenal-dev libglew-dev zlib1g-dev libedit-dev \
libevdev-dev libjack-dev libsndio-dev libglvnd-dev \
qt6-base-dev qt6-svg-dev qt6-base-private-dev qt6-multimedia-dev \
clang lld gcc-14 g++-14 \
- name: Build
run: |
${{ matrix.build_sh }}
RX_VERSION=`cat .rx.version | awk -F'-' '{print $1}'`
RX_SHA=`cat .rx.version | awk -F'-' '{print $5}'`

View file

@ -104,6 +104,9 @@ add_subdirectory(zlib EXCLUDE_FROM_ALL)
# ZSTD
add_subdirectory(zstd EXCLUDE_FROM_ALL)
# workaround for LLVM
add_library(zstd::libzstd_static ALIAS libzstd_static)
# 7zip sdk
add_subdirectory(7zip EXCLUDE_FROM_ALL)
@ -342,10 +345,6 @@ if(NOT MSVC AND NOT ANDROID AND NOT WITHOUT_OPENGLEW)
target_link_libraries(3rdparty_glew INTERFACE GLEW::GLEW)
endif()
# LLVM
add_subdirectory(llvm EXCLUDE_FROM_ALL)
# WOLFSSL
add_subdirectory(wolfssl EXCLUDE_FROM_ALL)

View file

@ -1,6 +1,6 @@
if(WITH_LLVM)
set(USE_LLVM_VERSION 19.1.7)
set(USE_LLVM_VERSION 20.1.3)
if (NOT MSVC)
check_cxx_compiler_flag("-msse -msse2 -mcx16" COMPILER_X86)
check_cxx_compiler_flag("-march=armv8-a+lse" COMPILER_ARM)
@ -42,7 +42,18 @@ if(WITH_LLVM)
set(LLVM_DOWNLOAD_BINARY "")
if ((WIN32 AND MSVC) OR (LINUX AND NOT ANDROID))
if (ANDROID)
string(APPEND LLVM_DOWNLOAD_BINARY llvm-android-)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
string(APPEND LLVM_DOWNLOAD_BINARY arm64-v8a)
else()
string(APPEND LLVM_DOWNLOAD_BINARY x64)
endif()
string(APPEND LLVM_DOWNLOAD_BINARY .7z)
elseif ((WIN32 AND MSVC) OR LINUX)
string(APPEND LLVM_DOWNLOAD_BINARY llvm-)
if (WIN32)
string(APPEND LLVM_DOWNLOAD_BINARY windows-)
else()
@ -62,6 +73,8 @@ if(WITH_LLVM)
string(APPEND LLVM_DOWNLOAD_BINARY MD)
endif()
endif()
string(APPEND LLVM_DOWNLOAD_BINARY .7z)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
@ -79,55 +92,76 @@ if(WITH_LLVM)
# LLVM needs to be built out-of-tree
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/llvm/llvm ${CMAKE_CURRENT_BINARY_DIR}/llvm_build EXCLUDE_FROM_ALL)
set(LLVM_DIR "${CMAKE_CURRENT_BINARY_DIR}/llvm_build/lib/cmake/llvm/")
set(MLIR_DIR "${CMAKE_CURRENT_BINARY_DIR}/llvm_build/lib/cmake/mlir/")
else()
set(LLVM_DOWNLOAD_LINK https://github.com/RPCSX/llvm-build/releases/download/${USE_LLVM_VERSION})
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z" AND
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}" AND
NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.unpacked")
message(STATUS "Downloading LLVM")
file(DOWNLOAD ${LLVM_DOWNLOAD_LINK}/${LLVM_DOWNLOAD_BINARY}
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z.tmp" SHOW_PROGRESS
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.tmp" SHOW_PROGRESS
STATUS FILE_STATUS)
list(GET FILE_STATUS 0 STATUS_CODE)
if (NOT STATUS_CODE EQUAL 0)
file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z.tmp")
file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.tmp")
message(FATAL_ERROR "Failed to download LLVM")
endif()
file(RENAME
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z.tmp"
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z"
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.tmp"
"${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}"
)
endif()
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.unpacked")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z"
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}" RESULT_VARIABLE STATUS_CODE)
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.dir")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}"
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.dir" RESULT_VARIABLE STATUS_CODE)
if (NOT STATUS_CODE EQUAL 0)
message(FATAL_ERROR "Failed to unpack LLVM")
endif()
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.unpacked")
file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.7z")
file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}")
endif()
file(GLOB LLVM_ROOT_DIR_LIST LIST_DIRECTORIES true "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}/*")
file(GLOB LLVM_ROOT_DIR_LIST LIST_DIRECTORIES true "${CMAKE_CURRENT_BINARY_DIR}/${USE_LLVM_VERSION}-${LLVM_DOWNLOAD_BINARY}.dir/*")
list(GET LLVM_ROOT_DIR_LIST 0 LLVM_ROOT_DIR)
set(LLVM_DIR "${LLVM_ROOT_DIR}/lib/cmake/llvm")
set(LLVM_DIR "${LLVM_ROOT_DIR}/lib/cmake/llvm")
set(MLIR_DIR "${LLVM_ROOT_DIR}/lib/cmake/mlir")
if (NOT EXISTS "${LLVM_DIR}")
message(FATAL_ERROR "Failed to locate LLVM: ${LLVM_ROOT_DIR}")
endif()
if (NOT EXISTS "${MLIR_DIR}")
message(FATAL_ERROR "Failed to locate MLIR: ${LLVM_ROOT_DIR}")
endif()
if (NOT ANDROID)
set(Clang_DIR "${LLVM_ROOT_DIR}/lib/cmake/clang")
if (NOT EXISTS "${Clang_DIR}")
message(FATAL_ERROR "Failed to locate Clang: ${LLVM_ROOT_DIR}")
endif()
endif()
endif()
set(STATIC_LINK_LLVM ON CACHE BOOL "Link against LLVM statically. This will get set to ON if you build LLVM from the submodule." FORCE)
find_package(LLVM ${USE_LLVM_VERSION} CONFIG)
find_package(MLIR ${USE_LLVM_VERSION} CONFIG)
if(NOT LLVM_FOUND)
if(NOT LLVM_FOUND OR NOT MLIR_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif()
if (NOT ANDROID)
find_package(Clang ${USE_LLVM_VERSION} CONFIG)
if(NOT Clang_FOUND)
message(FATAL_ERROR "Couldn't build Clang from the submodule. You might need to run `git submodule update --init`")
endif()
endif()
else()
message(STATUS "Using prebuilt or system LLVM")
@ -136,15 +170,36 @@ if(WITH_LLVM)
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif()
if (MLIR_DIR AND NOT IS_ABSOLUTE "${MLIR_DIR}")
set(MLIR_DIR ${CMAKE_SOURCE_DIR}/${MLIR_DIR})
endif()
if (Clang_DIR AND NOT IS_ABSOLUTE "${Clang_DIR}")
set(Clang_DIR ${CMAKE_SOURCE_DIR}/${Clang_DIR})
endif()
find_package(LLVM CONFIG)
find_package(MLIR CONFIG)
if (NOT LLVM_FOUND)
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
endif()
if (LLVM_VERSION VERSION_LESS 18)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 18 or above. \
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 18 or above.")
endif()
if (NOT MLIR_FOUND)
message(FATAL_ERROR "Can't find MLIR libraries from the CMAKE_PREFIX_PATH path or MLIR_DIR")
endif()
if (NOT ANDROID)
find_package(Clang CONFIG)
if (NOT Clang_FOUND)
message(FATAL_ERROR "Can't find Clang from the CMAKE_PREFIX_PATH path or Clang_DIR.")
endif()
endif()
endif()
@ -164,9 +219,9 @@ if(WITH_LLVM)
endif()
# For Linux even if BUILD_LLVM is disabled (precompiled llvm used)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
list (APPEND LLVM_ADDITIONAL_LIBS PerfJITEvents)
endif()
# if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# list (APPEND LLVM_ADDITIONAL_LIBS PerfJITEvents)
# endif()
llvm_map_components_to_libnames(LLVM_LIBS
${LLVM_TARGETS_TO_BUILD}
@ -176,17 +231,42 @@ if(WITH_LLVM)
MCJIT
Passes
)
set(MLIR_LIBS MLIRIR MLIRInferTypeOpInterface MLIRFuncDialect MLIRSCFDialect MLIRSCFToControlFlow MLIRAffineAnalysis MLIRAsyncToLLVM)
else()
set(LLVM_LIBS LLVM)
set(LLVM_LIBS LLVM MLIR)
endif()
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(TableGen)
include(AddLLVM)
include(AddMLIR)
if (NOT ANDROID)
list(APPEND CMAKE_MODULE_PATH "${CLANG_CMAKE_DIR}")
include(AddClang)
get_target_property(CLANG_EXECUTABLE clang LOCATION)
endif()
# include(HandleLLVMOptions)
add_library(3rdparty_llvm INTERFACE)
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})
target_include_directories(3rdparty_llvm INTERFACE ${LLVM_INCLUDE_DIRS})
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
target_compile_definitions(3rdparty_llvm INTERFACE ${LLVM_DEFINITIONS_LIST} LLVM_AVAILABLE)
add_library(3rdparty_mlir INTERFACE)
target_link_libraries(3rdparty_mlir INTERFACE 3rdparty_llvm ${MLIR_LIBS})
target_include_directories(3rdparty_mlir INTERFACE ${MLIR_INCLUDE_DIRS})
separate_arguments(MLIR_DEFINITIONS_LIST NATIVE_COMMAND ${MLIR_DEFINITIONS})
target_compile_definitions(3rdparty_mlir INTERFACE ${MLIR_DEFINITIONS_LIST} MLIR_AVAILABLE)
add_library(3rdparty::llvm ALIAS 3rdparty_llvm)
add_library(3rdparty::mlir ALIAS 3rdparty_mlir)
else()
add_library(3rdparty::llvm ALIAS 3rdparty_dummy_lib)
add_library(3rdparty::mlir ALIAS 3rdparty_dummy_lib)
endif()

View file

@ -121,6 +121,8 @@ endif()
add_subdirectory(3rdparty EXCLUDE_FROM_ALL)
add_subdirectory(rx EXCLUDE_FROM_ALL)
include(3rdparty/llvm/CMakeLists.txt)
if (NOT RX_TAG)
set(RX_TAG 0)
endif()
@ -183,9 +185,10 @@ if (WITH_RPCSX)
add_subdirectory(tools)
add_subdirectory(orbis-kernel)
add_subdirectory(rpcsx)
endif()
add_subdirectory(rpcsx)
if (WITH_RPCS3)
include(ConfigureCompiler)
include(CheckFunctionExists)

View file

@ -5,7 +5,7 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_POSITION_INDEPENDENT_CODE on)
set(FFMPEG_VERSION 5.1)
set(LLVM_VERSION 19.1)
set(LLVM_VERSION 20.1.2)
option(USE_ARCH "Specify arch to build" "")
@ -88,26 +88,6 @@ target_link_libraries(3rdparty_ffmpeg INTERFACE
add_dependencies(3rdparty_ffmpeg ffmpeg-unpack)
if(NOT EXISTS ${CMAKE_BINARY_DIR}/llvm-${LLVM_VERSION}.tar.gz)
message(STATUS "Downloading llvm-${LLVM_VERSION}")
file(DOWNLOAD
https://github.com/RPCS3-Android/llvm-android/releases/download/${LLVM_VERSION}/llvm-${RPCS3_DOWNLOAD_ARCH}-Android.tar.gz
${CMAKE_BINARY_DIR}/llvm-${LLVM_VERSION}.tar.gz
SHOW_PROGRESS
)
endif()
set(LLVM_DIR ${CMAKE_BINARY_DIR}/llvm-${LLVM_VERSION}.7-Android/lib/cmake/llvm)
if (NOT EXISTS ${LLVM_DIR})
message(STATUS "Unpacking llvm-${LLVM_VERSION}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_BINARY_DIR}/llvm-${LLVM_VERSION}.tar.gz
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
)
endif()
set(WITH_RPCSX off)
set(WITH_RPCS3 on)
set(USE_SYSTEM_LIBUSB off)
@ -117,10 +97,10 @@ set(USE_SYSTEM_OPENCV off)
set(USE_SYSTEM_FFMPEG off)
set(USE_FAUDIO off)
set(USE_SDL2 off)
set(BUILD_LLVM off)
set(BUILD_LLVM on)
set(STATIC_LINK_LLVM on)
set(DISABLE_LTO on)
set(USE_LTO off)
set(DISABLE_LTO off)
set(USE_LTO on)
set(USE_OPENSL off)
set(ASMJIT_NO_SHM_OPEN on)
set(USE_SYSTEM_ZLIB on)

View file

@ -152,7 +152,7 @@ vm::ptr<CellGcmReportData> cellGcmGetReportDataAddressLocation(u32 index, u32 lo
cellGcmSys.error("cellGcmGetReportDataAddressLocation: Wrong local index (%d)", index);
}
return vm::cast(rsx::get_current_renderer()->label_addr + ::offset32(&RsxReports::report) + index * 0x10);
return vm::cast(rsx::get_current_renderer()->label_addr + OFFSET_OF(RsxReports, report) + index * 0x10);
}
u64 cellGcmGetTimeStamp(u32 index)
@ -164,7 +164,7 @@ u64 cellGcmGetTimeStamp(u32 index)
cellGcmSys.error("cellGcmGetTimeStamp: Wrong local index (%d)", index);
}
const u32 address = rsx::get_current_renderer()->label_addr + ::offset32(&RsxReports::report) + index * 0x10;
const u32 address = rsx::get_current_renderer()->label_addr + OFFSET_OF(RsxReports, report) + index * 0x10;
return *vm::get_super_ptr<u64>(address);
}
@ -193,7 +193,7 @@ u32 cellGcmGetNotifyDataAddress(u32 index)
*/
vm::ptr<CellGcmReportData> _cellGcmFunc12()
{
return vm::ptr<CellGcmReportData>::make(rsx::get_current_renderer()->label_addr + ::offset32(&RsxReports::report)); // TODO
return vm::ptr<CellGcmReportData>::make(rsx::get_current_renderer()->label_addr + OFFSET_OF(RsxReports, report)); // TODO
}
u32 cellGcmGetReport(u32 type, u32 index)
@ -223,7 +223,7 @@ u32 cellGcmGetReportDataAddress(u32 index)
cellGcmSys.error("cellGcmGetReportDataAddress: Wrong local index (%d)", index);
}
return rsx::get_current_renderer()->label_addr + ::offset32(&RsxReports::report) + index * 0x10;
return rsx::get_current_renderer()->label_addr + OFFSET_OF(RsxReports, report) + index * 0x10;
}
u32 cellGcmGetReportDataLocation(u32 index, u32 location)

View file

@ -574,20 +574,7 @@ public:
for (gem_controller& c : controllers)
{
ar(c.status, c.ext_status, c.ext_id, c.port, c.enabled_magnetometer, c.calibrated_magnetometer, c.enabled_filtering, c.enabled_tracking, c.enabled_LED, c.hue_set, c.rumble);
// We need to add padding because we used bitwise serialization in version 1
if (version < 2)
{
ar.add_padding(&gem_controller::rumble, &gem_controller::sphere_rgb);
}
ar(c.sphere_rgb, c.hue, c.distance_mm, c.radius, c.radius_valid, c.is_calibrating);
if (version < 2)
{
ar.add_padding(&gem_controller::is_calibrating, &gem_controller::calibration_start_us);
}
ar(c.calibration_start_us);
if (ar.is_writing() || version >= 2)

View file

@ -1022,7 +1022,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
listSet->focusPosition = CELL_SAVEDATA_FOCUSPOS_LISTHEAD;
std::memset(result.get_ptr(), 0,
::offset32(&CellSaveDataCBResult::userdata));
OFFSET_OF(CellSaveDataCBResult, userdata));
// List Callback
funcList(ppu, result, listGet, listSet);
@ -1313,7 +1313,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
}
std::memset(result.get_ptr(), 0,
::offset32(&CellSaveDataCBResult::userdata));
OFFSET_OF(CellSaveDataCBResult, userdata));
if (!funcDone)
{
@ -1436,8 +1436,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
{
lv2_sleep(ppu, 250);
std::memset(result.get_ptr(), 0,
::offset32(&CellSaveDataCBResult::userdata));
std::memset(result.get_ptr(), 0, OFFSET_OF(CellSaveDataCBResult, userdata));
// Fixed Callback
funcFixed(ppu, result, listGet, fixedSet);
@ -1780,7 +1779,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
!save_entry.isNew ? ::narrow<s32>((size_bytes / 1024) + statGet->sysSizeKB) : 0;
std::memset(result.get_ptr(), 0,
::offset32(&CellSaveDataCBResult::userdata));
OFFSET_OF(CellSaveDataCBResult, userdata));
// Stat Callback
funcStat(ppu, result, statGet, statSet);
@ -2036,7 +2035,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
std::memset(fileSet.get_ptr(), 0, fileSet.size());
std::memset(fileGet->reserved, 0, sizeof(fileGet->reserved));
std::memset(result.get_ptr(), 0,
::offset32(&CellSaveDataCBResult::userdata));
OFFSET_OF(CellSaveDataCBResult, userdata));
funcFile(ppu, result, fileGet, fileSet);
ppu.state += cpu_flag::wait;

View file

@ -1215,7 +1215,7 @@ void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 a
if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0))
{
// vm::reservation_acquire(ctxt->spurs.ptr(&CellSpurs::traceBuffer).addr());
auto spurs = spu._ptr<CellSpurs>(0x80 - offset32(&CellSpurs::traceBuffer));
auto spurs = spu._ptr<CellSpurs>(0x80 - OFFSET_OF(CellSpurs, traceBuffer));
if (ctxt->traceMsgCount != 0xffu || spurs->traceBuffer.addr() == 0u)
{
@ -1238,7 +1238,7 @@ void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 a
if (notify)
{
auto spurs = spu._ptr<CellSpurs>(0x2D80 - offset32(&CellSpurs::wklState1));
auto spurs = spu._ptr<CellSpurs>(0x2D80 - OFFSET_OF(CellSpurs, wklState1));
sys_spu_thread_send_event(spu, spurs->spuPort, 2, 0);
}
}
@ -1427,12 +1427,12 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
// vm::reservation_op(vm::cast(ctxt->taskset.addr()), 128, [&]()
{
auto taskset = ctxt->taskset;
v128 waiting = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::waiting));
v128 running = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::running));
v128 ready = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::ready));
v128 pready = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::pending_ready));
v128 enabled = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::enabled));
v128 signalled = vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::signalled));
v128 waiting = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, waiting));
v128 running = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, running));
v128 ready = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, ready));
v128 pready = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, pending_ready));
v128 enabled = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, enabled));
v128 signalled = vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, signalled));
// Verify taskset state is valid
if ((waiting & running) != v128{} || (ready & pready) != v128{} ||
@ -1599,12 +1599,12 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
spursHalt(spu);
}
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::waiting)) = waiting;
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::running)) = running;
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::ready)) = ready;
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::pending_ready)) = v128{};
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::enabled)) = enabled;
vm::_ref<v128>(ctxt->taskset.addr() + ::offset32(&CellSpursTaskset::signalled)) = signalled;
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, waiting)) = waiting;
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, running)) = running;
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, ready)) = ready;
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, pending_ready)) = v128{};
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, enabled)) = enabled;
vm::_ref<v128>(ctxt->taskset.addr() + OFFSET_OF(CellSpursTaskset, signalled)) = signalled;
std::memcpy(spu._ptr<void>(0x2700), spu._ptr<void>(0x100), 128); // Copy data
} //);

View file

@ -1675,7 +1675,7 @@ error_code cellVdecGetPicItem(ppu_thread& ppu, u32 handle,
info->status = CELL_OK;
info->attr = attr;
const vm::addr_t picinfo_addr{info.addr() + ::offset32(&all_info_t::picInfo)};
const vm::addr_t picinfo_addr{info.addr() + OFFSET_OF(all_info_t, picInfo)};
info->picInfo_addr = picinfo_addr;
if (vdec->type == CELL_VDEC_CODEC_TYPE_AVC)

View file

@ -445,6 +445,8 @@ target_link_libraries(rpcs3_emu
3rdparty::libusb 3rdparty::wolfssl
Vulkan::Headers
rpcsx::fw::ps3::api
rpcsx::cpu::cell::ppu
rpcsx::cpu::cell::ppu::semantic
PRIVATE
3rdparty::glslang

View file

@ -1908,7 +1908,7 @@ auto gen_ghc_cpp_trampoline(ppu_intrp_func_t fn_target)
// Take second ghc arg
c.mov(args[0], x86::rbp);
c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
c.mov(args[2].r32(), x86::dword_ptr(args[0], OFFSET_OF(ppu_thread, cia)));
c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
c.jmp(fn_target);
};
@ -1942,7 +1942,7 @@ auto gen_ghc_cpp_trampoline(ppu_intrp_func_t fn_target)
c.bind(base_addr);
c.embedUInt64(reinterpret_cast<u64>(&vm::g_base_addr));
c.bind(cia_offset);
c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::cia)));
c.embedUInt64(static_cast<u64>(OFFSET_OF(ppu_thread, cia)));
c.bind(jmp_target);
c.embedUInt64(reinterpret_cast<u64>(fn_target));
};

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,11 @@
#pragma once
#include "PPUOpcodes.h"
#include "rx/cpu/cell/ppu/Instruction.hpp"
#include "rx/cpu/cell/ppu/Opcode.hpp"
#include "rx/cpu/cell/ppu/PPUContext.hpp"
#include "rx/refl.hpp"
#include <array>
class ppu_thread;
@ -42,3 +47,12 @@ struct ppu_interpreter_rt : ppu_interpreter_rt_base
private:
ppu_decoder<ppu_interpreter_t<ppu_intrp_func_t>, ppu_intrp_func_t> table;
};
struct PPUContext;
struct PPUInterpreter
{
std::array<void (*)(PPUContext& context, rx::cell::ppu::Instruction inst), rx::fieldCount<rx::cell::ppu::Opcode>> impl;
PPUInterpreter();
void interpret(PPUContext& context, std::uint32_t inst);
};

View file

@ -333,7 +333,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
};
// Initialize double-purpose fake OPD array for HLE functions
const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder != ppu_decoder_type::_static);
const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy);
u32& hle_funcs_addr = g_fxo->get<ppu_function_manager>().addr;

View file

@ -1,4 +1,5 @@
#include "stdafx.h"
#include "rx/cpu/cell/ppu/Decoder.hpp"
#include "util/JIT.h"
#include "util/StrUtil.h"
#include "util/serialization.hpp"
@ -27,6 +28,9 @@
#include "lv2/sys_overlay.h"
#include "lv2/sys_process.h"
#include "lv2/sys_spu.h"
#include <cstddef>
#include <rx/format.hpp>
#include <format>
#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
@ -317,12 +321,12 @@ const auto ppu_gateway = build_function_asm<void (*)(ppu_thread*)>("ppu_gateway"
#endif
// Save native stack pointer for longjmp emulation
c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);
c.mov(x86::qword_ptr(args[0], OFFSET_OF(ppu_thread, hv_ctx.regs)), x86::rsp);
// Initialize args
c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
c.mov(x86::rbp, args[0]);
c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC
c.mov(x86::edx, x86::dword_ptr(x86::rbp, OFFSET_OF(ppu_thread, cia))); // Load PC
c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
c.mov(x86::rdx, x86::rax);
@ -333,9 +337,9 @@ const auto ppu_gateway = build_function_asm<void (*)(ppu_thread*)>("ppu_gateway"
c.mov(x86::r12d, x86::edx); // Load relocation base
c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));
c.mov(x86::r14, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[0]))); // Load some registers
c.mov(x86::rsi, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[1])));
c.mov(x86::rdi, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[2])));
if (utils::has_avx())
{
@ -403,7 +407,7 @@ const auto ppu_gateway = build_function_asm<void (*)(ppu_thread*)>("ppu_gateway"
// pc, sp
// x18, x19...x30
// NOTE: Do not touch x19..x30 before saving the registers!
const u64 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const u64 hv_register_array_offset = OFFSET_OF(ppu_thread, hv_ctx.regs);
Label hv_ctx_pc = c.newLabel(); // Used to hold the far jump return address
// Sanity
@ -434,7 +438,7 @@ const auto ppu_gateway = build_function_asm<void (*)(ppu_thread*)>("ppu_gateway"
const arm::GpX pc = a64::x15;
const arm::GpX cia_addr_reg = a64::x11;
// Load offset value
c.mov(cia_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::cia))));
c.mov(cia_addr_reg, Imm(static_cast<u64>(OFFSET_OF(ppu_thread, cia))));
// Load cia
c.ldr(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
@ -459,7 +463,7 @@ const auto ppu_gateway = build_function_asm<void (*)(ppu_thread*)>("ppu_gateway"
c.ldr(a64::x22, arm::Mem(a64::x22));
const arm::GpX gpr_addr_reg = a64::x9;
c.mov(gpr_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::gpr))));
c.mov(gpr_addr_reg, Imm(static_cast<u64>(OFFSET_OF(ppu_thread, gpr))));
c.add(gpr_addr_reg, gpr_addr_reg, ppu_t_base);
c.ldr(a64::x23, arm::Mem(gpr_addr_reg));
c.ldr(a64::x24, arm::Mem(gpr_addr_reg, 8));
@ -514,7 +518,7 @@ const extern auto ppu_escape = build_function_asm<void (*)(ppu_thread*)>("ppu_es
#if defined(ARCH_X64)
// Restore native stack pointer (longjmp emulation)
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));
c.mov(x86::rsp, x86::qword_ptr(args[0], OFFSET_OF(ppu_thread, hv_ctx.regs)));
// Return to the return location
c.sub(x86::rsp, 8);
@ -523,7 +527,7 @@ const extern auto ppu_escape = build_function_asm<void (*)(ppu_thread*)>("ppu_es
// We really shouldn't be using this, but an implementation shoudln't hurt
// Far jump return. Only clobbers x30.
const arm::GpX ppu_t_base = a64::x20;
const u64 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const u64 hv_register_array_offset = OFFSET_OF(ppu_thread, hv_ctx.regs);
c.mov(ppu_t_base, args[0]);
c.mov(a64::x30, Imm(hv_register_array_offset));
c.ldr(a64::x30, arm::Mem(ppu_t_base, a64::x30));
@ -581,7 +585,7 @@ static inline ppu_intrp_func_t ppu_read(u32 addr)
// Get interpreter cache value
static ppu_intrp_func_t ppu_cache(u32 addr)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
fmt::throw_exception("Invalid PPU decoder");
}
@ -882,7 +886,7 @@ extern void ppu_register_range(u32 addr, u32 size)
while (size)
{
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
// Assume addr is the start of first segment of PRX
const uptr entry_value = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
@ -919,7 +923,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr =
return;
}
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
{
return;
}
@ -1097,14 +1101,14 @@ struct ppu_far_jumps_t
#ifdef ARCH_X64
c.mov(args[0], x86::rbp);
c.mov(x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)), pc);
c.mov(x86::dword_ptr(args[0], OFFSET_OF(ppu_thread, cia)), pc);
c.jmp(ppu_far_jump);
#else
Label jmp_address = c.newLabel();
Label imm_address = c.newLabel();
c.ldr(args[1].w(), arm::ptr(imm_address));
c.str(args[1].w(), arm::Mem(args[0], ::offset32(&ppu_thread::cia)));
c.str(args[1].w(), arm::Mem(args[0], OFFSET_OF(ppu_thread, cia)));
c.ldr(args[1], arm::ptr(jmp_address));
c.br(args[1]);
@ -1204,7 +1208,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
std::lock_guard lock(jumps.mutex);
jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});
ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder == ppu_decoder_type::_static ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));
ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));
return true;
}
@ -1288,7 +1292,7 @@ static void ppu_break(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_int
// Set or remove breakpoint
extern bool ppu_breakpoint(u32 addr, bool is_adding)
{
if (addr % 4 || !vm::check_addr(addr, vm::page_executable) || g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
if (addr % 4 || !vm::check_addr(addr, vm::page_executable) || g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
return false;
}
@ -1359,7 +1363,7 @@ extern bool ppu_patch(u32 addr, u32 value)
const bool is_exec = vm::check_addr(addr, vm::page_executable);
if (is_exec && g_cfg.core.ppu_decoder == ppu_decoder_type::llvm && !Emu.IsReady())
if (is_exec && g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy && !Emu.IsReady())
{
// TODO: support recompilers
ppu_log.fatal("Patch failed at 0x%x: LLVM recompiler is used.", addr);
@ -1648,7 +1652,7 @@ void ppu_thread::dump_regs(std::string& ret, std::any& custom_data) const
fmt::append(ret, "LR: 0x%llx\n", lr);
fmt::append(ret, "CTR: 0x%llx\n", ctr);
fmt::append(ret, "VRSAVE: 0x%08x\n", vrsave);
fmt::append(ret, "XER: [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer.ca, xer.ov, xer.so, xer.cnt);
fmt::append(ret, "XER: [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer_ca, xer_ov, xer_so, xer_cnt);
fmt::append(ret, "VSCR: [SAT=%u | NJ=%u]\n", sat, nj);
fmt::append(ret, "FPSCR: [FL=%u | FG=%u | FE=%u | FU=%u]\n", fpscr.fl, fpscr.fg, fpscr.fe, fpscr.fu);
@ -2441,9 +2445,10 @@ void ppu_thread::cpu_wait(bs_t<cpu_flag> old)
state.wait(old);
}
// static_assert(offsetof(ppu_thread, gpr[0]) == 24);
void ppu_thread::exec_task()
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
// HVContext push to allow recursion. This happens with guest callback invocations.
const auto old_hv_ctx = hv_ctx;
@ -2464,9 +2469,28 @@ void ppu_thread::exec_task()
return;
}
const auto cache = vm::g_exec_addr;
const auto mem_ = vm::g_base_addr;
if (g_cfg.core.ppu_decoder == ppu_decoder_type::interpreter)
{
static PPUInterpreter interpreter;
while (true)
{
if (test_stopped()) [[unlikely]]
{
return;
}
std::uint32_t inst = *reinterpret_cast<be_t<std::uint32_t>*>(mem_ + std::uint64_t{cia});
interpreter.interpret(*this, inst);
}
return;
}
const auto cache = vm::g_exec_addr;
while (true)
{
if (test_stopped()) [[unlikely]]
@ -2556,7 +2580,7 @@ void ppu_thread::serialize_common(utils::serial& ar)
{
[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION(ar.is_writing(), ppu);
ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio.raw().all);
// ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio.raw().all);
if (cia % 4 || (cia >> 28) >= 0xCu)
{
@ -3309,7 +3333,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtim
});
// Check pause flag
c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
c.bt(x86::dword_ptr(args[2], OFFSET_OF(ppu_thread, state) - OFFSET_OF(ppu_thread, rdata)), static_cast<u32>(cpu_flag::pause));
c.jc(fall);
c.xbegin(tx1);
@ -3410,7 +3434,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtim
}
c.mov(x86::rax, -1);
c.mov(x86::qword_ptr(args[2], ::offset32(&ppu_thread::last_ftime) - ::offset32(&ppu_thread::rdata)), x86::rax);
c.mov(x86::qword_ptr(args[2], OFFSET_OF(ppu_thread, last_ftime) - OFFSET_OF(ppu_thread, rdata)), x86::rax);
c.xor_(x86::eax, x86::eax);
// c.jmp(_ret);
@ -4016,7 +4040,7 @@ extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release
}
}
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy)
{
return;
}
@ -4034,7 +4058,7 @@ extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_modules)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy)
{
return;
}
@ -4744,7 +4768,7 @@ extern void ppu_initialize()
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size, concurent_memory_limit& memory_limit)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy)
{
if (check_only || vm::base(info.segs[0].addr) != info.segs[0].ptr)
{
@ -5106,7 +5130,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
c.add(x86::edx, seg0);
c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
c.mov(x86::dword_ptr(x86::rbp, OFFSET_OF(ppu_thread, cia)), x86::edx);
c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
c.mov(x86::rdx, x86::rax);
@ -5137,7 +5161,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
code_size_until_jump = buf_end - buf_start;
// Load offset value
c.mov(cia_addr_reg, static_cast<u64>(::offset32(&ppu_thread::cia)));
c.mov(cia_addr_reg, static_cast<u64>(OFFSET_OF(ppu_thread, cia)));
// Update CIA
c.str(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));

View file

@ -3,6 +3,7 @@
#include "../CPU/CPUThread.h"
#include "../CPU/Hypervisor.h"
#include "../Memory/vm_ptr.h"
#include "rx/cpu/cell/ppu/PPUContext.hpp"
#include "util/lockless.h"
#include "util/BitField.h"
@ -134,7 +135,7 @@ enum class ppu_debugger_mode : u32
max_mode,
};
class ppu_thread : public cpu_thread
class ppu_thread : public cpu_thread, public PPUContext
{
public:
static const u32 id_base = 0x01000000; // TODO (used to determine thread type)
@ -165,107 +166,6 @@ public:
using cpu_thread::operator=;
u64 gpr[32] = {}; // General-Purpose Registers
f64 fpr[32] = {}; // Floating Point Registers
v128 vr[32] = {}; // Vector Registers
union alignas(16) cr_bits
{
u8 bits[32];
u32 fields[8];
u8& operator[](usz i)
{
return bits[i];
}
// Pack CR bits
u32 pack() const
{
u32 result{};
for (u32 bit : bits)
{
result <<= 1;
result |= bit;
}
return result;
}
// Unpack CR bits
void unpack(u32 value)
{
for (u8& b : bits)
{
b = !!(value & (1u << 31));
value <<= 1;
}
}
};
cr_bits cr{}; // Condition Registers (unpacked)
// Floating-Point Status and Control Register (unpacked)
union
{
struct
{
// TODO
bool _start[16];
bool fl; // FPCC.FL
bool fg; // FPCC.FG
bool fe; // FPCC.FE
bool fu; // FPCC.FU
bool _end[12];
};
u32 fields[8];
cr_bits bits;
} fpscr{};
u64 lr{}; // Link Register
u64 ctr{}; // Counter Register
u32 vrsave{0xffffffff}; // VR Save Register
u32 cia{}; // Current Instruction Address
// Fixed-Point Exception Register (abstract representation)
struct
{
ENABLE_BITWISE_SERIALIZATION;
bool so{}; // Summary Overflow
bool ov{}; // Overflow
bool ca{}; // Carry
u8 cnt{}; // 0..6
} xer;
/*
Non-Java. A mode control bit that determines whether vector floating-point operations will be performed
in a Java-IEEE-C9X-compliant mode or a possibly faster non-Java/non-IEEE mode.
0 The Java-IEEE-C9X-compliant mode is selected. Denormalized values are handled as specified
by Java, IEEE, and C9X standard.
1 The non-Java/non-IEEE-compliant mode is selected. If an element in a source vector register
contains a denormalized value, the value '0' is used instead. If an instruction causes an underflow
exception, the corresponding element in the target vr is cleared to '0'. In both cases, the '0'
has the same sign as the denormalized or underflowing value.
*/
bool nj = true;
// Sticky saturation bit
v128 sat{};
// Optimization: precomputed java-mode mask for handling denormals
u32 jm_mask = 0x7f80'0000;
u32 raddr{0}; // Reservation addr
u64 rtime{0};
alignas(64) std::byte rdata[128]{}; // Reservation data
bool use_full_rdata{};
u32 res_cached{0}; // Reservation "cached" addresss
u32 res_notify{0};
u64 res_notify_time{0};
union ppu_prio_t
{
u64 all;

View file

@ -60,7 +60,7 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
{
.debug_info = false, // Set to "true" to insert debug frames on x27
.use_stack_frames = false, // We don't need this since the PPU GW allocates global scratch on the stack
.hypervisor_context_offset = ::offset32(&ppu_thread::hv_ctx),
.hypervisor_context_offset = OFFSET_OF(ppu_thread, hv_ctx),
.exclusion_callback = {}, // Unused, we don't have special exclusion functions on PPU
.base_register_lookup = base_reg_lookup,
.faux_function_list = std::move(faux_functions_list)};
@ -76,8 +76,8 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
reset_transforms();
// Thread context struct (TODO: safer member access)
const u32 off0 = offset32(&ppu_thread::state);
const u32 off1 = offset32(&ppu_thread::gpr);
const u32 off0 = OFFSET_OF(ppu_thread, state);
const u32 off1 = OFFSET_OF(ppu_thread, gpr);
std::vector<Type*> thread_struct;
thread_struct.emplace_back(ArrayType::get(GetType<char>(), off0));
thread_struct.emplace_back(GetType<u32>()); // state

File diff suppressed because it is too large Load diff

View file

@ -5,8 +5,6 @@
#include <functional>
union v128;
// SPU ASMJIT Recompiler
class spu_recompiler : public spu_recompiler_base
{

View file

@ -203,7 +203,7 @@ DECLARE(spu_runtime::tr_all) = []
*raw++ = 0x41;
*raw++ = 0x8b;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, pc));
// Get LS address starting from PC: lea rcx, [rbp + rax]
*raw++ = 0x48;
@ -233,7 +233,7 @@ DECLARE(spu_runtime::tr_all) = []
*raw++ = 0x49;
*raw++ = 0xc7;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::block_hash));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, block_hash));
*raw++ = 0x00;
*raw++ = 0x00;
*raw++ = 0x00;
@ -259,11 +259,11 @@ DECLARE(spu_runtime::tr_all) = []
// x19 = m_thread a.k.a arg[0]
// x20 = ls_base
// x21 - x22 = args[2 - 3]
// ensure(::offset32(&spu_thread::pc) <= 32760);
// ensure(::offset32(&spu_thread::block_hash) <= 32760);
// ensure(OFFSET_OF(spu_thread, pc) <= 32760);
// ensure(OFFSET_OF(spu_thread, block_hash) <= 32760);
// Load PC
c.ldr(a64::w1, arm::Mem(a64::x19, ::offset32(&spu_thread::pc))); // REG_Base + offset(spu_thread::pc)
c.ldr(a64::w1, arm::Mem(a64::x19, OFFSET_OF(spu_thread, pc))); // REG_Base + offset(spu_thread::pc)
// Compute LS address = REG_Sp + PC, store into x7 (use later)
c.add(a64::x7, a64::x20, a64::x1);
// Load 32b from LS address
@ -274,7 +274,7 @@ DECLARE(spu_runtime::tr_all) = []
c.mov(a64::x4, Imm(reinterpret_cast<u64>(g_dispatcher)));
// Update block hash
c.mov(a64::x5, Imm(0));
c.str(a64::x5, arm::Mem(a64::x19, ::offset32(&spu_thread::block_hash))); // REG_Base + offset(spu_thread::block_hash)
c.str(a64::x5, arm::Mem(a64::x19, OFFSET_OF(spu_thread, block_hash))); // REG_Base + offset(spu_thread::block_hash)
// Jump to [g_dispatcher + idx * 8]
c.mov(a64::x6, Imm(8));
c.mul(a64::x6, a64::x3, a64::x6);
@ -327,7 +327,7 @@ DECLARE(spu_runtime::g_gateway) = build_function_asm<spu_function_t>("spu_gatewa
#endif
// Save native stack pointer for longjmp emulation
c.mov(x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);
c.mov(x86::qword_ptr(args[0], OFFSET_OF(spu_thread, hv_ctx.regs)), x86::rsp);
// Move 4 args (despite spu_function_t def)
c.mov(x86::r13, args[0]);
@ -381,7 +381,7 @@ DECLARE(spu_runtime::g_gateway) = build_function_asm<spu_function_t>("spu_gatewa
#elif defined(ARCH_ARM64)
// Save non-volatile regs. We do this within the thread context instead of normal stack
const u32 hv_regs_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const u32 hv_regs_base = OFFSET_OF(spu_thread, hv_ctx.regs);
// NOTE: A64 gp-gp-imm add only takes immediates of upto 4095. Larger numbers can work, but need to be multiples of 2 for lowering to replace the instruction correctly
// Unfortunately asmjit fails silently on these patterns which can generate incorrect code
c.mov(a64::x15, args[0]);
@ -447,14 +447,14 @@ DECLARE(spu_runtime::g_escape) = build_function_asm<void (*)(spu_thread*)>("spu_
#if defined(ARCH_X64)
// Restore native stack pointer (longjmp emulation)
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));
c.mov(x86::rsp, x86::qword_ptr(args[0], OFFSET_OF(spu_thread, hv_ctx.regs)));
// Return to the return location
c.sub(x86::rsp, 8);
c.ret();
#elif defined(ARCH_ARM64)
// Far ret, jumps to gateway epilogue
const u32 reg_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const u32 reg_base = OFFSET_OF(spu_thread, hv_ctx.regs);
c.mov(a64::x19, args[0]);
c.mov(a64::x15, Imm(reg_base));
c.add(a64::x15, a64::x15, args[0]);
@ -471,28 +471,28 @@ DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void (*)(spu_thread*, s
#if defined(ARCH_X64)
// Restore native stack pointer (longjmp emulation)
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));
c.mov(x86::rsp, x86::qword_ptr(args[0], OFFSET_OF(spu_thread, hv_ctx.regs)));
// Adjust stack for initial call instruction in the gateway
c.sub(x86::rsp, 16);
// Tail call, GHC CC (second arg)
c.mov(x86::r13, args[0]);
c.mov(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::ls)));
c.mov(x86::rbp, x86::qword_ptr(args[0], OFFSET_OF(spu_thread, ls)));
c.mov(x86::r12, args[2]);
c.xor_(x86::ebx, x86::ebx);
c.mov(x86::qword_ptr(x86::rsp), args[1]);
c.ret();
#elif defined(ARCH_ARM64)
// HV pointer
const u32 reg_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const u32 reg_base = OFFSET_OF(spu_thread, hv_ctx.regs);
// Tail call, GHC CC
c.mov(a64::x19, args[0]); // REG_Base
c.mov(a64::x15, Imm(::offset32(&spu_thread::ls))); // SPU::ls offset cannot be correctly encoded for ldr as it is too large
c.ldr(a64::x20, arm::Mem(a64::x19, a64::x15)); // REG_Sp
c.mov(a64::x21, args[2]); // REG_Hp
c.mov(a64::x22, a64::xzr); // REG_R1
c.mov(a64::x19, args[0]); // REG_Base
c.mov(a64::x15, Imm(OFFSET_OF(spu_thread, ls))); // SPU::ls offset cannot be correctly encoded for ldr as it is too large
c.ldr(a64::x20, arm::Mem(a64::x19, a64::x15)); // REG_Sp
c.mov(a64::x21, args[2]); // REG_Hp
c.mov(a64::x22, a64::xzr); // REG_R1
// Reset sp to patch leaks. Calls to tail escape may leave their stack "dirty" due to optimizations.
c.mov(a64::x14, Imm(reg_base + 8));
@ -7754,13 +7754,13 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x49;
*raw++ = 0x89;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::block_hash));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, block_hash));
// Load PC: mov eax, [r13 + spu_thread::pc]
*raw++ = 0x41;
*raw++ = 0x8b;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, pc));
// Get LS address starting from PC: lea rcx, [rbp + rax]
*raw++ = 0x48;
@ -7824,18 +7824,18 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x48;
*raw++ = 0x8d;
*raw++ = 0x7d;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::gpr));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, gpr));
// Save base pc: mov [rbp + spu_thread::base_pc], eax
*raw++ = 0x89;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::base_pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, base_pc));
// inc block_counter
*raw++ = 0x48;
*raw++ = 0xff;
*raw++ = 0x85;
const u32 blc_off = ::offset32(&spu_thread::block_counter);
const u32 blc_off = OFFSET_OF(spu_thread, block_counter);
std::memcpy(raw, &blc_off, 4);
raw += 4;
@ -7858,7 +7858,7 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x44;
*raw++ = 0x89;
*raw++ = 0x65;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, pc));
// Epilogue: add rsp,0x28
*raw++ = 0x48;
@ -7890,7 +7890,7 @@ struct spu_fast : public spu_recompiler_base
*raw++ = type == spu_itype::BRHZ || type == spu_itype::BRHNZ ? 0x66 : 0x90;
*raw++ = 0x83;
*raw++ = 0xbd;
const u32 off = ::offset32(&spu_thread::gpr, op.rt) + 12;
const u32 off = OFFSET_OF(spu_thread, gpr[op.rt]) + 12;
std::memcpy(raw, &off, 4);
raw += 4;
*raw++ = 0x00;
@ -7957,7 +7957,7 @@ struct spu_fast : public spu_recompiler_base
// sub eax, [rbp + spu_thread::base_pc]
*raw++ = 0x2b;
*raw++ = 0x45;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::base_pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, base_pc));
// cmp eax, (0 - size)
*raw++ = 0x3d;
@ -7992,7 +7992,7 @@ struct spu_fast : public spu_recompiler_base
*raw++ = 0x44;
*raw++ = 0x89;
*raw++ = 0x65;
*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));
*raw++ = ::narrow<s8>(OFFSET_OF(spu_thread, pc));
// Epilogue: add rsp,0x28 ; ret
*raw++ = 0x48;

View file

@ -99,7 +99,7 @@ namespace asmjit
c.shl(x86::eax, I + 4);
}
const auto ptr = x86::oword_ptr(spu, x86::rax, 0, ::offset32(&spu_thread::gpr));
const auto ptr = x86::oword_ptr(spu, x86::rax, 0, OFFSET_OF(spu_thread, gpr));
if (utils::has_avx())
{

View file

@ -329,9 +329,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (!m_finfo->fn && !m_block)
{
lr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::gpr, +s_reg_lr, &v128::_u32, 3));
sp = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(&spu_thread::gpr, +s_reg_sp));
r3 = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(&spu_thread::gpr, 3));
lr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, gpr[+s_reg_lr]._u32[3])));
sp = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(OFFSET_OF(spu_thread, gpr[+s_reg_sp])));
r3 = m_ir->CreateLoad(get_type<u32[4]>(), spu_ptr<u32[4]>(OFFSET_OF(spu_thread, gpr[3])));
}
else
{
@ -348,8 +348,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (!m_finfo->fn)
{
lr = m_ir->CreateAnd(lr, 0x3fffc);
m_ir->CreateStore(lr, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(_call, spu_ptr<u32[4]>(&spu_thread::gpr, 3));
m_ir->CreateStore(lr, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
m_ir->CreateStore(_call, spu_ptr<u32[4]>(OFFSET_OF(spu_thread, gpr[3])));
m_ir->CreateBr(add_block_indirect({}, value<u32>(lr)));
}
else if (tail)
@ -392,7 +392,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_blocks.clear();
m_block_queue.clear();
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, memory_base_addr)));
}
// Add block with current block as a predecessor
@ -415,7 +415,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_lsptr = fn->getArg(1);
m_base_pc = fn->getArg(2);
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", fn));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, memory_base_addr)));
// Load registers at the entry chunk
for (u32 i = 0; i < s_reg_max; i++)
@ -452,7 +452,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_base_pc, m_ir->getInt32(m_base)), next, fail);
m_ir->SetInsertPoint(fail);
m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
tail_chunk(nullptr);
m_ir->SetInsertPoint(next);
}
@ -490,7 +490,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
ensure(!m_finfo->fn);
m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
}
else
{
@ -539,16 +539,16 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return m_ir->CreateGEP(get_type<u8>(), base, offset);
}
template <typename T, typename... Args>
llvm::Value* spu_ptr(Args... offset_args)
template <typename T>
llvm::Value* spu_ptr(std::uint32_t offset)
{
return _ptr<T>(m_thread, ::offset32(offset_args...));
return _ptr<T>(m_thread, offset);
}
template <typename T, typename... Args>
llvm::Value* spu_ptr(value_t<u64> add, Args... offset_args)
template <typename T>
llvm::Value* spu_ptr(value_t<u64> add, std::uint32_t offset)
{
const auto off = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->getInt64(::offset32(offset_args...)));
const auto off = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->getInt64(offset));
return m_ir->CreateAdd(off, add.value);
}
@ -578,15 +578,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
if (index < 128)
{
return ::offset32(&spu_thread::gpr, index);
return OFFSET_OF(spu_thread, gpr[index]);
}
switch (index)
{
case s_reg_mfc_eal: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eal);
case s_reg_mfc_lsa: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::lsa);
case s_reg_mfc_tag: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::tag);
case s_reg_mfc_size: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::size);
case s_reg_mfc_eal: return OFFSET_OF(spu_thread, ch_mfc_cmd.eal);
case s_reg_mfc_lsa: return OFFSET_OF(spu_thread, ch_mfc_cmd.lsa);
case s_reg_mfc_tag: return OFFSET_OF(spu_thread, ch_mfc_cmd.tag);
case s_reg_mfc_size: return OFFSET_OF(spu_thread, ch_mfc_cmd.size);
default:
fmt::throw_exception("get_reg_offset(%u): invalid register index", index);
}
@ -1049,13 +1049,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Update PC for current or explicitly specified instruction address
void update_pc(u32 target = -1)
{
m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr<u32>(&spu_thread::pc))->setVolatile(true);
m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)))->setVolatile(true);
}
// Call cpu_thread::check_state if necessary and return or continue (full check)
void check_state(u32 addr, bool may_be_unsafe_for_savestate = true)
{
const auto pstate = spu_ptr<u32>(&spu_thread::state);
const auto pstate = spu_ptr<u32>(OFFSET_OF(spu_thread, state));
const auto _body = llvm::BasicBlock::Create(m_context, "", m_function);
const auto check = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(get_type<u32>(), pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely);
@ -1069,14 +1069,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (may_be_unsafe_for_savestate)
{
m_ir->CreateStore(m_ir->getInt8(1), spu_ptr<u8>(&spu_thread::unsavable))->setVolatile(true);
m_ir->CreateStore(m_ir->getInt8(1), spu_ptr<u8>(OFFSET_OF(spu_thread, unsavable)))->setVolatile(true);
}
m_ir->CreateCall(m_test_state, {m_thread});
if (may_be_unsafe_for_savestate)
{
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(&spu_thread::unsavable))->setVolatile(true);
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(OFFSET_OF(spu_thread, unsavable)))->setVolatile(true);
}
m_ir->CreateBr(_body);
@ -1145,7 +1145,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto _final = llvm::BasicBlock::Create(m_context, "__putllc16_final", m_function);
const auto _eal = (get_reg_fixed<u32>(s_reg_mfc_eal) & -128).eval(m_ir);
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::raddr));
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, raddr)));
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _raddr_match, _fail, m_md_likely);
m_ir->SetInsertPoint(_raddr_match);
@ -1259,7 +1259,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(_fail);
call("PUTLLC16_fail", +on_fail, m_thread, _eal);
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(&spu_thread::ch_atomic_stat));
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_atomic_stat)));
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(_final);
@ -1269,7 +1269,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto diff = m_ir->CreateZExt(m_ir->CreateSub(dest, _lsa), get_type<u64>());
const auto _new = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr<u128>(m_lsptr, dest), llvm::MaybeAlign{16});
const auto _rdata = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr<u128>(spu_ptr<u8>(&spu_thread::rdata), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16});
const auto _rdata = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr<u128>(spu_ptr<u8>(OFFSET_OF(spu_thread, rdata)), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16});
const bool is_accurate_op = !!g_cfg.core.spu_accurate_reservations;
@ -1289,8 +1289,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Touch memory (on the opposite side of the page)
m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, _ptr<u8>(m_memptr, m_ir->CreateXor(_eal, 4096 / 2)), m_ir->getInt8(0), llvm::MaybeAlign{16}, llvm::AtomicOrdering::SequentiallyConsistent);
const auto rptr = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::reserv_base_addr)), ((eal_val & 0xff80) >> 1).eval(m_ir));
const auto rtime = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::rtime));
const auto rptr = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, reserv_base_addr))), ((eal_val & 0xff80) >> 1).eval(m_ir));
const auto rtime = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, rtime)));
m_ir->CreateBr(_repeat_lock);
m_ir->SetInsertPoint(_repeat_lock);
@ -1313,7 +1313,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(_lock_success);
// Commit 16 bytes compare-exchange
const auto sudo_ptr = _ptr<u8>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_sudo_addr)), _eal);
const auto sudo_ptr = _ptr<u8>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, memory_sudo_addr))), _eal);
m_ir->CreateCondBr(
m_ir->CreateExtractValue(m_ir->CreateAtomicCmpXchg(_ptr<u128>(sudo_ptr, diff), _rdata, _new, llvm::MaybeAlign{16}, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent), 1), _success_and_unlock, _fail_and_unlock);
@ -1331,13 +1331,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Perform unlocked vm::reservation_update if no physical memory changes needed
m_ir->SetInsertPoint(_inc_res);
const auto rptr2 = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::reserv_base_addr)), ((eal_val & 0xff80) >> 1).eval(m_ir));
const auto rptr2 = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, reserv_base_addr))), ((eal_val & 0xff80) >> 1).eval(m_ir));
llvm::Value* old_val{};
if (true || is_accurate_op)
{
old_val = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::rtime));
old_val = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, rtime)));
}
else
{
@ -1358,8 +1358,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
m_ir->SetInsertPoint(_success);
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_SUCCESS), spu_ptr<u64>(&spu_thread::ch_atomic_stat));
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(&spu_thread::raddr));
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_SUCCESS), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_atomic_stat)));
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(OFFSET_OF(spu_thread, raddr)));
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(_fail_and_unlock);
@ -1368,7 +1368,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_ir->SetInsertPoint(_fail);
call("PUTLLC16_fail", +on_fail, m_thread, _eal);
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(&spu_thread::ch_atomic_stat));
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_atomic_stat)));
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(_final);
@ -1408,7 +1408,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto _final = llvm::BasicBlock::Create(m_context, "", m_function);
const auto _eal = (get_reg_fixed<u32>(s_reg_mfc_eal) & -128).eval(m_ir);
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::raddr));
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, raddr)));
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely);
m_ir->SetInsertPoint(_next);
@ -1416,23 +1416,23 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
value_t<u32> eal_val;
eal_val.value = _eal;
const auto rptr = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::reserv_base_addr)), ((eal_val & 0xff80) >> 1).eval(m_ir));
const auto rval = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::rtime));
const auto rptr = _ptr<u64>(m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, reserv_base_addr))), ((eal_val & 0xff80) >> 1).eval(m_ir));
const auto rval = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, rtime)));
m_ir->CreateCondBr(
m_ir->CreateExtractValue(m_ir->CreateAtomicCmpXchg(rptr, rval, m_ir->CreateAdd(rval, m_ir->getInt64(128)), llvm::MaybeAlign{16}, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent), 1), _next0, g_cfg.core.spu_accurate_reservations ? _fail : _next0); // Succeed unconditionally
m_ir->SetInsertPoint(_next0);
// call("atomic_wait_engine::notify_all", static_cast<void(*)(const void*)>(atomic_wait_engine::notify_all), rptr);
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_SUCCESS), spu_ptr<u64>(&spu_thread::ch_atomic_stat));
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_SUCCESS), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_atomic_stat)));
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(_fail);
call("PUTLLC0_fail", +on_fail, m_thread, _eal);
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(&spu_thread::ch_atomic_stat));
m_ir->CreateStore(m_ir->getInt64(spu_channel::bit_count | MFC_PUTLLC_FAILURE), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_atomic_stat)));
m_ir->CreateBr(_final);
m_ir->SetInsertPoint(_final);
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(&spu_thread::raddr));
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(OFFSET_OF(spu_thread, raddr)));
}
public:
@ -1470,7 +1470,7 @@ public:
{
.debug_info = false, // Set to "true" to insert debug frames on x27
.use_stack_frames = false, // We don't need this since the SPU GW allocates global scratch on the stack
.hypervisor_context_offset = ::offset32(&spu_thread::hv_ctx),
.hypervisor_context_offset = OFFSET_OF(spu_thread, hv_ctx),
.exclusion_callback = should_exclude_function,
.base_register_lookup = {} // Unused, always x19 on SPU
};
@ -1618,10 +1618,10 @@ public:
const auto label_stop = BasicBlock::Create(m_context, "", m_function);
// Load PC, which will be the actual value of 'm_base'
m_base_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
m_base_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
// Emit state check
const auto pstate = spu_ptr<u32>(&spu_thread::state);
const auto pstate = spu_ptr<u32>(OFFSET_OF(spu_thread, state));
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), pstate), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely);
// Emit code check
@ -1630,7 +1630,7 @@ public:
// Set block hash for profiling (if enabled)
if (g_cfg.core.spu_prof && g_cfg.core.spu_verification)
m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536)), spu_ptr<u64>(&spu_thread::block_hash));
m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536)), spu_ptr<u64>(OFFSET_OF(spu_thread, block_hash)));
if (!g_cfg.core.spu_verification)
{
@ -1893,7 +1893,7 @@ public:
// Increase block counter with statistics
m_ir->SetInsertPoint(label_body);
const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter);
const auto pbcount = spu_ptr<u64>(OFFSET_OF(spu_thread, block_counter));
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbcount), m_ir->getInt64(check_iterations)), pbcount);
// Call the entry function chunk
@ -1927,7 +1927,7 @@ public:
if (g_cfg.core.spu_verification)
{
const auto pbfail = spu_ptr<u64>(&spu_thread::block_failure);
const auto pbfail = spu_ptr<u64>(OFFSET_OF(spu_thread, block_failure));
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbfail), m_ir->getInt64(1)), pbfail);
const auto dispci = call("spu_dispatch", spu_runtime::tr_dispatch, m_thread, m_lsptr, main_arg2);
dispci->setCallingConv(CallingConv::GHC);
@ -1987,7 +1987,7 @@ public:
// Set block hash for profiling (if enabled)
if (g_cfg.core.spu_prof)
m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536) | (m_entry >> 2)), spu_ptr<u64>(&spu_thread::block_hash));
m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536) | (m_entry >> 2)), spu_ptr<u64>(OFFSET_OF(spu_thread, block_hash)));
m_finfo = &m_functions[m_entry];
m_ir->CreateBr(add_block(m_entry));
@ -2918,7 +2918,7 @@ public:
set_function(main_func);
// Load pc and opcode
m_interp_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
m_interp_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
m_interp_op = m_ir->CreateLoad(get_type<u32>(), m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_ir->CreateZExt(m_interp_pc, get_type<u64>())));
m_interp_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {m_interp_op});
@ -2932,7 +2932,7 @@ public:
m_interp_regs = _ptr(m_thread, get_reg_offset(0));
// Save host thread's stack pointer
const auto native_sp = spu_ptr<u64>(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const auto native_sp = spu_ptr<u64>(OFFSET_OF(spu_thread, hv_ctx.regs));
#if defined(ARCH_X64)
const auto rsp_name = MetadataAsValue::get(m_context, MDNode::get(m_context, {MDString::get(m_context, "rsp")}));
#elif defined(ARCH_ARM64)
@ -3018,7 +3018,7 @@ public:
m_interp_regs = f->getArg(6);
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", f));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));
m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(OFFSET_OF(spu_thread, memory_base_addr)));
switch (itype)
{
@ -3034,7 +3034,7 @@ public:
case spu_itype::WRCH:
{
// Invalid or abortable instruction. Save current address.
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
[[fallthrough]];
}
default:
@ -3078,7 +3078,7 @@ public:
{
if (check)
{
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
}
// Decode next instruction.
@ -3115,9 +3115,9 @@ public:
{
const auto _stop = BasicBlock::Create(m_context, "", f);
const auto _next = BasicBlock::Create(m_context, "", f);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, state)))), _stop, _next, m_md_unlikely);
m_ir->SetInsertPoint(_stop);
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
const auto escape_yes = BasicBlock::Create(m_context, "", f);
const auto escape_no = BasicBlock::Create(m_context, "", f);
@ -3171,7 +3171,7 @@ public:
// Call next instruction.
const auto _stop = BasicBlock::Create(m_context, "", f);
const auto _next = BasicBlock::Create(m_context, "", f);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::state))), _stop, _next, m_md_unlikely);
m_ir->CreateCondBr(m_ir->CreateIsNotNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, state)))), _stop, _next, m_md_unlikely);
m_ir->SetInsertPoint(_next);
if (itype == spu_itype::WRCH ||
@ -3189,7 +3189,7 @@ public:
ncall->setTailCall();
m_ir->CreateRetVoid();
m_ir->SetInsertPoint(_stop);
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
@ -3314,7 +3314,7 @@ public:
{
if (m_interp_magn)
{
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
call("spu_unknown", &exec_unk, m_thread, m_ir->getInt32(op_unk.opcode));
return;
}
@ -3469,7 +3469,7 @@ public:
{
case SPU_RdSRR0:
{
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, srr0)));
break;
}
case SPU_RdInMbox:
@ -3481,36 +3481,36 @@ public:
}
case MFC_RdTagStat:
{
res.value = get_rdch(op, ::offset32(&spu_thread::ch_tag_stat), false);
res.value = get_rdch(op, OFFSET_OF(spu_thread, ch_tag_stat), false);
break;
}
case MFC_RdTagMask:
{
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_mask));
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, ch_tag_mask)));
break;
}
case SPU_RdSigNotify1:
{
update_pc();
ensure_gpr_stores();
res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr1), true);
res.value = get_rdch(op, OFFSET_OF(spu_thread, ch_snr1), true);
break;
}
case SPU_RdSigNotify2:
{
update_pc();
ensure_gpr_stores();
res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr2), true);
res.value = get_rdch(op, OFFSET_OF(spu_thread, ch_snr2), true);
break;
}
case MFC_RdAtomicStat:
{
res.value = get_rdch(op, ::offset32(&spu_thread::ch_atomic_stat), false);
res.value = get_rdch(op, OFFSET_OF(spu_thread, ch_atomic_stat), false);
break;
}
case MFC_RdListStallStat:
{
res.value = get_rdch(op, ::offset32(&spu_thread::ch_stall_stat), false);
res.value = get_rdch(op, OFFSET_OF(spu_thread, ch_stall_stat), false);
break;
}
case SPU_RdDec:
@ -3519,13 +3519,13 @@ public:
if (utils::get_tsc_freq() && !(g_cfg.core.spu_loop_detection) && (g_cfg.core.clocks_scale == 100))
{
const auto timebase_offs = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateIntToPtr(m_ir->getInt64(reinterpret_cast<u64>(&g_timebase_offs)), get_type<u64*>()));
const auto timestamp = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
const auto dec_value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_dec_value));
const auto timestamp = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_dec_start_timestamp)));
const auto dec_value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, ch_dec_value)));
const auto tsc = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_rdtsc));
const auto tscx = m_ir->CreateMul(m_ir->CreateUDiv(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000));
const auto tscm = m_ir->CreateUDiv(m_ir->CreateMul(m_ir->CreateURem(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000)), m_ir->getInt64(utils::get_tsc_freq()));
const auto tsctb = m_ir->CreateSub(m_ir->CreateAdd(tscx, tscm), timebase_offs);
const auto frz = m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::is_dec_frozen));
const auto frz = m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(OFFSET_OF(spu_thread, is_dec_frozen)));
const auto frzev = m_ir->CreateICmpEQ(frz, m_ir->getInt8(0));
const auto delta = m_ir->CreateTrunc(m_ir->CreateSub(tsctb, timestamp), get_type<u32>());
@ -3539,7 +3539,7 @@ public:
}
case SPU_RdEventMask:
{
const auto value = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events));
const auto value = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_events)));
value->setAtomic(llvm::AtomicOrdering::Acquire);
res.value = m_ir->CreateTrunc(m_ir->CreateLShr(value, 32), get_type<u32>());
break;
@ -3554,22 +3554,22 @@ public:
}
else
{
m_ir->CreateStore(m_ir->getInt8(1), spu_ptr<u8>(&spu_thread::unsavable));
m_ir->CreateStore(m_ir->getInt8(1), spu_ptr<u8>(OFFSET_OF(spu_thread, unsavable)));
}
res.value = call("spu_read_events", &exec_read_events, m_thread);
if (!g_cfg.savestate.compatible_mode)
{
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(&spu_thread::unsavable));
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(OFFSET_OF(spu_thread, unsavable)));
}
break;
}
case SPU_RdMachStat:
{
res.value = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::interrupts_enabled)), get_type<u32>());
res.value = m_ir->CreateOr(res.value, m_ir->CreateAnd(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::thread_type)), m_ir->getInt32(2)));
res.value = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(OFFSET_OF(spu_thread, interrupts_enabled))), get_type<u32>());
res.value = m_ir->CreateOr(res.value, m_ir->CreateAnd(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, thread_type))), m_ir->getInt32(2)));
break;
}
@ -3673,22 +3673,22 @@ public:
{
case SPU_WrOutMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
res.value = wait_rchcnt(OFFSET_OF(spu_thread, ch_out_mbox), true);
break;
}
case SPU_WrOutIntrMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
res.value = wait_rchcnt(OFFSET_OF(spu_thread, ch_out_intr_mbox), true);
break;
}
case SPU_RdSigNotify1:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr1));
res.value = wait_rchcnt(OFFSET_OF(spu_thread, ch_snr1));
break;
}
case SPU_RdSigNotify2:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr2));
res.value = wait_rchcnt(OFFSET_OF(spu_thread, ch_snr2));
break;
}
case SPU_RdInMbox:
@ -3698,7 +3698,7 @@ public:
return ch->pop_wait(*_spu, false), ch->get_count();
};
res.value = call("wait_spu_inbox", +wait_inbox, m_thread, spu_ptr<void*>(&spu_thread::ch_in_mbox));
res.value = call("wait_spu_inbox", +wait_inbox, m_thread, spu_ptr<void*>(OFFSET_OF(spu_thread, ch_in_mbox)));
break;
}
default: break;
@ -3715,37 +3715,37 @@ public:
{
case SPU_WrOutMbox:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_out_mbox), true);
break;
}
case SPU_WrOutIntrMbox:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_out_intr_mbox), true);
break;
}
case MFC_RdTagStat:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_tag_stat));
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_tag_stat));
break;
}
case MFC_RdListStallStat:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_stall_stat));
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_stall_stat));
break;
}
case SPU_RdSigNotify1:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_snr1));
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_snr1));
break;
}
case SPU_RdSigNotify2:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_snr2));
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_snr2));
break;
}
case MFC_RdAtomicStat:
{
res.value = get_rchcnt(::offset32(&spu_thread::ch_atomic_stat));
res.value = get_rchcnt(OFFSET_OF(spu_thread, ch_atomic_stat));
break;
}
case MFC_WrTagUpdate:
@ -3755,13 +3755,13 @@ public:
}
case MFC_Cmd:
{
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_size)));
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
break;
}
case SPU_RdInMbox:
{
const auto value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
const auto value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, ch_in_mbox)));
value->setAtomic(llvm::AtomicOrdering::Acquire);
res.value = value;
res.value = m_ir->CreateLShr(res.value, 8);
@ -3770,7 +3770,7 @@ public:
}
case SPU_RdEventStat:
{
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_events))), 32), get_type<u32>());
res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
break;
}
@ -3868,7 +3868,7 @@ public:
{
case SPU_WrSRR0:
{
m_ir->CreateStore(eval(val & 0x3fffc).value, spu_ptr<u32>(&spu_thread::srr0));
m_ir->CreateStore(eval(val & 0x3fffc).value, spu_ptr<u32>(OFFSET_OF(spu_thread, srr0)));
return;
}
case SPU_WrOutIntrMbox:
@ -3884,10 +3884,10 @@ public:
case MFC_WrTagMask:
{
// TODO
m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_tag_mask));
m_ir->CreateStore(val.value, spu_ptr<u32>(OFFSET_OF(spu_thread, ch_tag_mask)));
const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
const auto _mfc = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_upd)), m_ir->getInt32(MFC_TAG_UPDATE_IMMEDIATE)), _mfc, next);
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, ch_tag_upd))), m_ir->getInt32(MFC_TAG_UPDATE_IMMEDIATE)), _mfc, next);
m_ir->SetInsertPoint(_mfc);
update_pc();
call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value);
@ -3899,11 +3899,11 @@ public:
{
if (true)
{
const auto tag_mask = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_mask));
const auto mfc_fence = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_fence));
const auto tag_mask = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, ch_tag_mask)));
const auto mfc_fence = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_fence)));
const auto completed = m_ir->CreateAnd(tag_mask, m_ir->CreateNot(mfc_fence));
const auto upd_ptr = spu_ptr<u32>(&spu_thread::ch_tag_upd);
const auto stat_ptr = spu_ptr<u64>(&spu_thread::ch_tag_stat);
const auto upd_ptr = spu_ptr<u32>(OFFSET_OF(spu_thread, ch_tag_upd));
const auto stat_ptr = spu_ptr<u64>(OFFSET_OF(spu_thread, ch_tag_stat));
const auto stat_val = m_ir->CreateOr(m_ir->CreateZExt(completed, get_type<u64>()), s64{smin});
const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
@ -3955,7 +3955,7 @@ public:
}
spu_log.warning("[0x%x] MFC_EAH: $%u is not a zero constant", m_pos, +op.rt);
// m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eah));
// m_ir->CreateStore(val.value, spu_ptr<u32>(OFFSET_OF(spu_thread, ch_mfc_cmd.eah)));
return;
}
case MFC_EAL:
@ -4009,8 +4009,8 @@ public:
const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
const auto pf = spu_ptr<u32>(&spu_thread::mfc_fence);
const auto pb = spu_ptr<u32>(&spu_thread::mfc_barrier);
const auto pf = spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_fence));
const auto pb = spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_barrier));
switch (u64 cmd = ci->getZExtValue())
{
@ -4035,7 +4035,7 @@ public:
m_ir->SetInsertPoint(fail);
m_ir->CreateUnreachable();
m_ir->SetInsertPoint(next);
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
m_ir->CreateStore(ci, spu_ptr<u8>(OFFSET_OF(spu_thread, ch_mfc_cmd.cmd)));
update_pc();
ensure_gpr_stores();
call("spu_exec_mfc_cmd_saveable", &exec_mfc_cmd<true>, m_thread);
@ -4054,7 +4054,7 @@ public:
m_ir->SetInsertPoint(fail);
m_ir->CreateUnreachable();
m_ir->SetInsertPoint(next);
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
m_ir->CreateStore(ci, spu_ptr<u8>(OFFSET_OF(spu_thread, ch_mfc_cmd.cmd)));
update_pc();
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
return;
@ -4114,7 +4114,7 @@ public:
m_ir->SetInsertPoint(mmio);
}
m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
m_ir->CreateStore(ci, spu_ptr<u8>(OFFSET_OF(spu_thread, ch_mfc_cmd.cmd)));
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
m_ir->CreateBr(next);
m_ir->SetInsertPoint(copy);
@ -4206,7 +4206,7 @@ public:
}
// Disable certain thing
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(&spu_thread::last_faddr));
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(OFFSET_OF(spu_thread, last_faddr)));
m_ir->CreateBr(next);
break;
}
@ -4214,7 +4214,7 @@ public:
case MFC_EIEIO_CMD:
case MFC_SYNC_CMD:
{
const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size)));
const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_size))));
m_ir->CreateCondBr(cond, exec, fail, m_md_likely);
m_ir->SetInsertPoint(exec);
m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
@ -4236,12 +4236,12 @@ public:
m_ir->SetInsertPoint(fail);
// Get MFC slot, redirect to invalid memory address
const auto slot = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&spu_thread::mfc_queue)));
const auto slot = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_size)));
const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(OFFSET_OF(spu_thread, mfc_queue)));
const auto ptr0 = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->CreateZExt(off0, get_type<u64>()));
const auto ptr1 = m_ir->CreateGEP(get_type<u8>(), m_memptr, m_ir->getInt64(0xffdeadf0));
const auto pmfc = m_ir->CreateSelect(m_ir->CreateICmpULT(slot, m_ir->getInt32(16)), ptr0, ptr1);
m_ir->CreateStore(ci, _ptr<u8>(pmfc, ::offset32(&spu_mfc_cmd::cmd)));
m_ir->CreateStore(ci, _ptr<u8>(pmfc, OFFSET_OF(spu_mfc_cmd, cmd)));
switch (u64 cmd = ci->getZExtValue())
{
@ -4281,10 +4281,10 @@ public:
case MFC_GETB_CMD:
case MFC_GETF_CMD:
{
m_ir->CreateStore(tag.value, _ptr<u8>(pmfc, ::offset32(&spu_mfc_cmd::tag)));
m_ir->CreateStore(size.value, _ptr<u16>(pmfc, ::offset32(&spu_mfc_cmd::size)));
m_ir->CreateStore(lsa.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::lsa)));
m_ir->CreateStore(eal.value, _ptr<u32>(pmfc, ::offset32(&spu_mfc_cmd::eal)));
m_ir->CreateStore(tag.value, _ptr<u8>(pmfc, OFFSET_OF(spu_mfc_cmd, tag)));
m_ir->CreateStore(size.value, _ptr<u16>(pmfc, OFFSET_OF(spu_mfc_cmd, size)));
m_ir->CreateStore(lsa.value, _ptr<u32>(pmfc, OFFSET_OF(spu_mfc_cmd, lsa)));
m_ir->CreateStore(eal.value, _ptr<u32>(pmfc, OFFSET_OF(spu_mfc_cmd, eal)));
m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(get_type<u32>(), pf), mask), pf);
if (cmd & MFC_BARRIER_MASK)
m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(get_type<u32>(), pb), mask), pb);
@ -4305,7 +4305,7 @@ public:
}
}
m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr<u32>(&spu_thread::mfc_size));
m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr<u32>(OFFSET_OF(spu_thread, mfc_size)));
m_ir->CreateBr(next);
m_ir->SetInsertPoint(next);
return;
@ -4318,7 +4318,7 @@ public:
case MFC_WrListStallAck:
{
const auto mask = eval(splat<u32>(1) << (val & 0x1f));
const auto _ptr = spu_ptr<u32>(&spu_thread::ch_stall_mask);
const auto _ptr = spu_ptr<u32>(OFFSET_OF(spu_thread, ch_stall_mask));
const auto _old = m_ir->CreateLoad(get_type<u32>(), _ptr);
const auto _new = m_ir->CreateAnd(_old, m_ir->CreateNot(mask.value));
m_ir->CreateStore(_new, _ptr);
@ -4345,16 +4345,16 @@ public:
const auto tscx = m_ir->CreateMul(m_ir->CreateUDiv(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000));
const auto tscm = m_ir->CreateUDiv(m_ir->CreateMul(m_ir->CreateURem(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000)), m_ir->getInt64(utils::get_tsc_freq()));
const auto tsctb = m_ir->CreateSub(m_ir->CreateAdd(tscx, tscm), timebase_offs);
m_ir->CreateStore(tsctb, spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
m_ir->CreateStore(tsctb, spu_ptr<u64>(OFFSET_OF(spu_thread, ch_dec_start_timestamp)));
}
else
#endif
{
m_ir->CreateStore(call("get_timebased_time", &get_timebased_time), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
m_ir->CreateStore(call("get_timebased_time", &get_timebased_time), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_dec_start_timestamp)));
}
m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_dec_value));
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(&spu_thread::is_dec_frozen));
m_ir->CreateStore(val.value, spu_ptr<u32>(OFFSET_OF(spu_thread, ch_dec_value)));
m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(OFFSET_OF(spu_thread, is_dec_frozen)));
return;
}
case SPU_Set_Bkmk_Tag:
@ -7641,7 +7641,7 @@ public:
m_ir->CreateCondBr(cond.value, halt, next, m_md_unlikely);
m_ir->SetInsertPoint(halt);
if (m_interp_magn)
m_ir->CreateStore(m_function->getArg(2), spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_function->getArg(2), spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
else
update_pc();
const auto ptr = _ptr<u32>(m_memptr, 0xffdead00);
@ -7748,7 +7748,7 @@ public:
target->addIncoming(e_addr, e_exec);
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
m_ir->SetInsertPoint(d_exec);
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(OFFSET_OF(spu_thread, interrupts_enabled)));
m_ir->CreateBr(d_done);
m_ir->SetInsertPoint(d_done);
m_ir->CreateBr(m_interp_bblock);
@ -7784,7 +7784,7 @@ public:
}
else
{
sp.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::gpr, 1, &v128::_u32, 3));
sp.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, gpr[1]._u32[3])));
}
}
@ -7799,15 +7799,15 @@ public:
if (op.d)
{
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(OFFSET_OF(spu_thread, interrupts_enabled)));
}
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(addr.value, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
if (ret && g_cfg.core.spu_block_size >= spu_block_size_type::mega)
{
// Compare address stored in stack mirror with addr
const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
const auto stack0 = eval(zext<u64>(sp) + OFFSET_OF(spu_thread, stack_mirror));
const auto stack1 = eval(stack0 + 8);
const auto _ret = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value));
const auto link = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateGEP(get_type<u8>(), m_thread, stack1.value));
@ -8070,7 +8070,7 @@ public:
if (op.d && tfound != m_targets.end() && tfound->second.size() == 1 && tfound->second[0] == spu_branch_target(m_pos, 1))
{
// Interrupts-disable pattern
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(OFFSET_OF(spu_thread, interrupts_enabled)));
return;
}
@ -8130,7 +8130,7 @@ public:
// Exit function on unexpected target
m_ir->SetInsertPoint(sw->getDefaultDest());
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(addr.value, spu_ptr<u32>(OFFSET_OF(spu_thread, pc)));
if (m_finfo && m_finfo->fn)
{
@ -8165,7 +8165,7 @@ public:
if (m_block)
m_block->block_end = m_ir->GetInsertBlock();
value_t<u32> srr0;
srr0.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
srr0.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(OFFSET_OF(spu_thread, srr0)));
m_ir->CreateBr(add_block_indirect(op, srr0));
}
@ -8175,7 +8175,7 @@ public:
m_block->block_end = m_ir->GetInsertBlock();
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
set_link(op);
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(OFFSET_OF(spu_thread, ch_events)), true), 32), get_type<u32>());
const auto res = call("spu_get_events", &exec_get_events, m_thread, mask);
const auto target = add_block_indirect(op, addr);
m_ir->CreateCondBr(m_ir->CreateICmpNE(res, m_ir->getInt32(0)), target, add_block_next());
@ -8507,7 +8507,7 @@ public:
{
// Store the return function chunk address at the stack mirror
const auto pfunc = add_function(m_pos + 4);
const auto stack0 = eval(zext<u64>(extract(get_reg_fixed(1), 3) & 0x3fff0) + ::offset32(&spu_thread::stack_mirror));
const auto stack0 = eval(zext<u64>(extract(get_reg_fixed(1), 3) & 0x3fff0) + OFFSET_OF(spu_thread, stack_mirror));
const auto stack1 = eval(stack0 + 8);
const auto rel_ptr = m_ir->CreateSub(m_ir->CreatePtrToInt(pfunc->chunk, get_type<u64>()), get_segment_base());
const auto ptr_plus_op = m_ir->CreateOr(m_ir->CreateShl(rel_ptr, 32), m_ir->getInt64(m_next_op));

View file

@ -695,7 +695,7 @@ const auto spu_putllc_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtime, void
Label tx1 = build_transaction_enter(c, fall, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
c.add(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, ftx) - OFFSET_OF(spu_thread, rdata)), 1);
build_get_tsc(c);
c.sub(x86::rax, stamp0);
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
@ -703,7 +703,7 @@ const auto spu_putllc_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtime, void
});
// Check pause flag
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
c.bt(x86::dword_ptr(args[2], OFFSET_OF(spu_thread, state) - OFFSET_OF(spu_thread, rdata)), static_cast<u32>(cpu_flag::pause));
c.jc(fall);
c.xbegin(tx1);
@ -761,7 +761,7 @@ const auto spu_putllc_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtime, void
c.xend();
c.lock().add(x86::qword_ptr(x86::r11), 64);
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.add(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, stx) - OFFSET_OF(spu_thread, rdata)), 1);
build_get_tsc(c);
c.sub(x86::rax, stamp0);
c.jmp(_ret);
@ -790,7 +790,7 @@ const auto spu_putllc_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtime, void
}
c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.add(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, stx) - OFFSET_OF(spu_thread, rdata)), 1);
c.jmp(fail2);
c.bind(fall);
@ -822,7 +822,7 @@ const auto spu_putllc_tx = build_function_asm<u64 (*)(u32 raddr, u64 rtime, void
}
c.mov(x86::rax, -1);
c.mov(x86::qword_ptr(args[2], ::offset32(&spu_thread::last_ftime) - ::offset32(&spu_thread::rdata)), x86::rax);
c.mov(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, last_ftime) - OFFSET_OF(spu_thread, rdata)), x86::rax);
c.xor_(x86::eax, x86::eax);
// c.jmp(_ret);
@ -1031,7 +1031,7 @@ const auto spu_getllar_tx = build_function_asm<u64 (*)(u32 raddr, void* rdata, c
// Begin transaction
Label tx0 = build_transaction_enter(c, fall, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
c.add(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, ftx)), 1);
build_get_tsc(c);
c.sub(x86::rax, stamp0);
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
@ -1039,7 +1039,7 @@ const auto spu_getllar_tx = build_function_asm<u64 (*)(u32 raddr, void* rdata, c
});
// Check pause flag
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
c.bt(x86::dword_ptr(args[2], OFFSET_OF(spu_thread, state)), static_cast<u32>(cpu_flag::pause));
c.jc(fall);
c.mov(x86::rax, x86::qword_ptr(x86::r11));
c.and_(x86::rax, -128);
@ -1068,7 +1068,7 @@ const auto spu_getllar_tx = build_function_asm<u64 (*)(u32 raddr, void* rdata, c
}
c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
c.add(x86::qword_ptr(args[2], OFFSET_OF(spu_thread, stx)), 1);
build_get_tsc(c);
c.sub(x86::rax, stamp0);

View file

@ -1240,7 +1240,7 @@ public:
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
{
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
code = ppu.gpr[11];
}

View file

@ -62,7 +62,7 @@ CellError lv2_cond::on_id_create()
std::function<void(void*)> lv2_cond::load(utils::serial& ar)
{
return load_func(make_shared<lv2_cond>(stx::exact_t<utils::serial&>(ar)));
return load_func(make_shared<lv2_cond>(exact_t<utils::serial&>(ar)));
}
void lv2_cond::save(utils::serial& ar)

View file

@ -27,7 +27,7 @@ lv2_event_queue::lv2_event_queue(utils::serial& ar) noexcept
std::function<void(void*)> lv2_event_queue::load(utils::serial& ar)
{
auto queue = make_shared<lv2_event_queue>(stx::exact_t<utils::serial&>(ar));
auto queue = make_shared<lv2_event_queue>(exact_t<utils::serial&>(ar));
return [ptr = lv2_obj::load(queue->key, queue)](void* storage)
{
*static_cast<atomic_ptr<lv2_obj>*>(storage) = ptr;

View file

@ -18,7 +18,7 @@ lv2_event_flag::lv2_event_flag(utils::serial& ar)
std::function<void(void*)> lv2_event_flag::load(utils::serial& ar)
{
return load_func(make_shared<lv2_event_flag>(stx::exact_t<utils::serial&>(ar)));
return load_func(make_shared<lv2_event_flag>(exact_t<utils::serial&>(ar)));
}
void lv2_event_flag::save(utils::serial& ar)

View file

@ -27,7 +27,7 @@ lv2_memory_container::lv2_memory_container(utils::serial& ar, bool from_idm) noe
std::function<void(void*)> lv2_memory_container::load(utils::serial& ar)
{
// Use idm::last_id() only for the instances at IDM
return [ptr = make_shared<lv2_memory_container>(stx::exact_t<utils::serial&>(ar), true)](void* storage)
return [ptr = make_shared<lv2_memory_container>(exact_t<utils::serial&>(ar), true)](void* storage)
{
*static_cast<atomic_ptr<lv2_memory_container>*>(storage) = ptr;
};

View file

@ -72,7 +72,7 @@ CellError lv2_memory::on_id_create()
std::function<void(void*)> lv2_memory::load(utils::serial& ar)
{
auto mem = make_shared<lv2_memory>(stx::exact_t<utils::serial&>(ar));
auto mem = make_shared<lv2_memory>(exact_t<utils::serial&>(ar));
mem->exists++; // Disable on_id_create()
auto func = load_func(mem, +mem->pshared);
mem->exists--;

View file

@ -22,7 +22,7 @@ lv2_mutex::lv2_mutex(utils::serial& ar)
std::function<void(void*)> lv2_mutex::load(utils::serial& ar)
{
return load_func(make_shared<lv2_mutex>(stx::exact_t<utils::serial&>(ar)));
return load_func(make_shared<lv2_mutex>(exact_t<utils::serial&>(ar)));
}
void lv2_mutex::save(utils::serial& ar)

View file

@ -20,7 +20,7 @@ lv2_socket_native::lv2_socket_native(lv2_socket_family family, lv2_socket_type t
}
lv2_socket_native::lv2_socket_native(utils::serial& ar, lv2_socket_type type)
: lv2_socket(stx::make_exact(ar), type)
: lv2_socket(make_exact(ar), type)
{
[[maybe_unused]] const s32 version = GET_SERIALIZATION_VERSION(lv2_net);

View file

@ -17,7 +17,7 @@ lv2_socket_p2p::lv2_socket_p2p(lv2_socket_family family, lv2_socket_type type, l
}
lv2_socket_p2p::lv2_socket_p2p(utils::serial& ar, lv2_socket_type type)
: lv2_socket(stx::make_exact(ar), type)
: lv2_socket(make_exact(ar), type)
{
ar(port, vport, bound_addr);

View file

@ -27,7 +27,7 @@ lv2_socket_raw::lv2_socket_raw(lv2_socket_family family, lv2_socket_type type, l
}
lv2_socket_raw::lv2_socket_raw(utils::serial& ar, lv2_socket_type type)
: lv2_socket(stx::make_exact(ar), type)
: lv2_socket(make_exact(ar), type)
{
}

View file

@ -18,7 +18,7 @@ lv2_rwlock::lv2_rwlock(utils::serial& ar)
std::function<void(void*)> lv2_rwlock::load(utils::serial& ar)
{
return load_func(make_shared<lv2_rwlock>(stx::exact_t<utils::serial&>(ar)));
return load_func(make_shared<lv2_rwlock>(exact_t<utils::serial&>(ar)));
}
void lv2_rwlock::save(utils::serial& ar)

View file

@ -18,7 +18,7 @@ lv2_sema::lv2_sema(utils::serial& ar)
std::function<void(void*)> lv2_sema::load(utils::serial& ar)
{
return load_func(make_shared<lv2_sema>(stx::exact_t<utils::serial&>(ar)));
return load_func(make_shared<lv2_sema>(exact_t<utils::serial&>(ar)));
}
void lv2_sema::save(utils::serial& ar)

View file

@ -1,6 +1,7 @@
#include "stdafx.h"
#include "GDB.h"
#include "util/bit_set.h"
#include "util/logs.hpp"
#include "util/StrUtil.h"
#include "Emu/Memory/vm.h"

View file

@ -26,7 +26,7 @@ template <typename T>
concept IdmBaseCompatible = (std::is_final_v<T> ? IdmCompatible<T> : !!(requires() { u32{T::id_step}, u32{T::id_count}; }));
template <typename T>
concept IdmSavable = IdmBaseCompatible<T> && T::savestate_init_pos != 0 && (requires(T& t, utils::serial& ar) { t.save(stx::exact_t<utils::serial&>(ar)); });
concept IdmSavable = IdmBaseCompatible<T> && T::savestate_init_pos != 0 && (requires(T& t, utils::serial& ar) { t.save(exact_t<utils::serial&>(ar)); });
// If id_base is declared in base type, then storage type must declare id_type
template <typename Base, typename Type>
@ -113,13 +113,13 @@ namespace id_manager
static constexpr pointer_keeper (*load)(utils::serial&) = [](utils::serial& ar) -> pointer_keeper {
stx::shared_ptr<T> ptr;
if constexpr (std::is_constructible_v<T, stx::exact_t<const stx::launch_retainer&>, stx::exact_t<utils::serial&>>)
if constexpr (std::is_constructible_v<T, exact_t<const stx::launch_retainer&>, exact_t<utils::serial&>>)
{
ptr = stx::make_shared<T>(stx::launch_retainer{}, stx::exact_t<utils::serial&>(ar));
ptr = stx::make_shared<T>(stx::launch_retainer{}, exact_t<utils::serial&>(ar));
}
else
{
ptr = stx::make_shared<T>(stx::exact_t<utils::serial&>(ar));
ptr = stx::make_shared<T>(exact_t<utils::serial&>(ar));
}
return [ptr](void* storage)
@ -134,7 +134,7 @@ namespace id_manager
struct id_traits_load_func<T>
{
static constexpr pointer_keeper (*load)(utils::serial&) = [](utils::serial& ar) -> pointer_keeper {
return T::load(stx::exact_t<utils::serial&>(ar));
return T::load(exact_t<utils::serial&>(ar));
};
};

View file

@ -1399,7 +1399,7 @@ bool GLGSRender::release_GCM_label(u32 address, u32 args)
// Now write to DMA and then to host context
m_enqueued_host_write_buffer->get().copy_to(mapping.second, host_read_offset, mapping.first, 4);
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset + 8, ::offset32(&rsx::host_gpu_context_t::commands_complete_event), 8);
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset + 8, OFFSET_OF(rsx::host_gpu_context_t, commands_complete_event), 8);
m_enqueued_host_write_buffer->push_barrier(host_read_offset, 16);
host_ctx->on_label_release();
@ -1425,7 +1425,7 @@ void GLGSRender::on_guest_texture_read()
// Tag the read as being in progress
u64 event_id = m_host_dma_ctrl->host_ctx()->inc_counter();
m_host_dma_ctrl->host_ctx()->texture_load_request_event = event_id;
enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id);
enqueue_host_context_write(OFFSET_OF(rsx::host_gpu_context_t, texture_load_complete_event), 8, &event_id);
}
void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)

View file

@ -165,7 +165,7 @@ namespace rsx
{
if (offset < sizeof(RsxReports::report) /*&& (offset % 0x10) == 0*/)
{
return render->label_addr + ::offset32(&RsxReports::report) + offset;
return render->label_addr + OFFSET_OF(RsxReports, report) + offset;
}
msg = "Local RSX REPORT offset out of range!"sv;
@ -733,8 +733,8 @@ namespace rsx
if (!ar.is_writing() && version < 3)
{
// Be compatible with previous bitwise serialization
ar(std::span<u8>(reinterpret_cast<u8*>(this), ::offset32(&avconf::scan_mode)));
ar.pos += utils::align<usz>(::offset32(&avconf::scan_mode), alignof(avconf)) - ::offset32(&avconf::scan_mode);
ar(std::span<u8>(reinterpret_cast<u8*>(this), OFFSET_OF(avconf, scan_mode)));
ar.pos += utils::align<usz>(OFFSET_OF(avconf, scan_mode), alignof(avconf)) - OFFSET_OF(avconf, scan_mode);
return;
}
@ -1209,7 +1209,7 @@ namespace rsx
if (const u64 get_put = new_get_put.exchange(u64{umax});
get_put != umax)
{
vm::_ref<atomic_be_t<u64>>(dma_address + ::offset32(&RsxDmaControl::put)).release(get_put);
vm::_ref<atomic_be_t<u64>>(dma_address + OFFSET_OF(RsxDmaControl, put)).release(get_put);
fifo_ctrl->set_get(static_cast<u32>(get_put));
fifo_ctrl->abort();
fifo_ret_addr = RSX_CALL_STACK_EMPTY;

View file

@ -1717,7 +1717,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
auto cmd = m_secondary_cb_list.next();
cmd->begin();
VK_GET_SYMBOL(vkCmdUpdateBuffer)(*cmd, mapping.second->value, mapping.first, 4, &write_data);
VK_GET_SYMBOL(vkCmdUpdateBuffer)(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, &release_event_id);
VK_GET_SYMBOL(vkCmdUpdateBuffer)(*cmd, m_host_object_data->value, OFFSET_OF(vk::host_data_t, commands_complete_event), 8, &release_event_id);
cmd->end();
vk::queue_submit_t submit_info = {m_device->get_graphics_queue(), nullptr};
@ -1739,7 +1739,7 @@ void VKGSRender::on_guest_texture_read(const vk::command_buffer& cmd)
// Queue a sync update on the CB doing the load
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
const auto event_id = host_ctx->on_texture_load_acquire();
VK_GET_SYMBOL(vkCmdUpdateBuffer)(cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id);
VK_GET_SYMBOL(vkCmdUpdateBuffer)(cmd, m_host_object_data->value, OFFSET_OF(vk::host_data_t, texture_load_complete_event), sizeof(u64), &event_id);
}
void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload)
@ -2520,7 +2520,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
{
VK_GET_SYMBOL(vkCmdUpdateBuffer)(*m_current_command_buffer,
m_host_object_data->value,
::offset32(&vk::host_data_t::commands_complete_event),
OFFSET_OF(vk::host_data_t, commands_complete_event),
sizeof(u64),
const_cast<u64*>(&m_host_dma_ctrl->host_ctx()->last_label_acquire_event));

View file

@ -318,7 +318,7 @@ void init_fxo_for_exec(utils::serial* ar, bool full = false)
// Some settings are not allowed in certain PPU decoders
static void fixup_settings(const psf::registry* _psf)
{
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
{
if (g_cfg.core.ppu_use_nj_bit)
{

View file

@ -21,7 +21,7 @@ struct cfg_root : cfg::node
public:
node_core(cfg::node* _this) : cfg::node(_this, "Core") {}
cfg::_enum<ppu_decoder_type> ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm};
cfg::_enum<ppu_decoder_type> ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm_legacy};
cfg::_int<1, 8> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2)
cfg::_bool ppu_debug{this, "PPU Debug"};
cfg::_bool ppu_call_history{this, "PPU Calling History"}; // Enable PPU calling history recording

View file

@ -520,8 +520,9 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
{
switch (type)
{
case ppu_decoder_type::_static: return "Interpreter (static)";
case ppu_decoder_type::llvm: return "Recompiler (LLVM)";
case ppu_decoder_type::_static: return "Interpreter (Legacy)";
case ppu_decoder_type::llvm_legacy: return "LLVM Recompiler (Legacy)";
case ppu_decoder_type::interpreter: return "Interpreter";
}
return unknown;

View file

@ -3,7 +3,8 @@
enum class ppu_decoder_type : unsigned
{
_static,
llvm,
llvm_legacy,
interpreter,
};
enum class spu_decoder_type : unsigned

View file

@ -3,6 +3,7 @@
#include "util/types.hpp"
#include "util/File.h"
#include "util/bit_set.h"
#include "util/endian.hpp"
#include <span>

View file

@ -14,7 +14,7 @@ bool is_using_interpreter(thread_class t_class)
{
switch (t_class)
{
case thread_class::ppu: return g_cfg.core.ppu_decoder != ppu_decoder_type::llvm;
case thread_class::ppu: return g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy;
case thread_class::spu: return g_cfg.core.spu_decoder != spu_decoder_type::asmjit && g_cfg.core.spu_decoder != spu_decoder_type::llvm;
default: return true;
}

View file

@ -146,10 +146,10 @@ namespace stx
}
template <typename T>
requires requires(T& a, utils::serial& ar) { a.save(stx::exact_t<utils::serial&>(ar)); }
requires requires(T& a, utils::serial& ar) { a.save(exact_t<utils::serial&>(ar)); }
static void call_save(void* ptr, utils::serial& ar) noexcept
{
std::launder(static_cast<T*>(ptr))->save(stx::exact_t<utils::serial&>(ar));
std::launder(static_cast<T*>(ptr))->save(exact_t<utils::serial&>(ar));
}
template <typename T>
@ -173,7 +173,7 @@ namespace stx
r.thread_op = &call_thread_op<T>;
}
if constexpr (!!(requires(T& a, utils::serial& ar) { a.save(stx::exact_t<utils::serial&>(ar)); }))
if constexpr (!!(requires(T& a, utils::serial& ar) { a.save(exact_t<utils::serial&>(ar)); }))
{
r.save = &call_save<T>;
}

View file

@ -98,24 +98,6 @@ namespace utils
pos += padding;
}
// Add padding needed between two members
template <typename T, typename T2, typename T3>
void add_padding(T T2::* const first, T3 T2::* const second)
{
if (m_is_writing)
return;
const u32 offset1 = ::offset32(first) + sizeof(T);
const u32 offset2 = ::offset32(second);
AUDIT(::offset32(first) <= ::offset32(second));
if (offset2 > offset1)
{
pos += offset2 - offset1;
}
}
void set_expect_little_data(bool value)
{
m_expect_little_data = value;
@ -437,7 +419,7 @@ namespace utils
}
template <typename T>
requires requires(T& obj, utils::serial& ar) { (obj.*(&T::operator()))(stx::exact_t<utils::serial&>(ar)); }
requires requires(T& obj, utils::serial& ar) { (obj.*(&T::operator()))(exact_t<utils::serial&>(ar)); }
bool serialize(T& obj)
{
obj(*this);
@ -565,7 +547,7 @@ namespace utils
template <typename T>
requires(std::is_copy_constructible_v<std::remove_const_t<T>>) && (std::is_constructible_v<std::remove_const_t<T>> || Bitcopy<std::remove_const_t<T>> ||
std::is_constructible_v<std::remove_const_t<T>, stx::exact_t<serial&>> || TupleAlike<std::remove_const_t<T>>)
std::is_constructible_v<std::remove_const_t<T>, exact_t<serial&>> || TupleAlike<std::remove_const_t<T>>)
operator T() noexcept
{
AUDIT(!is_writing());
@ -604,9 +586,9 @@ namespace utils
return type{std::move(first), this->operator second_t()};
}
}
else if constexpr (std::is_constructible_v<type, stx::exact_t<serial&>>)
else if constexpr (std::is_constructible_v<type, exact_t<serial&>>)
{
return not_tuple_t(stx::exact_t<serial&>(*this));
return not_tuple_t(exact_t<serial&>(*this));
}
else if constexpr (std::is_constructible_v<type>)
{

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,11 @@
#include "util/types.hpp"
#include "util/endian.hpp"
union v128;
namespace rx
{
union v128;
}
using rx::v128;
// Type converter: converts native endianness arithmetic/enum types to appropriate se_t<> type
template <typename T, bool Se>

File diff suppressed because it is too large Load diff

View file

@ -1,223 +1,6 @@
#pragma once // No BOM and only basic ASCII in this header, or a neko will die
#include "util/types.hpp"
#include <rx/v128.hpp>
template <typename T>
concept Vector128 = (sizeof(T) == 16) && (std::is_trivial_v<T>);
// 128-bit vector type
union alignas(16) v128
{
uchar _bytes[16];
char _chars[16];
template <typename T, usz N, usz M>
struct masked_array_t // array type accessed as (index ^ M)
{
T m_data[N];
public:
T& operator[](usz index)
{
return m_data[index ^ M];
}
const T& operator[](usz index) const
{
return m_data[index ^ M];
}
};
template <typename T, usz N = 16 / sizeof(T)>
using normal_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? 0 : N - 1>;
template <typename T, usz N = 16 / sizeof(T)>
using reversed_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? N - 1 : 0>;
normal_array_t<u64> _u64;
normal_array_t<s64> _s64;
reversed_array_t<u64> u64r;
reversed_array_t<s64> s64r;
normal_array_t<u32> _u32;
normal_array_t<s32> _s32;
reversed_array_t<u32> u32r;
reversed_array_t<s32> s32r;
normal_array_t<u16> _u16;
normal_array_t<s16> _s16;
reversed_array_t<u16> u16r;
reversed_array_t<s16> s16r;
normal_array_t<u8> _u8;
normal_array_t<s8> _s8;
reversed_array_t<u8> u8r;
reversed_array_t<s8> s8r;
normal_array_t<f32> _f;
normal_array_t<f64> _d;
reversed_array_t<f32> fr;
reversed_array_t<f64> dr;
u128 _u;
s128 _s;
v128() = default;
constexpr v128(const v128&) noexcept = default;
template <Vector128 T>
constexpr v128(const T& rhs) noexcept
: v128(std::bit_cast<v128>(rhs))
{
}
constexpr v128& operator=(const v128&) noexcept = default;
template <Vector128 T>
constexpr operator T() const noexcept
{
return std::bit_cast<T>(*this);
}
ENABLE_BITWISE_SERIALIZATION;
static v128 from64(u64 _0, u64 _1 = 0)
{
v128 ret;
ret._u64[0] = _0;
ret._u64[1] = _1;
return ret;
}
static v128 from64r(u64 _1, u64 _0 = 0)
{
return from64(_0, _1);
}
static v128 from64p(u64 value)
{
v128 ret;
ret._u64[0] = value;
ret._u64[1] = value;
return ret;
}
static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
{
v128 ret;
ret._u32[0] = _0;
ret._u32[1] = _1;
ret._u32[2] = _2;
ret._u32[3] = _3;
return ret;
}
static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
{
return from32(_0, _1, _2, _3);
}
static v128 from32p(u32 value)
{
v128 ret;
ret._u32[0] = value;
ret._u32[1] = value;
ret._u32[2] = value;
ret._u32[3] = value;
return ret;
}
static v128 fromf32p(f32 value)
{
v128 ret;
ret._f[0] = value;
ret._f[1] = value;
ret._f[2] = value;
ret._f[3] = value;
return ret;
}
static v128 from16p(u16 value)
{
v128 ret;
ret._u16[0] = value;
ret._u16[1] = value;
ret._u16[2] = value;
ret._u16[3] = value;
ret._u16[4] = value;
ret._u16[5] = value;
ret._u16[6] = value;
ret._u16[7] = value;
return ret;
}
static v128 from8p(u8 value)
{
v128 ret;
std::memset(&ret, value, sizeof(ret));
return ret;
}
static v128 undef()
{
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#elif _MSC_VER
#pragma warning(push)
#pragma warning(disable : 6001)
#endif
v128 ret;
return ret;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif _MSC_VER
#pragma warning(pop)
#endif
}
// Unaligned load with optional index offset
static v128 loadu(const void* ptr, usz index = 0)
{
v128 ret;
std::memcpy(&ret, static_cast<const u8*>(ptr) + index * sizeof(v128), sizeof(v128));
return ret;
}
// Unaligned store with optional index offset
static void storeu(v128 value, void* ptr, usz index = 0)
{
std::memcpy(static_cast<u8*>(ptr) + index * sizeof(v128), &value, sizeof(v128));
}
v128 operator|(const v128&) const;
v128 operator&(const v128&) const;
v128 operator^(const v128&) const;
v128 operator~() const;
bool operator==(const v128& right) const;
void clear()
{
*this = {};
}
};
template <typename T, usz N, usz M>
struct offset32_array<v128::masked_array_t<T, N, M>>
{
template <typename Arg>
static inline u32 index32(const Arg& arg)
{
return u32{sizeof(T)} * (static_cast<u32>(arg) ^ static_cast<u32>(M));
}
};
template <>
struct std::hash<v128>
{
usz operator()(const v128& key) const
{
return key._u64[0] + key._u64[1];
}
};
using rx::v128;

View file

@ -188,16 +188,19 @@ namespace utils
{
static const long r = []() -> long
{
long result;
#ifdef _WIN32
SYSTEM_INFO info;
::GetSystemInfo(&info);
return info.dwPageSize;
result = info.dwPageSize;
#else
return ::sysconf(_SC_PAGESIZE);
result = ::sysconf(_SC_PAGESIZE);
#endif
ensure(result, FN(((x & (x - 1)) == 0 && x > 0 && x <= 0x10000)));
return result;
}();
return ensure(r, FN(((x & (x - 1)) == 0 && x > 0 && x <= 0x10000)));
return r;
}
// Convert memory protection (internal)

View file

@ -1244,8 +1244,9 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
case emu_settings_type::PPUDecoder:
switch (static_cast<ppu_decoder_type>(index))
{
case ppu_decoder_type::_static: return tr("Interpreter (static)", "PPU decoder");
case ppu_decoder_type::llvm: return tr("Recompiler (LLVM)", "PPU decoder");
case ppu_decoder_type::_static: return tr("Interpreter (Legacy)", "PPU decoder");
case ppu_decoder_type::llvm_legacy: return tr("LLVM Recompiler (Legacy)", "PPU decoder");
case ppu_decoder_type::interpreter: return tr("Interpreter", "PPU decoder");
}
break;
case emu_settings_type::SPUDecoder:

View file

@ -367,7 +367,8 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
QButtonGroup* ppu_bg = new QButtonGroup(this);
ppu_bg->addButton(ui->ppu__static, static_cast<int>(ppu_decoder_type::_static));
ppu_bg->addButton(ui->ppu_llvm, static_cast<int>(ppu_decoder_type::llvm));
ppu_bg->addButton(ui->ppu_llvm, static_cast<int>(ppu_decoder_type::llvm_legacy));
ppu_bg->addButton(ui->ppu_interpreter, static_cast<int>(ppu_decoder_type::interpreter));
connect(ppu_bg, &QButtonGroup::idToggled, [this](int id, bool checked)
{
@ -376,12 +377,13 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
switch (id)
{
case static_cast<int>(ppu_decoder_type::interpreter):
case static_cast<int>(ppu_decoder_type::_static):
ui->accuratePPUFPCC->setEnabled(true);
ui->accuratePPUNJ->setEnabled(true);
ui->accuratePPUVNAN->setEnabled(true);
break;
case static_cast<int>(ppu_decoder_type::llvm):
case static_cast<int>(ppu_decoder_type::llvm_legacy):
ui->accuratePPUFPCC->setEnabled(false);
ui->accuratePPUNJ->setEnabled(false);
ui->accuratePPUVNAN->setEnabled(false);

View file

@ -74,14 +74,21 @@
<item>
<widget class="QRadioButton" name="ppu__static">
<property name="text">
<string notr="true">Interpreter (static)</string>
<string notr="true">Interpreter (Legacy)</string>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="ppu_llvm">
<property name="text">
<string notr="true">LLVM Recompiler (fastest)</string>
<string notr="true">LLVM Recompiler (Legacy)</string>
</property>
</widget>
</item>
<item>
<widget class="QRadioButton" name="ppu_interpreter">
<property name="text">
<string notr="true">Interpreter</string>
</property>
</widget>
</item>

View file

@ -1,103 +1,107 @@
find_package(libunwind REQUIRED)
find_package(sox REQUIRED)
find_package(ALSA REQUIRED)
add_library(standalone-config INTERFACE)
target_include_directories(standalone-config INTERFACE orbis-kernel-config)
add_library(orbis::kernel::config ALIAS standalone-config)
add_executable(rpcsx
audio/AudioDevice.cpp
audio/AlsaDevice.cpp
add_subdirectory(cpu)
iodev/a53io.cpp
iodev/ajm.cpp
iodev/blockpool.cpp
iodev/bt.cpp
iodev/camera.cpp
iodev/cd.cpp
iodev/console.cpp
iodev/hdd.cpp
iodev/dce.cpp
iodev/dipsw.cpp
iodev/dmem.cpp
iodev/gc.cpp
iodev/hid.cpp
iodev/hmd_3da.cpp
iodev/hmd_cmd.cpp
iodev/hmd_mmap.cpp
iodev/hmd_snsr.cpp
iodev/hmd2_cmd.cpp
iodev/hmd2_imu.cpp
iodev/hmd2_gen_data.cpp
iodev/hmd2_gaze.cpp
iodev/icc_configuration.cpp
iodev/mbus.cpp
iodev/metadbg.cpp
iodev/notification.cpp
iodev/npdrm.cpp
iodev/nsid_ctl.cpp
iodev/null.cpp
iodev/rng.cpp
iodev/sbl_srv.cpp
iodev/shm.cpp
iodev/urandom.cpp
iodev/xpt.cpp
iodev/zero.cpp
iodev/aout.cpp
iodev/av_control.cpp
iodev/hdmi.cpp
iodev/mbus_av.cpp
iodev/scanin.cpp
iodev/s3da.cpp
iodev/gbase.cpp
iodev/devstat.cpp
iodev/devact.cpp
iodev/devctl.cpp
iodev/uvd.cpp
iodev/vce.cpp
iodev/evlg.cpp
iodev/srtc.cpp
iodev/sshot.cpp
iodev/lvdctl.cpp
iodev/icc_power.cpp
iodev/cayman_reg.cpp
if(LINUX AND WITH_RPCSX)
find_package(libunwind REQUIRED)
find_package(sox REQUIRED)
find_package(ALSA REQUIRED)
main.cpp
AudioOut.cpp
backtrace.cpp
vm.cpp
ops.cpp
linker.cpp
io-device.cpp
thread.cpp
vfs.cpp
ipmi.cpp
)
add_subdirectory(gpu)
add_subdirectory(core)
add_subdirectory(gpu)
add_subdirectory(core)
add_executable(rpcsx
audio/AudioDevice.cpp
audio/AlsaDevice.cpp
target_include_directories(rpcsx PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(rpcsx
PUBLIC
ffmpeg::avcodec
ffmpeg::swresample
ffmpeg::avutil
Atrac9
rpcsx-gpu
orbis::kernel
rx
libcrypto
libunwind::unwind-x86_64
xbyak::xbyak
sox::sox
ALSA::ALSA
rpcsx-core
)
iodev/a53io.cpp
iodev/ajm.cpp
iodev/blockpool.cpp
iodev/bt.cpp
iodev/camera.cpp
iodev/cd.cpp
iodev/console.cpp
iodev/hdd.cpp
iodev/dce.cpp
iodev/dipsw.cpp
iodev/dmem.cpp
iodev/gc.cpp
iodev/hid.cpp
iodev/hmd_3da.cpp
iodev/hmd_cmd.cpp
iodev/hmd_mmap.cpp
iodev/hmd_snsr.cpp
iodev/hmd2_cmd.cpp
iodev/hmd2_imu.cpp
iodev/hmd2_gen_data.cpp
iodev/hmd2_gaze.cpp
iodev/icc_configuration.cpp
iodev/mbus.cpp
iodev/metadbg.cpp
iodev/notification.cpp
iodev/npdrm.cpp
iodev/nsid_ctl.cpp
iodev/null.cpp
iodev/rng.cpp
iodev/sbl_srv.cpp
iodev/shm.cpp
iodev/urandom.cpp
iodev/xpt.cpp
iodev/zero.cpp
iodev/aout.cpp
iodev/av_control.cpp
iodev/hdmi.cpp
iodev/mbus_av.cpp
iodev/scanin.cpp
iodev/s3da.cpp
iodev/gbase.cpp
iodev/devstat.cpp
iodev/devact.cpp
iodev/devctl.cpp
iodev/uvd.cpp
iodev/vce.cpp
iodev/evlg.cpp
iodev/srtc.cpp
iodev/sshot.cpp
iodev/lvdctl.cpp
iodev/icc_power.cpp
iodev/cayman_reg.cpp
target_base_address(rpcsx 0x0000070000000000)
target_compile_options(rpcsx PRIVATE "-mfsgsbase")
main.cpp
AudioOut.cpp
backtrace.cpp
vm.cpp
ops.cpp
linker.cpp
io-device.cpp
thread.cpp
vfs.cpp
ipmi.cpp
)
target_base_address(rpcsx 0x0000070000000000)
target_compile_options(rpcsx PRIVATE "-mfsgsbase")
set_target_properties(rpcsx PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
install(TARGETS rpcsx RUNTIME DESTINATION bin)
target_include_directories(rpcsx PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(rpcsx
PUBLIC
ffmpeg::avcodec
ffmpeg::swresample
ffmpeg::avutil
Atrac9
rpcsx-gpu
orbis::kernel
rx
libcrypto
libunwind::unwind-x86_64
xbyak::xbyak
sox::sox
ALSA::ALSA
rpcsx-core
)
endif()
set_target_properties(rpcsx PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
install(TARGETS rpcsx RUNTIME DESTINATION bin)

1
rpcsx/cpu/CMakeLists.txt Normal file
View file

@ -0,0 +1 @@
add_subdirectory(cell)

View file

@ -0,0 +1,3 @@
add_subdirectory(ppu)

View file

@ -0,0 +1,32 @@
add_library(
rpcsx_cpu_cell_ppu STATIC
src/Decoder.cpp
)
add_library(rpcsx_cpu_cell_ppu_semantic
STATIC
semantic/ppu.cpp
)
target_include_directories(rpcsx_cpu_cell_ppu_semantic PUBLIC include PRIVATE include/rx/cpu/cell/ppu)
target_link_libraries(rpcsx_cpu_cell_ppu_semantic PUBLIC rx)
# add_custom_command(
# OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ppu.ll
# COMMAND ${CLANG_EXECUTABLE} -O3 -S -emit-llvm semantic/ppu.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/ppu.ll -I include/rx/cpu/cell/ppu/ -I ../../../../rx/include/ -std=c++23 -fno-exceptions
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
# )
# add_custom_target(ppu-semantic DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ppu.ll)
target_include_directories(rpcsx_cpu_cell_ppu
PUBLIC
include
PRIVATE
include/rx/cpu/cell/ppu
)
target_link_libraries(rpcsx_cpu_cell_ppu PUBLIC rx)
# add_dependencies(rpcsx_cpu_cell_ppu ppu-semantic)
add_library(rpcsx::cpu::cell::ppu ALIAS rpcsx_cpu_cell_ppu)
add_library(rpcsx::cpu::cell::ppu::semantic ALIAS rpcsx_cpu_cell_ppu_semantic)

View file

@ -0,0 +1,23 @@
#pragma once
#include "Opcode.hpp"
#include <array>
#include <cstdint>
#include <rx/refl.hpp>
namespace rx::cell::ppu {
template <typename T> using DecoderTable = std::array<T, 0x20000>;
extern DecoderTable<Opcode> g_ppuOpcodeTable;
// extern std::array<Form, rx::fieldCount<Opcode>> g_opcodeForms;
inline Opcode getOpcode(std::uint32_t instruction) {
auto decode = [](std::uint32_t inst) {
return ((inst >> 26) | (inst << 6)) & 0x1ffff; // Rotate + mask
};
return g_ppuOpcodeTable[decode(instruction)];
}
Opcode fixOpcode(Opcode opcode, std::uint32_t instruction);
} // namespace rx::cell::ppu

View file

@ -0,0 +1,410 @@
#pragma once
#include "Instruction.hpp"
#include <cstdint>
namespace rx::cell::ppu {
inline namespace registers {
enum {
r0,
r1,
r2,
r3,
r4,
r5,
r6,
r7,
r8,
r9,
r10,
r11,
r12,
r13,
r14,
r15,
r16,
r17,
r18,
r19,
r20,
r21,
r22,
r23,
r24,
r25,
r26,
r27,
r28,
r29,
r30,
r31,
};
enum {
f0,
f1,
f2,
f3,
f4,
f5,
f6,
f7,
f8,
f9,
f10,
f11,
f12,
f13,
f14,
f15,
F16,
f17,
f18,
f19,
f20,
f21,
f22,
f23,
f24,
f25,
f26,
f27,
f28,
f29,
f30,
f31,
};
enum {
v0,
v1,
v2,
v3,
v4,
v5,
v6,
v7,
v8,
v9,
v10,
v11,
v12,
v13,
v14,
v15,
v16,
v17,
v18,
v19,
v20,
v21,
v22,
v23,
v24,
v25,
v26,
v27,
v28,
v29,
v30,
v31,
};
enum {
cr0,
cr1,
cr2,
cr3,
cr4,
cr5,
cr6,
cr7,
};
} // namespace registers
inline std::uint32_t ADDI(std::uint32_t rt, std::uint32_t ra, std::int32_t si) {
Instruction op{0x0eu << 26};
op.rd = rt;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t ADDIS(std::uint32_t rt, std::uint32_t ra,
std::int32_t si) {
Instruction op{0x0fu << 26};
op.rd = rt;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t XORIS(std::uint32_t rt, std::uint32_t ra,
std::int32_t si) {
Instruction op{0x1bu << 26};
op.rd = rt;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t ORI(std::uint32_t rt, std::uint32_t ra, std::uint32_t ui) {
Instruction op{0x18u << 26};
op.rd = rt;
op.ra = ra;
op.uimm16 = ui;
return op.raw;
}
inline std::uint32_t ORIS(std::uint32_t rt, std::uint32_t ra,
std::uint32_t ui) {
Instruction op{0x19u << 26};
op.rd = rt;
op.ra = ra;
op.uimm16 = ui;
return op.raw;
}
inline std::uint32_t OR(std::uint32_t ra, std::uint32_t rs, std::uint32_t rb,
bool rc = false) {
Instruction op{0x1fu << 26 | 0x1bcu << 1};
op.rs = rs;
op.ra = ra;
op.rb = rb;
op.rc = rc;
return op.raw;
}
inline std::uint32_t SC(std::uint32_t lev) {
Instruction op{0x11u << 26 | 1 << 1};
op.lev = lev;
return op.raw;
}
inline std::uint32_t B(std::int32_t li, bool aa = false, bool lk = false) {
Instruction op{0x12u << 26};
op.ll = li;
op.aa = aa;
op.lk = lk;
return op.raw;
}
inline std::uint32_t BC(std::uint32_t bo, std::uint32_t bi, std::int32_t bd,
bool aa = false, bool lk = false) {
Instruction op{0x10u << 26};
op.bo = bo;
op.bi = bi;
op.ds = bd / 4;
op.aa = aa;
op.lk = lk;
return op.raw;
}
inline std::uint32_t BCLR(std::uint32_t bo, std::uint32_t bi, std::uint32_t bh,
bool lk = false) {
Instruction op{0x13u << 26 | 0x10u << 1};
op.bo = bo;
op.bi = bi;
op.bh = bh;
op.lk = lk;
return op.raw;
}
inline std::uint32_t BCCTR(std::uint32_t bo, std::uint32_t bi, std::uint32_t bh,
bool lk = false) {
Instruction op{0x13u << 26 | 0x210u << 1};
op.bo = bo;
op.bi = bi;
op.bh = bh;
op.lk = lk;
return op.raw;
}
inline std::uint32_t MFSPR(std::uint32_t rt, std::uint32_t spr) {
Instruction op{0x1fu << 26 | 0x153u << 1};
op.rd = rt;
op.spr = spr;
return op.raw;
}
inline std::uint32_t MTSPR(std::uint32_t spr, std::uint32_t rs) {
Instruction op{0x1fu << 26 | 0x1d3u << 1};
op.rs = rs;
op.spr = spr;
return op.raw;
}
inline std::uint32_t LWZ(std::uint32_t rt, std::uint32_t ra, std::int32_t si) {
Instruction op{0x20u << 26};
op.rd = rt;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t STW(std::uint32_t rt, std::uint32_t ra, std::int32_t si) {
Instruction op{0x24u << 26};
op.rd = rt;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t STD(std::uint32_t rs, std::uint32_t ra, std::int32_t si) {
Instruction op{0x3eu << 26};
op.rs = rs;
op.ra = ra;
op.ds = si / 4;
return op.raw;
}
inline std::uint32_t STDU(std::uint32_t rs, std::uint32_t ra, std::int32_t si) {
Instruction op{0x3eu << 26 | 1};
op.rs = rs;
op.ra = ra;
op.ds = si / 4;
return op.raw;
}
inline std::uint32_t LD(std::uint32_t rt, std::uint32_t ra, std::int32_t si) {
Instruction op{0x3au << 26};
op.rd = rt;
op.ra = ra;
op.ds = si / 4;
return op.raw;
}
inline std::uint32_t LDU(std::uint32_t rt, std::uint32_t ra, std::int32_t si) {
Instruction op{0x3au << 26 | 1};
op.rd = rt;
op.ra = ra;
op.ds = si / 4;
return op.raw;
}
inline std::uint32_t CMPI(std::uint32_t bf, std::uint32_t l, std::uint32_t ra,
std::uint32_t ui) {
Instruction op{0xbu << 26};
op.crfd = bf;
op.l10 = l;
op.ra = ra;
op.uimm16 = ui;
return op.raw;
}
inline std::uint32_t CMPLI(std::uint32_t bf, std::uint32_t l, std::uint32_t ra,
std::uint32_t ui) {
Instruction op{0xau << 26};
op.crfd = bf;
op.l10 = l;
op.ra = ra;
op.uimm16 = ui;
return op.raw;
}
inline std::uint32_t RLDICL(std::uint32_t ra, std::uint32_t rs,
std::uint32_t sh, std::uint32_t mb,
bool rc = false) {
Instruction op{30 << 26};
op.ra = ra;
op.rs = rs;
op.sh64 = sh;
op.mbe64 = mb;
op.rc = rc;
return op.raw;
}
// rldicr — identical MD-form layout to rldicl, with the XO field
// (big-endian bits 27..29, i.e. value bit 2) set to 1.
inline std::uint32_t RLDICR(std::uint32_t ra, std::uint32_t rs,
                            std::uint32_t sh, std::uint32_t mb,
                            bool rc = false) {
  return RLDICL(ra, rs, sh, mb, rc) | (1 << 2);
}
inline std::uint32_t STFD(std::uint32_t frs, std::uint32_t ra,
std::int32_t si) {
Instruction op{54u << 26};
op.frs = frs;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t STVX(std::uint32_t vs, std::uint32_t ra,
std::uint32_t rb) {
Instruction op{31 << 26 | 231 << 1};
op.vs = vs;
op.ra = ra;
op.rb = rb;
return op.raw;
}
inline std::uint32_t LFD(std::uint32_t frd, std::uint32_t ra, std::int32_t si) {
Instruction op{50u << 26};
op.frd = frd;
op.ra = ra;
op.simm16 = si;
return op.raw;
}
inline std::uint32_t LVX(std::uint32_t vd, std::uint32_t ra, std::uint32_t rb) {
Instruction op{31 << 26 | 103 << 1};
op.vd = vd;
op.ra = ra;
op.rb = rb;
return op.raw;
}
inline constexpr std::uint32_t EIEIO() { return 0x7c0006ac; }
// Extended (pseudo-)mnemonics expressed through the primary encoders above.
inline namespace implicts {
// nop is ori r0, r0, 0.
inline std::uint32_t NOP() { return ORI(r0, r0, 0); }
// mr rt, ra is or rt, ra, ra.
inline std::uint32_t MR(std::uint32_t rt, std::uint32_t ra) {
  return OR(rt, ra, ra, false);
}
// li rt, imm is addi rt, 0, imm (ra = 0 means literal zero, not r0).
inline std::uint32_t LI(std::uint32_t rt, std::uint32_t imm) {
  return ADDI(rt, r0, imm);
}
// lis rt, imm is addis rt, 0, imm.
inline std::uint32_t LIS(std::uint32_t rt, std::uint32_t imm) {
  return ADDIS(rt, r0, imm);
}
// Unconditional register-indirect branches: BO = 0b10100 ("branch always").
inline std::uint32_t BLR() { return BCLR(0x10 | 0x04, 0, 0); }
inline std::uint32_t BCTR() { return BCCTR(0x10 | 0x04, 0, 0); }
inline std::uint32_t BCTRL() { return BCCTR(0x10 | 0x04, 0, 0, true); }
// SPR numbers are encoded with their two 5-bit halves swapped, hence n << 5.
inline std::uint32_t MFCTR(std::uint32_t reg) { return MFSPR(reg, 9 << 5); }
inline std::uint32_t MTCTR(std::uint32_t reg) { return MTSPR(9 << 5, reg); }
inline std::uint32_t MFLR(std::uint32_t reg) { return MFSPR(reg, 8 << 5); }
inline std::uint32_t MTLR(std::uint32_t reg) { return MTSPR(8 << 5, reg); }
// Conditional branches on one CR field: BI = cr*4 + bit (LT=0, GT=1, EQ=2);
// BO = 4 branches if the bit is clear, BO = 12 if it is set.
inline std::uint32_t BNE(std::uint32_t cr, std::int32_t imm) {
  return BC(4, 2 | cr << 2, imm);
}
inline std::uint32_t BEQ(std::uint32_t cr, std::int32_t imm) {
  return BC(12, 2 | cr << 2, imm);
}
inline std::uint32_t BGT(std::uint32_t cr, std::int32_t imm) {
  return BC(12, 1 | cr << 2, imm);
}
// Convenience overloads testing cr0.
inline std::uint32_t BNE(std::int32_t imm) { return BNE(cr0, imm); }
inline std::uint32_t BEQ(std::int32_t imm) { return BEQ(cr0, imm); }
inline std::uint32_t BGT(std::int32_t imm) { return BGT(cr0, imm); }
// cmpdi/cmpwi: signed compare immediate, 64-bit (l = 1) or 32-bit (l = 0).
inline std::uint32_t CMPDI(std::uint32_t cr, std::uint32_t reg,
                           std::uint32_t imm) {
  return CMPI(cr, 1, reg, imm);
}
inline std::uint32_t CMPDI(std::uint32_t reg, std::uint32_t imm) {
  return CMPDI(cr0, reg, imm);
}
inline std::uint32_t CMPWI(std::uint32_t cr, std::uint32_t reg,
                           std::uint32_t imm) {
  return CMPI(cr, 0, reg, imm);
}
inline std::uint32_t CMPWI(std::uint32_t reg, std::uint32_t imm) {
  return CMPWI(cr0, reg, imm);
}
// cmpldi/cmplwi: unsigned (logical) compare immediate.
inline std::uint32_t CMPLDI(std::uint32_t cr, std::uint32_t reg,
                            std::uint32_t imm) {
  return CMPLI(cr, 1, reg, imm);
}
inline std::uint32_t CMPLDI(std::uint32_t reg, std::uint32_t imm) {
  return CMPLDI(cr0, reg, imm);
}
inline std::uint32_t CMPLWI(std::uint32_t cr, std::uint32_t reg,
                            std::uint32_t imm) {
  return CMPLI(cr, 0, reg, imm);
}
inline std::uint32_t CMPLWI(std::uint32_t reg, std::uint32_t imm) {
  return CMPLWI(cr0, reg, imm);
}
// extrdi x, y, n, b: extract the n-bit field starting at big-endian bit b of
// y and right-justify it into x. The Power ISA extended-mnemonic definition
// is rldicl x, y, b+n, 64-n; the previous mask value of 64-b incorrectly
// kept b bits instead of n bits.
inline std::uint32_t EXTRDI(std::uint32_t x, std::uint32_t y, std::uint32_t n,
                            std::uint32_t b) {
  return RLDICL(x, y, b + n, 64 - n, false);
}
// srdi x, y, n: shift right by n is rldicl x, y, 64-n, n.
inline std::uint32_t SRDI(std::uint32_t x, std::uint32_t y, std::uint32_t n) {
  return RLDICL(x, y, 64 - n, n, false);
}
// clrldi x, y, n: clear the leftmost n bits (rotate 0, mask begin = n).
inline std::uint32_t CLRLDI(std::uint32_t x, std::uint32_t y, std::uint32_t n) {
  return RLDICL(x, y, 0, n, false);
}
// clrrdi x, y, n: clear the rightmost n bits (rotate 0, mask end = 63-n).
inline std::uint32_t CLRRDI(std::uint32_t x, std::uint32_t y, std::uint32_t n) {
  return RLDICR(x, y, 0, 63 - n, false);
}
inline constexpr std::uint32_t TRAP() { return 0x7FE00008; } // tw 31,r0,r0
} // namespace implicts
} // namespace rx::cell::ppu

View file

@ -0,0 +1,72 @@
#pragma once
#include <cstdint>
#include <rx/BitField.h>
namespace rx::cell::ppu {
// A single 32-bit PPU instruction word overlaid with every named field the
// decoder and encoder need. Positions use the Power ISA's big-endian bit
// numbering: bf<T, I, N> places an N-bit field at IBM bit I (bit 0 = MSB),
// mapped here onto an LSB-0 BitField. Split fields (sh64, mbe64) and
// shifted branch displacements (bt14, bt24) are expressed as packs.
union Instruction {
  template <typename T, std::uint32_t I, std::uint32_t N>
  using bf = BitField<T, sizeof(T) * 8 - N - I, N>;

  // The raw instruction word; all other members alias it.
  std::uint32_t raw;

  bf<std::uint32_t, 0, 6> main; // 0..5

  // MD-form 6-bit shift: high bit lives apart from the low five bits.
  BitFieldPack<bf<std::uint32_t, 30, 1>, bf<std::uint32_t, 16, 5>>
      sh64; // 30 + 16..20
  // MD-form 6-bit mask begin/end, likewise split.
  BitFieldPack<bf<std::uint32_t, 26, 1>, bf<std::uint32_t, 21, 5>>
      mbe64; // 26 + 21..25

  bf<std::uint32_t, 11, 5> vuimm; // 11..15
  bf<std::uint32_t, 6, 5> vs;     // 6..10
  bf<std::uint32_t, 22, 4> vsh;   // 22..25
  bf<std::uint32_t, 21, 1> oe;    // 21
  bf<std::uint32_t, 11, 10> spr;  // 11..20
  bf<std::uint32_t, 21, 5> vc;    // 21..25
  bf<std::uint32_t, 16, 5> vb;    // 16..20
  bf<std::uint32_t, 11, 5> va;    // 11..15
  bf<std::uint32_t, 6, 5> vd;     // 6..10
  bf<std::uint32_t, 31, 1> lk;    // 31
  bf<std::uint32_t, 30, 1> aa;    // 30
  bf<std::uint32_t, 16, 5> rb;    // 16..20
  bf<std::uint32_t, 11, 5> ra;    // 11..15
  bf<std::uint32_t, 6, 5> rd;     // 6..10
  bf<std::uint32_t, 16, 16> uimm16; // 16..31
  bf<std::uint32_t, 11, 1> l11;   // 11
  bf<std::uint32_t, 6, 5> rs;     // 6..10
  bf<std::int32_t, 16, 16> simm16; // 16..31, signed
  bf<std::int32_t, 16, 14> ds;    // 16..29, signed
  bf<std::int32_t, 11, 5> vsimm;  // 11..15, signed
  bf<std::int32_t, 6, 26> ll;     // 6..31, signed
  bf<std::int32_t, 6, 24> li;     // 6..29, signed
  bf<std::uint32_t, 20, 7> lev;   // 20..26
  bf<std::uint32_t, 16, 4> i;     // 16..19
  bf<std::uint32_t, 11, 3> crfs;  // 11..13
  bf<std::uint32_t, 10, 1> l10;   // 10
  bf<std::uint32_t, 6, 3> crfd;   // 6..8
  bf<std::uint32_t, 16, 5> crbb;  // 16..20
  bf<std::uint32_t, 11, 5> crba;  // 11..15
  bf<std::uint32_t, 6, 5> crbd;   // 6..10
  bf<std::uint32_t, 31, 1> rc;    // 31
  bf<std::uint32_t, 26, 5> me32;  // 26..30
  bf<std::uint32_t, 21, 5> mb32;  // 21..25
  bf<std::uint32_t, 16, 5> sh32;  // 16..20
  bf<std::uint32_t, 11, 5> bi;    // 11..15
  bf<std::uint32_t, 6, 5> bo;     // 6..10
  bf<std::uint32_t, 19, 2> bh;    // 19..20
  bf<std::uint32_t, 21, 5> frc;   // 21..25
  bf<std::uint32_t, 16, 5> frb;   // 16..20
  bf<std::uint32_t, 11, 5> fra;   // 11..15
  bf<std::uint32_t, 6, 5> frd;    // 6..10
  bf<std::uint32_t, 12, 8> crm;   // 12..19
  bf<std::uint32_t, 6, 5> frs;    // 6..10
  bf<std::uint32_t, 7, 8> flm;    // 7..14
  bf<std::uint32_t, 6, 1> l6;     // 6
  bf<std::uint32_t, 15, 1> l15;   // 15

  // Branch displacements: the 14-/24-bit immediate concatenated with two
  // fixed zero bits (instructions are word-aligned).
  BitFieldPack<bf<std::int32_t, 16, 14>, BitFieldFixed<std::uint32_t, 0, 2>>
      bt14;
  BitFieldPack<bf<std::int32_t, 6, 24>, BitFieldFixed<std::uint32_t, 0, 2>>
      bt24;
};
static_assert(sizeof(Instruction) == sizeof(std::uint32_t));
} // namespace rx::cell::ppu

View file

@ -0,0 +1,858 @@
#pragma once
namespace rx::cell::ppu {
// Fully-decoded PPU operation identifiers produced by the decoder tables.
// Ordering is load-bearing only through the name pairings the tables rely
// on (`X` / `X_` for Rc=1, `XO` / `XO_` for OE=1 forms); do not reorder.
enum class Opcode {
  Invalid,
  // AltiVec (VMX) instructions; a trailing underscore marks the Rc=1 form.
  MFVSCR,
  MTVSCR,
  VADDCUW,
  VADDFP,
  VADDSBS,
  VADDSHS,
  VADDSWS,
  VADDUBM,
  VADDUBS,
  VADDUHM,
  VADDUHS,
  VADDUWM,
  VADDUWS,
  VAND,
  VANDC,
  VAVGSB,
  VAVGSH,
  VAVGSW,
  VAVGUB,
  VAVGUH,
  VAVGUW,
  VCFSX,
  VCFUX,
  VCMPBFP,
  VCMPBFP_,
  VCMPEQFP,
  VCMPEQFP_,
  VCMPEQUB,
  VCMPEQUB_,
  VCMPEQUH,
  VCMPEQUH_,
  VCMPEQUW,
  VCMPEQUW_,
  VCMPGEFP,
  VCMPGEFP_,
  VCMPGTFP,
  VCMPGTFP_,
  VCMPGTSB,
  VCMPGTSB_,
  VCMPGTSH,
  VCMPGTSH_,
  VCMPGTSW,
  VCMPGTSW_,
  VCMPGTUB,
  VCMPGTUB_,
  VCMPGTUH,
  VCMPGTUH_,
  VCMPGTUW,
  VCMPGTUW_,
  VCTSXS,
  VCTUXS,
  VEXPTEFP,
  VLOGEFP,
  VMADDFP,
  VMAXFP,
  VMAXSB,
  VMAXSH,
  VMAXSW,
  VMAXUB,
  VMAXUH,
  VMAXUW,
  VMHADDSHS,
  VMHRADDSHS,
  VMINFP,
  VMINSB,
  VMINSH,
  VMINSW,
  VMINUB,
  VMINUH,
  VMINUW,
  VMLADDUHM,
  VMRGHB,
  VMRGHH,
  VMRGHW,
  VMRGLB,
  VMRGLH,
  VMRGLW,
  VMSUMMBM,
  VMSUMSHM,
  VMSUMSHS,
  VMSUMUBM,
  VMSUMUHM,
  VMSUMUHS,
  VMULESB,
  VMULESH,
  VMULEUB,
  VMULEUH,
  VMULOSB,
  VMULOSH,
  VMULOUB,
  VMULOUH,
  VNMSUBFP,
  VNOR,
  VOR,
  VPERM,
  VPKPX,
  VPKSHSS,
  VPKSHUS,
  VPKSWSS,
  VPKSWUS,
  VPKUHUM,
  VPKUHUS,
  VPKUWUM,
  VPKUWUS,
  VREFP,
  VRFIM,
  VRFIN,
  VRFIP,
  VRFIZ,
  VRLB,
  VRLH,
  VRLW,
  VRSQRTEFP,
  VSEL,
  VSL,
  VSLB,
  VSLDOI,
  VSLH,
  VSLO,
  VSLW,
  VSPLTB,
  VSPLTH,
  VSPLTISB,
  VSPLTISH,
  VSPLTISW,
  VSPLTW,
  VSR,
  VSRAB,
  VSRAH,
  VSRAW,
  VSRB,
  VSRH,
  VSRO,
  VSRW,
  VSUBCUW,
  VSUBFP,
  VSUBSBS,
  VSUBSHS,
  VSUBSWS,
  VSUBUBM,
  VSUBUBS,
  VSUBUHM,
  VSUBUHS,
  VSUBUWM,
  VSUBUWS,
  VSUMSWS,
  VSUM2SWS,
  VSUM4SBS,
  VSUM4SHS,
  VSUM4UBS,
  VUPKHPX,
  VUPKHSB,
  VUPKHSH,
  VUPKLPX,
  VUPKLSB,
  VUPKLSH,
  VXOR,
  // Fixed-point, branch, system and floating-point instructions.
  TDI,
  TWI,
  MULLI,
  SUBFIC,
  CMPLI,
  CMPI,
  ADDIC,
  ADDI,
  ADDIS,
  BC,
  SC,
  B,
  MCRF,
  BCLR,
  RFID,
  CRNOR,
  RFSCV,
  CRANDC,
  ISYNC,
  CRXOR,
  CRNAND,
  CRAND,
  HRFID,
  CREQV,
  URFID,
  STOP,
  CRORC,
  CROR,
  BCCTR,
  RLWIMI,
  RLWINM,
  RLWNM,
  ORI,
  ORIS,
  XORI,
  XORIS,
  ANDI,
  ANDIS,
  RLDICL,
  RLDICR,
  RLDIC,
  RLDIMI,
  RLDCL,
  RLDCR,
  CMP,
  TW,
  LVSL,
  LVEBX,
  SUBFC,
  MULHDU,
  ADDC,
  MULHWU,
  MFOCRF,
  LWARX,
  LDX,
  LWZX,
  SLW,
  CNTLZW,
  SLD,
  AND,
  CMPL,
  LVSR,
  LVEHX,
  SUBF,
  LDUX,
  DCBST,
  LWZUX,
  CNTLZD,
  ANDC,
  TD,
  LVEWX,
  MULHD,
  MULHW,
  LDARX,
  DCBF,
  LBZX,
  LVX,
  NEG,
  LBZUX,
  NOR,
  STVEBX,
  SUBFE,
  ADDE,
  MTOCRF,
  STDX,
  STWCX,
  STWX,
  STVEHX,
  STDUX,
  STWUX,
  STVEWX,
  SUBFZE,
  ADDZE,
  STDCX,
  STBX,
  STVX,
  MULLD,
  SUBFME,
  ADDME,
  MULLW,
  DCBTST,
  STBUX,
  ADD,
  DCBT,
  LHZX,
  EQV,
  ECIWX,
  LHZUX,
  XOR,
  MFSPR,
  LWAX,
  DST,
  LHAX,
  LVXL,
  MFTB,
  LWAUX,
  DSTST,
  LHAUX,
  STHX,
  ORC,
  ECOWX,
  STHUX,
  OR,
  DIVDU,
  DIVWU,
  MTSPR,
  DCBI,
  NAND,
  STVXL,
  DIVD,
  DIVW,
  LVLX,
  LDBRX,
  LSWX,
  LWBRX,
  LFSX,
  SRW,
  SRD,
  LVRX,
  LSWI,
  LFSUX,
  SYNC,
  LFDX,
  LFDUX,
  STVLX,
  STDBRX,
  STSWX,
  STWBRX,
  STFSX,
  STVRX,
  STFSUX,
  STSWI,
  STFDX,
  STFDUX,
  LVLXL,
  LHBRX,
  SRAW,
  SRAD,
  LVRXL,
  DSS,
  SRAWI,
  SRADI,
  EIEIO,
  STVLXL,
  STHBRX,
  EXTSH,
  STVRXL,
  EXTSB,
  STFIWX,
  EXTSW,
  ICBI,
  DCBZ,
  LWZ,
  LWZU,
  LBZ,
  LBZU,
  STW,
  STWU,
  STB,
  STBU,
  LHZ,
  LHZU,
  LHA,
  LHAU,
  STH,
  STHU,
  LMW,
  STMW,
  LFS,
  LFSU,
  LFD,
  LFDU,
  STFS,
  STFSU,
  STFD,
  STFDU,
  LD,
  LDU,
  LWA,
  STD,
  STDU,
  FDIVS,
  FSUBS,
  FADDS,
  FSQRTS,
  FRES,
  FMULS,
  FMADDS,
  FMSUBS,
  FNMSUBS,
  FNMADDS,
  MTFSB1,
  MCRFS,
  MTFSB0,
  MTFSFI,
  MFFS,
  MTFSF,
  FCMPU,
  FRSP,
  FCTIW,
  FCTIWZ,
  FDIV,
  FSUB,
  FADD,
  FSQRT,
  FSEL,
  FMUL,
  FRSQRTE,
  FMSUB,
  FMADD,
  FNMSUB,
  FNMADD,
  FCMPO,
  FNEG,
  FMR,
  FNABS,
  FABS,
  FCTID,
  FCTIDZ,
  FCFID,
  UNK,
  // OE=1 (overflow-enabled) variants.
  SUBFCO,
  ADDCO,
  SUBFO,
  NEGO,
  SUBFEO,
  ADDEO,
  SUBFZEO,
  ADDZEO,
  SUBFMEO,
  MULLDO,
  ADDMEO,
  MULLWO,
  ADDO,
  DIVDUO,
  DIVWUO,
  DIVDO,
  DIVWO,
  // OE=1 and Rc=1 variants.
  SUBFCO_,
  ADDCO_,
  SUBFO_,
  NEGO_,
  SUBFEO_,
  ADDEO_,
  SUBFZEO_,
  ADDZEO_,
  SUBFMEO_,
  MULLDO_,
  ADDMEO_,
  MULLWO_,
  ADDO_,
  DIVDUO_,
  DIVWUO_,
  DIVDO_,
  DIVWO_,
  // Rc=1 (record-form) variants.
  RLWIMI_,
  RLWINM_,
  RLWNM_,
  RLDICL_,
  RLDICR_,
  RLDIC_,
  RLDIMI_,
  RLDCL_,
  RLDCR_,
  SUBFC_,
  MULHDU_,
  ADDC_,
  MULHWU_,
  SLW_,
  CNTLZW_,
  SLD_,
  AND_,
  SUBF_,
  CNTLZD_,
  ANDC_,
  MULHD_,
  MULHW_,
  NEG_,
  NOR_,
  SUBFE_,
  ADDE_,
  SUBFZE_,
  ADDZE_,
  MULLD_,
  SUBFME_,
  ADDME_,
  MULLW_,
  ADD_,
  EQV_,
  XOR_,
  ORC_,
  OR_,
  DIVDU_,
  DIVWU_,
  NAND_,
  DIVD_,
  DIVW_,
  SRW_,
  SRD_,
  SRAW_,
  SRAD_,
  SRAWI_,
  SRADI_,
  EXTSH_,
  EXTSB_,
  EXTSW_,
  FDIVS_,
  FSUBS_,
  FADDS_,
  FSQRTS_,
  FRES_,
  FMULS_,
  FMADDS_,
  FMSUBS_,
  FNMSUBS_,
  FNMADDS_,
  MTFSB1_,
  MTFSB0_,
  MTFSFI_,
  MFFS_,
  MTFSF_,
  FRSP_,
  FCTIW_,
  FCTIWZ_,
  FDIV_,
  FSUB_,
  FADD_,
  FSQRT_,
  FSEL_,
  FMUL_,
  FRSQRTE_,
  FMSUB_,
  FMADD_,
  FNMSUB_,
  FNMADD_,
  FNEG_,
  FMR_,
  FNABS_,
  FABS_,
  FCTID_,
  FCTIDZ_,
  FCFID_,
  // extended mnemonic
  LI,
  LIS,
  NOP,
  MR,
  CLRLDI,
  ROTLDI,
  SRDI,
  CMPD,
  CMPW,
  CMPLD,
  CMPLW,
  NOT,
  MTCRF,
  MFXER,
  MFLR,
  MFCTR,
  MFTBU,
  CCTPL,
  CCTPM,
  CCTPH,
  DB8CYC,
  DB10CYC,
  DB12CYC,
  DB16CYC,
  CRNOT,
  BDNZF,
  BDZF,
  BDNZT,
  BDZT,
  BDZ,
  BDZ_P,
  BDZ_M,
  BDNZ,
  BDNZ_P,
  BDNZ_M,
  BGE,
  BGE_P,
  BGE_M,
  BLE,
  BLE_P,
  BLE_M,
  BNE,
  BNE_P,
  BNE_M,
  BNS,
  BNS_P,
  BNS_M,
  BLT,
  BLT_P,
  BLT_M,
  BGT,
  BGT_P,
  BGT_M,
  BEQ,
  BEQ_P,
  BEQ_M,
  BSO,
  BSO_P,
  BSO_M,
  BDNZFL,
  BDZFL,
  BDNZTL,
  BDZTL,
  BDZL,
  BDZL_P,
  BDZL_M,
  BDNZL,
  BDNZL_P,
  BDNZL_M,
  BGEL,
  BGEL_P,
  BGEL_M,
  BLEL,
  BLEL_P,
  BLEL_M,
  BNEL,
  BNEL_P,
  BNEL_M,
  BNSL,
  BNSL_P,
  BNSL_M,
  BLTL,
  BLTL_P,
  BLTL_M,
  BGTL,
  BGTL_P,
  BGTL_M,
  BEQL,
  BEQL_P,
  BEQL_M,
  BSOL,
  BSOL_P,
  BSOL_M,
  BDNZFA,
  BDZFA,
  BDNZTA,
  BDZTA,
  BDZA,
  BDZA_P,
  BDZA_M,
  BDNZA,
  BDNZA_P,
  BDNZA_M,
  BGEA,
  BGEA_P,
  BGEA_M,
  BLEA,
  BLEA_P,
  BLEA_M,
  BNEA,
  BNEA_P,
  BNEA_M,
  BNSA,
  BNSA_P,
  BNSA_M,
  BLTA,
  BLTA_P,
  BLTA_M,
  BGTA,
  BGTA_P,
  BGTA_M,
  BEQA,
  BEQA_P,
  BEQA_M,
  BSOA,
  BSOA_P,
  BSOA_M,
  BDNZFLA,
  BDZFLA,
  BDNZTLA,
  BDZTLA,
  BDZLA,
  BDZLA_P,
  BDZLA_M,
  BDNZLA,
  BDNZLA_P,
  BDNZLA_M,
  BGELA,
  BGELA_P,
  BGELA_M,
  BLELA,
  BLELA_P,
  BLELA_M,
  BNELA,
  BNELA_P,
  BNELA_M,
  BNSLA,
  BNSLA_P,
  BNSLA_M,
  BLTLA,
  BLTLA_P,
  BLTLA_M,
  BGTLA,
  BGTLA_P,
  BGTLA_M,
  BEQLA,
  BEQLA_P,
  BEQLA_M,
  BSOLA,
  BSOLA_P,
  BSOLA_M,
  BDNZFLR,
  BDZFLR,
  BDNZTLR,
  BDZTLR,
  BDZLR,
  BDZLR_P,
  BDZLR_M,
  BDNZLR,
  BDNZLR_P,
  BDNZLR_M,
  BGELR,
  BGELR_P,
  BGELR_M,
  BLELR,
  BLELR_P,
  BLELR_M,
  BNELR,
  BNELR_P,
  BNELR_M,
  BNSLR,
  BNSLR_P,
  BNSLR_M,
  BLTLR,
  BLTLR_P,
  BLTLR_M,
  BGTLR,
  BGTLR_P,
  BGTLR_M,
  BEQLR,
  BEQLR_P,
  BEQLR_M,
  BSOLR,
  BSOLR_P,
  BSOLR_M,
  BDNZFCTR,
  BDZFCTR,
  BDNZTCTR,
  BDZTCTR,
  BDZCTR,
  BDZCTR_P,
  BDZCTR_M,
  BDNZCTR,
  BDNZCTR_P,
  BDNZCTR_M,
  BGECTR,
  BGECTR_P,
  BGECTR_M,
  BLECTR,
  BLECTR_P,
  BLECTR_M,
  BNECTR,
  BNECTR_P,
  BNECTR_M,
  BNSCTR,
  BNSCTR_P,
  BNSCTR_M,
  BLTCTR,
  BLTCTR_P,
  BLTCTR_M,
  BGTCTR,
  BGTCTR_P,
  BGTCTR_M,
  BEQCTR,
  BEQCTR_P,
  BEQCTR_M,
  BSOCTR,
  BSOCTR_P,
  BSOCTR_M,
  BDNZFCTRL,
  BDZFCTRL,
  BDNZTCTRL,
  BDZTCTRL,
  BDZCTRL,
  BDZCTRL_P,
  BDZCTRL_M,
  BDNZCTRL,
  BDNZCTRL_P,
  BDNZCTRL_M,
  BGECTRL,
  BGECTRL_P,
  BGECTRL_M,
  BLECTRL,
  BLECTRL_P,
  BLECTRL_M,
  BNECTRL,
  BNECTRL_P,
  BNECTRL_M,
  BNSCTRL,
  BNSCTRL_P,
  BNSCTRL_M,
  BLTCTRL,
  BLTCTRL_P,
  BLTCTRL_M,
  BGTCTRL,
  BGTCTRL_P,
  BGTCTRL_M,
  BEQCTRL,
  BEQCTRL_P,
  BEQCTRL_M,
  BSOCTRL,
  BSOCTRL_P,
  BSOCTRL_M,
  BDNZFLRL,
  BDZFLRL,
  BDNZTLRL,
  BDZTLRL,
  BDZLRL,
  BDZLRL_P,
  BDZLRL_M,
  BDNZLRL,
  BDNZLRL_P,
  BDNZLRL_M,
  BGELRL,
  BGELRL_P,
  BGELRL_M,
  BLELRL,
  BLELRL_P,
  BLELRL_M,
  BNELRL,
  BNELRL_P,
  BNELRL_M,
  BNSLRL,
  BNSLRL_P,
  BNSLRL_M,
  BLTLRL,
  BLTLRL_P,
  BLTLRL_M,
  BGTLRL,
  BGTLRL_P,
  BGTLRL_M,
  BEQLRL,
  BEQLRL_P,
  BEQLRL_M,
  BSOLRL,
  BSOLRL_P,
  BSOLRL_M,
  BL,
  BA,
  BLA,
  BCL,
  BCA,
  BCLA,
  BLR,
  BTLR,
  BFLR,
  BCTRL,
  BCCTRL,
  BTCTRL,
  BFCTRL,
  // Number of opcodes; must stay last.
  _count
};
}

View file

@ -0,0 +1,129 @@
#pragma once
#include "rx/v128.hpp"
#include <cstdint>
// One 4-bit condition-register field stored unpacked: one byte per bit, in
// LT, GT, EQ, SO order. alignas(4) lets eight fields overlay a 32-byte
// unpacked CR image.
struct alignas(4) CrField {
  std::uint8_t bits[4]; // [0]=LT, [1]=GT, [2]=EQ, [3]=SO

  // Overwrite all four bits at once.
  constexpr void set(bool lt, bool gt, bool eq, bool so) {
    bits[0] = lt;
    bits[1] = gt;
    bits[2] = eq;
    bits[3] = so;
  }

  // Derive LT/GT/EQ from a three-way comparison of lhs and rhs; `so` is
  // supplied by the caller (typically XER summary overflow).
  template <typename T>
  constexpr void update(const T &lhs, const T &rhs, bool so) {
    bits[0] = lhs < rhs;
    bits[1] = lhs > rhs;
    bits[2] = lhs == rhs;
    bits[3] = so;
  }

  // Factory form of set(). The local is value-initialized: a constexpr
  // function may not contain an uninitialized variable in C++17, and set()
  // overwrites every element anyway.
  static constexpr CrField From(bool lt, bool gt, bool eq, bool so) {
    CrField result{};
    result.set(lt, gt, eq, so);
    return result;
  }

  [[nodiscard]] constexpr bool isLt() const { return bits[0] != 0; }
  [[nodiscard]] constexpr bool isGt() const { return bits[1] != 0; }
  [[nodiscard]] constexpr bool isEq() const { return bits[2] != 0; }
  [[nodiscard]] constexpr bool isSo() const { return bits[3] != 0; }
};
// Architectural state of one PPU thread. Note: `pack`/`unpack` previously
// used the undeclared shorthand aliases `u32`/`u8`; they now use the
// <cstdint> names this header consistently uses elsewhere.
struct PPUContext {
  std::uint64_t gpr[32] = {}; // General-Purpose Registers
  double fpr[32] = {};        // Floating Point Registers
  rx::v128 vr[32] = {};       // Vector Registers

  // Condition register stored unpacked: one byte per CR bit, also viewable
  // as eight CrField groups.
  union alignas(16) cr_bits {
    std::uint8_t bits[32];
    CrField fields[8];

    std::uint8_t &operator[](std::size_t i) { return bits[i]; }

    // Pack CR bits into the architectural 32-bit CR image (bits[0] becomes
    // the most significant bit).
    [[nodiscard]] std::uint32_t pack() const {
      std::uint32_t result{};
      for (std::uint32_t bit : bits) {
        result <<= 1;
        result |= bit;
      }
      return result;
    }

    // Unpack a 32-bit CR image into one byte per bit, MSB first.
    void unpack(std::uint32_t value) {
      for (std::uint8_t &b : bits) {
        b = !!(value & (1u << 31));
        value <<= 1;
      }
    }
  };

  cr_bits cr{}; // Condition Registers (unpacked)

  // Floating-Point Status and Control Register (unpacked)
  union alignas(16) {
    struct {
      // TODO
      bool _start[16];
      bool fl; // FPCC.FL
      bool fg; // FPCC.FG
      bool fe; // FPCC.FE
      bool fu; // FPCC.FU
      bool _end[12];
    };
    CrField fields[8];
    cr_bits bits;
  } fpscr{};

  std::uint64_t lr{};               // Link Register
  std::uint64_t ctr{};              // Counter Register
  std::uint32_t vrsave{0xffffffff}; // vr Save Register
  std::uint32_t cia{};              // Current Instruction Address

  // Fixed-Point Exception Register (abstract representation)
  bool xer_so{};          // Summary Overflow
  bool xer_ov{};          // Overflow
  bool xer_ca{};          // Carry
  std::uint8_t xer_cnt{}; // 0..6

  /*
    Non-Java. A mode control bit that determines whether vector floating-point
    operations will be performed in a Java-IEEE-C9X-compliant mode or a
    possibly faster non-Java/non-IEEE mode. 0 The Java-IEEE-C9X-compliant mode
    is selected. Denormalized values are handled as specified by Java, IEEE,
    and C9X standard. 1 The non-Java/non-IEEE-compliant mode is
    selected. If an element in a source vector register contains a denormalized
    value, the value '0' is used instead. If an instruction causes an underflow
    exception, the corresponding element in the target vr is cleared to
    '0'. In both cases, the '0' has the same sign as the denormalized or
    underflowing value.
  */
  bool nj = true;

  // Sticky saturation bit
  rx::v128 sat{};

  // Optimization: precomputed java-mode mask for handling denormals
  std::uint32_t jm_mask = 0x7f80'0000;

  std::uint32_t raddr{0}; // Reservation addr
  std::uint64_t rtime{0};
  alignas(64) std::byte rdata[128]{}; // Reservation data
  bool use_full_rdata{};

  std::uint32_t res_cached{0}; // Reservation "cached" address
  std::uint32_t res_notify{0};
  std::uint64_t res_notify_time{0};

  // Record an overflow outcome: OV reflects the latest operation, SO is
  // sticky once set.
  inline void setOV(bool bit) {
    xer_ov = bit;
    xer_so |= bit;
  }
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,501 @@
#include "Decoder.hpp"
#include "Instruction.hpp"
#include "Opcode.hpp"
#include <bit>
#include <cstdint>
// One row of a decoder-table group: `value` holds the extended-opcode bits,
// `opcode`/`rcOpcode` the decoded results for the Rc=0/Rc=1 forms, and
// `magn` widens the entry so it matches all encodings whose low `magn` bits
// of the opcode field vary (used for forms carrying an extra operand there).
// The two original constructor overloads are folded into one with a
// defaulted `magn`; call sites are unchanged.
struct InstructionEncodingInfo {
  std::uint32_t value;
  rx::cell::ppu::Opcode opcode;
  rx::cell::ppu::Opcode rcOpcode;
  std::uint32_t magn;

  constexpr InstructionEncodingInfo(std::uint32_t value,
                                    rx::cell::ppu::Opcode opcode,
                                    rx::cell::ppu::Opcode rcOpcode,
                                    std::uint32_t magn = 0)
      : value(value), opcode(opcode), rcOpcode(rcOpcode), magn(magn) {}
};
// Build the full opcode lookup table indexed by
// (extended-opcode-and-flag bits << 6) | primary opcode. Each fill_table
// call describes one primary-opcode group; entries replicate themselves
// over every bit pattern they should match so decoding is a single lookup.
static constexpr rx::cell::ppu::DecoderTable<rx::cell::ppu::Opcode>
buildOpcodeTable() {
  // Main opcodes (field 0..5)
  rx::cell::ppu::DecoderTable<rx::cell::ppu::Opcode> result;
  result.fill(rx::cell::ppu::Opcode::Invalid);

  // main_op: primary opcode of the group; count: width in bits of the
  // group's extended-opcode field; sh: how many low bits sit below that
  // field (the Rc bit lives at parity of the replicated index, selecting
  // rcOpcode). Passing sh = -1 (it wraps, being unsigned) takes the
  // special branch that fills a primary-opcode-only entry across all 11
  // extension bits. v.magn shrinks the matched field so one entry covers
  // 2^magn encodings (forms with an extra operand in the low bits).
  auto fill_table =
      [&](std::uint32_t main_op, std::uint32_t count, std::uint32_t sh,
          std::initializer_list<InstructionEncodingInfo> entries) noexcept {
        if (sh < 11) {
          for (const auto &v : entries) {
            for (std::uint32_t i = 0; i < 1u << (v.magn + (11 - sh - count));
                 i++) {
              for (std::uint32_t j = 0; j < 1u << sh; j++) {
                const std::uint32_t k =
                    (((i << (count - v.magn)) | v.value) << sh) | j;
                result[(k << 6) | main_op] = i & 1 ? v.rcOpcode : v.opcode;
              }
            }
          }
        } else {
          // Main table (special case)
          for (const auto &v : entries) {
            for (std::uint32_t i = 0; i < 1u << 11; i++) {
              result[i << 6 | v.value] = i & 1 ? v.rcOpcode : v.opcode;
            }
          }
        }
      };

// GET maps both Rc=0 and Rc=1 forms to the same opcode; GETRC maps Rc=1 to
// the trailing-underscore record-form opcode.
#define GET(name) rx::cell::ppu::Opcode::name, rx::cell::ppu::Opcode::name
#define GETRC(name) rx::cell::ppu::Opcode::name, rx::cell::ppu::Opcode::name##_

  // Primary-opcode-only instructions (sh = -1 special case).
  fill_table(
      0x00, 6, -1,
      {
          {0x02, GET(TDI)}, {0x03, GET(TWI)}, {0x07, GET(MULLI)},
          {0x08, GET(SUBFIC)}, {0x0a, GET(CMPLI)}, {0x0b, GET(CMPI)},
          {0x0c, GET(ADDIC)}, {0x0d, GET(ADDIC)}, {0x0e, GET(ADDI)},
          {0x0f, GET(ADDIS)}, {0x10, GET(BC)}, {0x11, GET(SC)},
          {0x12, GET(B)}, {0x14, GETRC(RLWIMI)}, {0x15, GETRC(RLWINM)},
          {0x17, GETRC(RLWNM)}, {0x18, GET(ORI)}, {0x19, GET(ORIS)},
          {0x1a, GET(XORI)}, {0x1b, GET(XORIS)}, {0x1c, GET(ANDI)},
          {0x1d, GET(ANDIS)}, {0x20, GET(LWZ)}, {0x21, GET(LWZU)},
          {0x22, GET(LBZ)}, {0x23, GET(LBZU)}, {0x24, GET(STW)},
          {0x25, GET(STWU)}, {0x26, GET(STB)}, {0x27, GET(STBU)},
          {0x28, GET(LHZ)}, {0x29, GET(LHZU)}, {0x2a, GET(LHA)},
          {0x2b, GET(LHAU)}, {0x2c, GET(STH)}, {0x2d, GET(STHU)},
          {0x2e, GET(LMW)}, {0x2f, GET(STMW)}, {0x30, GET(LFS)},
          {0x31, GET(LFSU)}, {0x32, GET(LFD)}, {0x33, GET(LFDU)},
          {0x34, GET(STFS)}, {0x35, GET(STFSU)}, {0x36, GET(STFD)},
          {0x37, GET(STFDU)},
      });

  // Group 0x04 opcodes (field 21..31)
  fill_table(0x04, 11, 0,
             {
                 {0x0, GET(VADDUBM)}, {0x2, GET(VMAXUB)},
                 {0x4, GET(VRLB)}, {0x006, GET(VCMPEQUB)},
                 {0x406, GET(VCMPEQUB_)}, {0x8, GET(VMULOUB)},
                 {0xa, GET(VADDFP)}, {0xc, GET(VMRGHB)},
                 {0xe, GET(VPKUHUM)},
                 {0x20, GET(VMHADDSHS), 5}, {0x21, GET(VMHRADDSHS), 5},
                 {0x22, GET(VMLADDUHM), 5}, {0x24, GET(VMSUMUBM), 5},
                 {0x25, GET(VMSUMMBM), 5}, {0x26, GET(VMSUMUHM), 5},
                 {0x27, GET(VMSUMUHS), 5}, {0x28, GET(VMSUMSHM), 5},
                 {0x29, GET(VMSUMSHS), 5}, {0x2a, GET(VSEL), 5},
                 {0x2b, GET(VPERM), 5}, {0x2c, GET(VSLDOI), 5},
                 {0x2e, GET(VMADDFP), 5}, {0x2f, GET(VNMSUBFP), 5},
                 {0x40, GET(VADDUHM)}, {0x42, GET(VMAXUH)},
                 {0x44, GET(VRLH)}, {0x046, GET(VCMPEQUH)},
                 {0x446, GET(VCMPEQUH_)}, {0x48, GET(VMULOUH)},
                 {0x4a, GET(VSUBFP)}, {0x4c, GET(VMRGHH)},
                 {0x4e, GET(VPKUWUM)}, {0x80, GET(VADDUWM)},
                 {0x82, GET(VMAXUW)}, {0x84, GET(VRLW)},
                 {0x086, GET(VCMPEQUW)}, {0x486, GET(VCMPEQUW_)},
                 {0x8c, GET(VMRGHW)}, {0x8e, GET(VPKUHUS)},
                 {0x0c6, GET(VCMPEQFP)}, {0x4c6, GET(VCMPEQFP_)},
                 {0xce, GET(VPKUWUS)},
                 {0x102, GET(VMAXSB)}, {0x104, GET(VSLB)},
                 {0x108, GET(VMULOSB)}, {0x10a, GET(VREFP)},
                 {0x10c, GET(VMRGLB)}, {0x10e, GET(VPKSHUS)},
                 {0x142, GET(VMAXSH)}, {0x144, GET(VSLH)},
                 {0x148, GET(VMULOSH)}, {0x14a, GET(VRSQRTEFP)},
                 {0x14c, GET(VMRGLH)}, {0x14e, GET(VPKSWUS)},
                 {0x180, GET(VADDCUW)}, {0x182, GET(VMAXSW)},
                 {0x184, GET(VSLW)}, {0x18a, GET(VEXPTEFP)},
                 {0x18c, GET(VMRGLW)}, {0x18e, GET(VPKSHSS)},
                 {0x1c4, GET(VSL)}, {0x1c6, GET(VCMPGEFP)},
                 {0x5c6, GET(VCMPGEFP_)}, {0x1ca, GET(VLOGEFP)},
                 {0x1ce, GET(VPKSWSS)}, {0x200, GET(VADDUBS)},
                 {0x202, GET(VMINUB)}, {0x204, GET(VSRB)},
                 {0x206, GET(VCMPGTUB)}, {0x606, GET(VCMPGTUB_)},
                 {0x208, GET(VMULEUB)}, {0x20a, GET(VRFIN)},
                 {0x20c, GET(VSPLTB)}, {0x20e, GET(VUPKHSB)},
                 {0x240, GET(VADDUHS)}, {0x242, GET(VMINUH)},
                 {0x244, GET(VSRH)}, {0x246, GET(VCMPGTUH)},
                 {0x646, GET(VCMPGTUH_)}, {0x248, GET(VMULEUH)},
                 {0x24a, GET(VRFIZ)}, {0x24c, GET(VSPLTH)},
                 {0x24e, GET(VUPKHSH)}, {0x280, GET(VADDUWS)},
                 {0x282, GET(VMINUW)}, {0x284, GET(VSRW)},
                 {0x286, GET(VCMPGTUW)}, {0x686, GET(VCMPGTUW_)},
                 {0x28a, GET(VRFIP)}, {0x28c, GET(VSPLTW)},
                 {0x28e, GET(VUPKLSB)}, {0x2c4, GET(VSR)},
                 {0x2c6, GET(VCMPGTFP)}, {0x6c6, GET(VCMPGTFP_)},
                 {0x2ca, GET(VRFIM)}, {0x2ce, GET(VUPKLSH)},
                 {0x300, GET(VADDSBS)}, {0x302, GET(VMINSB)},
                 {0x304, GET(VSRAB)}, {0x306, GET(VCMPGTSB)},
                 {0x706, GET(VCMPGTSB_)}, {0x308, GET(VMULESB)},
                 {0x30a, GET(VCFUX)}, {0x30c, GET(VSPLTISB)},
                 {0x30e, GET(VPKPX)}, {0x340, GET(VADDSHS)},
                 {0x342, GET(VMINSH)}, {0x344, GET(VSRAH)},
                 {0x346, GET(VCMPGTSH)}, {0x746, GET(VCMPGTSH_)},
                 {0x348, GET(VMULESH)}, {0x34a, GET(VCFSX)},
                 {0x34c, GET(VSPLTISH)}, {0x34e, GET(VUPKHPX)},
                 {0x380, GET(VADDSWS)}, {0x382, GET(VMINSW)},
                 {0x384, GET(VSRAW)}, {0x386, GET(VCMPGTSW)},
                 {0x786, GET(VCMPGTSW_)}, {0x38a, GET(VCTUXS)},
                 {0x38c, GET(VSPLTISW)}, {0x3c6, GET(VCMPBFP)},
                 {0x7c6, GET(VCMPBFP_)}, {0x3ca, GET(VCTSXS)},
                 {0x3ce, GET(VUPKLPX)}, {0x400, GET(VSUBUBM)},
                 {0x402, GET(VAVGUB)}, {0x404, GET(VAND)},
                 {0x40a, GET(VMAXFP)}, {0x40c, GET(VSLO)},
                 {0x440, GET(VSUBUHM)}, {0x442, GET(VAVGUH)},
                 {0x444, GET(VANDC)}, {0x44a, GET(VMINFP)},
                 {0x44c, GET(VSRO)}, {0x480, GET(VSUBUWM)},
                 {0x482, GET(VAVGUW)}, {0x484, GET(VOR)},
                 {0x4c4, GET(VXOR)}, {0x502, GET(VAVGSB)},
                 {0x504, GET(VNOR)}, {0x542, GET(VAVGSH)},
                 {0x580, GET(VSUBCUW)}, {0x582, GET(VAVGSW)},
                 {0x600, GET(VSUBUBS)}, {0x604, GET(MFVSCR)},
                 {0x608, GET(VSUM4UBS)}, {0x640, GET(VSUBUHS)},
                 {0x644, GET(MTVSCR)}, {0x648, GET(VSUM4SHS)},
                 {0x680, GET(VSUBUWS)}, {0x688, GET(VSUM2SWS)},
                 {0x700, GET(VSUBSBS)}, {0x708, GET(VSUM4SBS)},
                 {0x740, GET(VSUBSHS)}, {0x780, GET(VSUBSWS)},
                 {0x788, GET(VSUMSWS)},
             });

  // Group 0x13 opcodes (field 21..30)
  fill_table(0x13, 10, 1,
             {
                 {0x000, GET(MCRF)},
                 {0x010, GET(BCLR)},
                 {0x012, GET(RFID)},
                 {0x021, GET(CRNOR)},
                 {0x052, GET(RFSCV)},
                 {0x081, GET(CRANDC)},
                 {0x096, GET(ISYNC)},
                 {0x0c1, GET(CRXOR)},
                 {0x0e1, GET(CRNAND)},
                 {0x101, GET(CRAND)},
                 {0x112, GET(HRFID)},
                 {0x121, GET(CREQV)},
                 {0x132, GET(URFID)},
                 {0x172, GET(STOP)},
                 {0x1a1, GET(CRORC)},
                 {0x1c1, GET(CROR)},
                 {0x210, GET(BCCTR)},
             });

  // Group 0x1e opcodes (field 27..30)
  fill_table(0x1e, 4, 1,
             {
                 {0x0, GETRC(RLDICL)},
                 {0x1, GETRC(RLDICL)},
                 {0x2, GETRC(RLDICR)},
                 {0x3, GETRC(RLDICR)},
                 {0x4, GETRC(RLDIC)},
                 {0x5, GETRC(RLDIC)},
                 {0x6, GETRC(RLDIMI)},
                 {0x7, GETRC(RLDIMI)},
                 {0x8, GETRC(RLDCL)},
                 {0x9, GETRC(RLDCR)},
             });

  // Group 0x1f opcodes (field 21..30)
  fill_table(0x1f, 10, 1,
             {
                 {0x000, GET(CMP)}, {0x004, GET(TW)},
                 {0x006, GET(LVSL)}, {0x007, GET(LVEBX)},
                 {0x008, GETRC(SUBFC)}, {0x208, GETRC(SUBFCO)},
                 {0x009, GETRC(MULHDU)}, {0x00a, GETRC(ADDC)},
                 {0x20a, GETRC(ADDCO)}, {0x00b, GETRC(MULHWU)},
                 {0x013, GET(MFOCRF)}, {0x014, GET(LWARX)},
                 {0x015, GET(LDX)}, {0x017, GET(LWZX)},
                 {0x018, GETRC(SLW)}, {0x01a, GETRC(CNTLZW)},
                 {0x01b, GETRC(SLD)}, {0x01c, GETRC(AND)},
                 {0x020, GET(CMPL)}, {0x026, GET(LVSR)},
                 {0x027, GET(LVEHX)}, {0x028, GETRC(SUBF)},
                 {0x228, GETRC(SUBFO)}, {0x035, GET(LDUX)},
                 {0x036, GET(DCBST)}, {0x037, GET(LWZUX)},
                 {0x03a, GETRC(CNTLZD)}, {0x03c, GETRC(ANDC)},
                 {0x044, GET(TD)}, {0x047, GET(LVEWX)},
                 {0x049, GETRC(MULHD)}, {0x04b, GETRC(MULHW)},
                 {0x054, GET(LDARX)}, {0x056, GET(DCBF)},
                 {0x057, GET(LBZX)}, {0x067, GET(LVX)},
                 {0x068, GETRC(NEG)}, {0x268, GETRC(NEGO)},
                 {0x077, GET(LBZUX)}, {0x07c, GETRC(NOR)},
                 {0x087, GET(STVEBX)}, {0x088, GETRC(SUBFE)},
                 {0x288, GETRC(SUBFEO)}, {0x08a, GETRC(ADDE)},
                 {0x28a, GETRC(ADDEO)}, {0x090, GET(MTOCRF)},
                 {0x095, GET(STDX)}, {0x096, GET(STWCX)},
                 {0x097, GET(STWX)}, {0x0a7, GET(STVEHX)},
                 {0x0b5, GET(STDUX)}, {0x0b7, GET(STWUX)},
                 {0x0c7, GET(STVEWX)}, {0x0c8, GETRC(SUBFZE)},
                 {0x2c8, GETRC(SUBFZEO)}, {0x0ca, GETRC(ADDZE)},
                 {0x2ca, GETRC(ADDZEO)}, {0x0d6, GET(STDCX)},
                 {0x0d7, GET(STBX)}, {0x0e7, GET(STVX)},
                 {0x0e8, GETRC(SUBFME)}, {0x2e8, GETRC(SUBFMEO)},
                 {0x0e9, GETRC(MULLD)}, {0x2e9, GETRC(MULLDO)},
                 {0x0ea, GETRC(ADDME)}, {0x2ea, GETRC(ADDMEO)},
                 {0x0eb, GETRC(MULLW)}, {0x2eb, GETRC(MULLWO)},
                 {0x0f6, GET(DCBTST)}, {0x0f7, GET(STBUX)},
                 {0x10a, GETRC(ADD)}, {0x30a, GETRC(ADDO)},
                 {0x116, GET(DCBT)}, {0x117, GET(LHZX)},
                 {0x11c, GETRC(EQV)}, {0x136, GET(ECIWX)},
                 {0x137, GET(LHZUX)}, {0x13c, GETRC(XOR)},
                 {0x153, GET(MFSPR)}, {0x155, GET(LWAX)},
                 {0x156, GET(DST)}, {0x157, GET(LHAX)},
                 {0x167, GET(LVXL)}, {0x173, GET(MFTB)},
                 {0x175, GET(LWAUX)}, {0x176, GET(DSTST)},
                 {0x177, GET(LHAUX)}, {0x197, GET(STHX)},
                 {0x19c, GETRC(ORC)}, {0x1b6, GET(ECOWX)},
                 {0x1b7, GET(STHUX)}, {0x1bc, GETRC(OR)},
                 {0x1c9, GETRC(DIVDU)}, {0x3c9, GETRC(DIVDUO)},
                 {0x1cb, GETRC(DIVWU)}, {0x3cb, GETRC(DIVWUO)},
                 {0x1d3, GET(MTSPR)}, {0x1d6, GET(DCBI)},
                 {0x1dc, GETRC(NAND)}, {0x1e7, GET(STVXL)},
                 {0x1e9, GETRC(DIVD)}, {0x3e9, GETRC(DIVDO)},
                 {0x1eb, GETRC(DIVW)}, {0x3eb, GETRC(DIVWO)},
                 {0x207, GET(LVLX)}, {0x214, GET(LDBRX)},
                 {0x215, GET(LSWX)}, {0x216, GET(LWBRX)},
                 {0x217, GET(LFSX)}, {0x218, GETRC(SRW)},
                 {0x21b, GETRC(SRD)}, {0x227, GET(LVRX)},
                 {0x237, GET(LFSUX)}, {0x255, GET(LSWI)},
                 {0x256, GET(SYNC)}, {0x257, GET(LFDX)},
                 {0x277, GET(LFDUX)}, {0x287, GET(STVLX)},
                 {0x294, GET(STDBRX)}, {0x295, GET(STSWX)},
                 {0x296, GET(STWBRX)}, {0x297, GET(STFSX)},
                 {0x2a7, GET(STVRX)}, {0x2b7, GET(STFSUX)},
                 {0x2d5, GET(STSWI)}, {0x2d7, GET(STFDX)},
                 {0x2f7, GET(STFDUX)}, {0x307, GET(LVLXL)},
                 {0x316, GET(LHBRX)}, {0x318, GETRC(SRAW)},
                 {0x31a, GETRC(SRAD)}, {0x327, GET(LVRXL)},
                 {0x336, GET(DSS)}, {0x338, GETRC(SRAWI)},
                 {0x33a, GETRC(SRADI)}, {0x33b, GETRC(SRADI)},
                 {0x356, GET(EIEIO)}, {0x387, GET(STVLXL)},
                 {0x396, GET(STHBRX)}, {0x39a, GETRC(EXTSH)},
                 {0x3a7, GET(STVRXL)}, {0x3ba, GETRC(EXTSB)},
                 {0x3d7, GET(STFIWX)}, {0x3da, GETRC(EXTSW)},
                 {0x3d6, GET(ICBI)}, {0x3f6, GET(DCBZ)},
             });

  // Group 0x3a opcodes (field 30..31)
  fill_table(0x3a, 2, 0,
             {
                 {0x0, GET(LD)},
                 {0x1, GET(LDU)},
                 {0x2, GET(LWA)},
             });

  // Group 0x3b opcodes (field 21..30)
  fill_table(0x3b, 10, 1,
             {
                 {0x12, GETRC(FDIVS), 5},
                 {0x14, GETRC(FSUBS), 5},
                 {0x15, GETRC(FADDS), 5},
                 {0x16, GETRC(FSQRTS), 5},
                 {0x18, GETRC(FRES), 5},
                 {0x19, GETRC(FMULS), 5},
                 {0x1c, GETRC(FMSUBS), 5},
                 {0x1d, GETRC(FMADDS), 5},
                 {0x1e, GETRC(FNMSUBS), 5},
                 {0x1f, GETRC(FNMADDS), 5},
             });

  // Group 0x3e opcodes (field 30..31)
  fill_table(0x3e, 2, 0,
             {
                 {0x0, GET(STD)},
                 {0x1, GET(STDU)},
             });

  // Group 0x3f opcodes (field 21..30)
  fill_table(0x3f, 10, 1,
             {
                 {0x026, GETRC(MTFSB1)}, {0x040, GET(MCRFS)},
                 {0x046, GETRC(MTFSB0)}, {0x086, GETRC(MTFSFI)},
                 {0x247, GETRC(MFFS)}, {0x2c7, GETRC(MTFSF)},
                 {0x000, GET(FCMPU)}, {0x00c, GETRC(FRSP)},
                 {0x00e, GETRC(FCTIW)}, {0x00f, GETRC(FCTIWZ)},
                 {0x012, GETRC(FDIV), 5}, {0x014, GETRC(FSUB), 5},
                 {0x015, GETRC(FADD), 5}, {0x016, GETRC(FSQRT), 5},
                 {0x017, GETRC(FSEL), 5}, {0x019, GETRC(FMUL), 5},
                 {0x01a, GETRC(FRSQRTE), 5}, {0x01c, GETRC(FMSUB), 5},
                 {0x01d, GETRC(FMADD), 5}, {0x01e, GETRC(FNMSUB), 5},
                 {0x01f, GETRC(FNMADD), 5},
                 {0x020, GET(FCMPO)}, {0x028, GETRC(FNEG)},
                 {0x048, GETRC(FMR)}, {0x088, GETRC(FNABS)},
                 {0x108, GETRC(FABS)}, {0x32e, GETRC(FCTID)},
                 {0x32f, GETRC(FCTIDZ)}, {0x34e, GETRC(FCFID)},
             });

  return result;
}
// Global decode table; the initializer is a constexpr function, so the
// compiler may evaluate it entirely at compile time.
rx::cell::ppu::DecoderTable<rx::cell::ppu::Opcode>
    rx::cell::ppu::g_ppuOpcodeTable = buildOpcodeTable();
// Promote a table-decoded opcode to its extended mnemonic when the operand
// pattern of `instruction` matches one (e.g. addi with ra=0 becomes li).
// Returns the incoming opcode unchanged when no simplification applies.
rx::cell::ppu::Opcode rx::cell::ppu::fixOpcode(Opcode opcode,
                                               std::uint32_t instruction) {
  const auto inst = std::bit_cast<Instruction>(instruction);

  switch (opcode) {
  case Opcode::ADDI:
    return inst.ra == 0 ? Opcode::LI : opcode;

  case Opcode::ADDIS:
    return inst.ra == 0 ? Opcode::LIS : opcode;

  case Opcode::CRNOR:
    return inst.crba == inst.crbb ? Opcode::CRNOT : opcode;

  case Opcode::B:
    if (inst.aa && inst.lk) {
      return Opcode::BLA;
    }
    if (inst.lk) {
      return Opcode::BL;
    }
    if (inst.aa) {
      return Opcode::BA;
    }
    return opcode;

  case Opcode::ORI:
    if (inst.rs == 0 && inst.ra == 0 && inst.uimm16 == 0) {
      return Opcode::NOP;
    }
    if (inst.uimm16 == 0) {
      return Opcode::MR;
    }
    return opcode;

  case Opcode::ORIS:
    if (inst.rs == 0 && inst.ra == 0 && inst.uimm16 == 0) {
      return Opcode::NOP;
    }
    return opcode;

  case Opcode::RLDICL:
    if (inst.sh64 == 0) {
      return Opcode::CLRLDI;
    }
    if (inst.mbe64 == 0) {
      return Opcode::ROTLDI;
    }
    if (inst.mbe64 == 64 - inst.sh64) {
      return Opcode::SRDI;
    }
    return opcode;

  case Opcode::CMP:
    return inst.l10 ? Opcode::CMPD : Opcode::CMPW;

  case Opcode::CMPL:
    return inst.l10 ? Opcode::CMPLD : Opcode::CMPLW;

  case Opcode::NOR:
    return inst.rs == inst.rb ? Opcode::NOT : opcode;

  case Opcode::MTOCRF:
    return inst.l10 ? opcode : Opcode::MTCRF;

  case Opcode::MFSPR: {
    // SPR numbers are encoded with their 5-bit halves swapped.
    const std::uint32_t n = (inst.spr >> 5) | ((inst.spr & 0x1f) << 5);
    if (n == 1) {
      return Opcode::MFXER;
    }
    if (n == 8) {
      return Opcode::MFLR;
    }
    if (n == 9) {
      return Opcode::MFCTR;
    }
    return opcode;
  }

  case Opcode::MFTB: {
    const std::uint32_t n = (inst.spr >> 5) | ((inst.spr & 0x1f) << 5);
    if (n == 268) {
      return Opcode::MFTB;
    }
    if (n == 269) {
      return Opcode::MFTBU;
    }
    return opcode;
  }

  case Opcode::OR:
    if (inst.rs != inst.rb) {
      return opcode;
    }
    // or rx, rx, rx forms with specific registers are cell hints.
    switch (inst.raw) {
    case 0x7c210b78:
      return Opcode::CCTPL;
    case 0x7c421378:
      return Opcode::CCTPM;
    case 0x7c631b78:
      return Opcode::CCTPH;
    case 0x7f9ce378:
      return Opcode::DB8CYC;
    case 0x7fbdeb78:
      return Opcode::DB10CYC;
    case 0x7fdef378:
      return Opcode::DB12CYC;
    case 0x7ffffb78:
      return Opcode::DB16CYC;
    default:
      return Opcode::MR;
    }

  default:
    return opcode;
  }
}

244
rx/include/rx/BitField.h Normal file
View file

@ -0,0 +1,244 @@
#pragma once
#include <cstddef>
#include <type_traits>
#ifndef _MSC_VER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#endif
namespace rx {
// Shared constants for a bitfield of N bits stored inside a value of type T.
template <typename T, std::size_t N> struct BitFieldBase {
  using type = T;
  using vtype = std::common_type_t<type>;
  using utype = std::make_unsigned_t<vtype>;

  // True when the field value fits in a plain int/size_t even though the
  // storage type is wider, allowing accessors to hand back a machine word.
  static constexpr bool can_be_packed =
      N < (sizeof(int) * 8 + (std::is_unsigned_v<vtype> ? 1 : 0)) &&
      sizeof(vtype) > sizeof(int);

  // Type returned/accepted by accessors: int/size_t when packable,
  // otherwise the storage value type itself.
  using compact_type = std::conditional_t<
      can_be_packed,
      std::conditional_t<std::is_unsigned_v<vtype>, std::size_t, int>, vtype>;

  // Datatype bitsize
  static constexpr std::size_t bitmax = sizeof(T) * 8;
  static_assert(N - 1 < bitmax, "BitFieldBase<> error: N out of bounds");

  // Field bitsize
  static constexpr std::size_t bitsize = N;

  // All ones mask
  static constexpr utype mask1 = static_cast<utype>(~static_cast<utype>(0));

  // Value mask
  static constexpr utype vmask = mask1 >> (bitmax - bitsize);

protected:
  // Underlying storage; aliased by every BitField over the same word.
  type m_data;
};
// Bitfield accessor (N bits from I position, 0 is LSB)
// Bitfield accessor (N bits from I position, 0 is LSB)
template <typename T, std::size_t I, std::size_t N>
struct BitField : BitFieldBase<T, N> {
  using type = typename BitField::type;
  using vtype = typename BitField::vtype;
  using utype = typename BitField::utype;
  using compact_type = typename BitField::compact_type;

  // Field offset
  static constexpr std::size_t bitpos = I;
  static_assert(bitpos + N <= BitField::bitmax,
                "BitField<> error: I out of bounds");

  // Get bitmask of size N, at I pos
  static constexpr utype data_mask() {
    return static_cast<utype>(
        static_cast<utype>(BitField::mask1 >>
                           (BitField::bitmax - BitField::bitsize))
        << bitpos);
  }

  // Bitfield extraction
  static constexpr compact_type extract(const T &data) noexcept {
    if constexpr (std::is_signed_v<T>) {
      // Load signed value (sign-extended): shift the field to the top,
      // then arithmetic-shift it back down.
      return static_cast<compact_type>(
          static_cast<vtype>(static_cast<utype>(data)
                             << (BitField::bitmax - bitpos - N)) >>
          (BitField::bitmax - N));
    } else {
      // Load unsigned value
      return static_cast<compact_type>((static_cast<utype>(data) >> bitpos) &
                                       BitField::vmask);
    }
  }

  // Bitfield insertion: position `value` at the field's bits, masked to
  // the field width.
  static constexpr vtype insert(compact_type value) {
    return static_cast<vtype>((value & BitField::vmask) << bitpos);
  }

  // Load bitfield value
  constexpr operator compact_type() const noexcept {
    return extract(this->m_data);
  }

  // Load raw data with mask applied
  constexpr T unshifted() const {
    return static_cast<T>(this->m_data & data_mask());
  }

  // Optimized bool conversion (must be removed if inappropriate)
  explicit constexpr operator bool() const noexcept {
    return unshifted() != 0u;
  }

  // Store bitfield value
  BitField &operator=(compact_type value) noexcept {
    this->m_data =
        static_cast<vtype>((this->m_data & ~data_mask()) | insert(value));
    return *this;
  }

  compact_type operator++(int) {
    compact_type result = *this;
    *this = static_cast<compact_type>(result + 1u);
    return result;
  }
  BitField &operator++() {
    return *this = static_cast<compact_type>(*this + 1u);
  }
  compact_type operator--(int) {
    compact_type result = *this;
    *this = static_cast<compact_type>(result - 1u);
    return result;
  }
  BitField &operator--() {
    return *this = static_cast<compact_type>(*this - 1u);
  }
  BitField &operator+=(compact_type right) {
    return *this = static_cast<compact_type>(*this + right);
  }
  BitField &operator-=(compact_type right) {
    return *this = static_cast<compact_type>(*this - right);
  }
  BitField &operator*=(compact_type right) {
    return *this = static_cast<compact_type>(*this * right);
  }
  // Read-modify-write AND/OR/XOR applied to the field bits only; other
  // bits of the storage word are preserved.
  BitField &operator&=(compact_type right) {
    this->m_data &= static_cast<vtype>(
        ((static_cast<utype>(right + 0u) & BitField::vmask) << bitpos) |
        ~(BitField::vmask << bitpos));
    return *this;
  }
  BitField &operator|=(compact_type right) {
    this->m_data |= static_cast<vtype>(
        (static_cast<utype>(right + 0u) & BitField::vmask) << bitpos);
    return *this;
  }
  BitField &operator^=(compact_type right) {
    this->m_data ^= static_cast<vtype>(
        (static_cast<utype>(right + 0u) & BitField::vmask) << bitpos);
    return *this;
  }
};
// Field pack (concatenated from left to right)
// F occupies the most significant bits of the packed value; the remaining
// Fields... follow towards the least significant end.
template <typename F = void, typename... Fields>
struct BitFieldPack
    : BitFieldBase<typename F::type,
                   F::bitsize + BitFieldPack<Fields...>::bitsize> {
  using type = typename BitFieldPack::type;
  using vtype = typename BitFieldPack::vtype;
  using utype = typename BitFieldPack::utype;
  using compact_type = typename BitFieldPack::compact_type;
  // Get disjunction of all "data" masks of concatenated values
  static constexpr vtype data_mask() {
    return static_cast<vtype>(F::data_mask() |
                              BitFieldPack<Fields...>::data_mask());
  }
  // Extract all bitfields and concatenate
  static constexpr compact_type extract(const type &data) {
    return static_cast<compact_type>(static_cast<utype>(F::extract(data))
                                         << BitFieldPack<Fields...>::bitsize |
                                     BitFieldPack<Fields...>::extract(data));
  }
  // Split bitfields and insert them
  static constexpr vtype insert(compact_type value) {
    return static_cast<vtype>(
        F::insert(value >> BitFieldPack<Fields...>::bitsize) |
        BitFieldPack<Fields...>::insert(value));
  }
  // Load value
  constexpr operator compact_type() const noexcept {
    return extract(this->m_data);
  }
  // Store value
  BitFieldPack &operator=(compact_type value) noexcept {
    this->m_data = (this->m_data & ~data_mask()) | insert(value);
    return *this;
  }
};
// Empty field pack: terminates the BitFieldPack<...> recursion with a
// zero-width field that contributes nothing to masks or packed values.
template <> struct BitFieldPack<void> {
  // A terminator occupies no bits.
  static constexpr std::size_t bitsize = 0;
  // No bits means an empty mask.
  static constexpr std::size_t data_mask() { return 0; }
  // Extraction yields zero, in the promoted type of T (mirrors the compact
  // types produced by real fields).
  template <typename T>
  static constexpr auto extract(const T &) -> decltype(+T()) {
    return {};
  }
  // Insertion produces no bits.
  template <typename T> static constexpr T insert(T /*value*/) { return {}; }
};
// Fixed field (provides constant values in field pack)
// Occupies N bits of the layout but always reads back the constant V.
template <typename T, T V, std::size_t N>
struct BitFieldFixed : BitFieldBase<T, N> {
  using type = typename BitFieldFixed::type;
  using vtype = typename BitFieldFixed::vtype;
  // Return constant value
  static constexpr vtype extract(const type &) {
    // Checked here rather than at class scope so the error fires on use.
    static_assert((V & BitFieldFixed::vmask) == V,
                  "BitFieldFixed<> error: V out of bounds");
    return V;
  }
  // Get value
  constexpr operator vtype() const noexcept { return V; }
};
} // namespace rx
// std::common_type integration: lets BitField values participate in mixed
// arithmetic expressions as their underlying type T would.
template <typename T, std::size_t I, std::size_t N>
struct std::common_type<rx::BitField<T, I, N>, rx::BitField<T, I, N>>
    : std::common_type<T> {};
// Mixed BitField/other combinations defer to T's common type (both
// argument orders are provided).
template <typename T, std::size_t I, std::size_t N, typename T2>
struct std::common_type<rx::BitField<T, I, N>, T2>
    : std::common_type<T2, std::common_type_t<T>> {};
template <typename T, std::size_t I, std::size_t N, typename T2>
struct std::common_type<T2, rx::BitField<T, I, N>>
    : std::common_type<std::common_type_t<T>, T2> {};
#ifndef _MSC_VER
#pragma GCC diagnostic pop
#endif

268
rx/include/rx/BitSet.h Normal file
View file

@ -0,0 +1,268 @@
#pragma once
/*
This header implements the BitSet<> class for scoped enum types (enum class).
To enable BitSet<>, the enum type must be reflectable through refl.hpp so
that rx::fieldCount<T> yields its number of enumerators (each enumerator
names one bit):
enum class flagzz : u32
{
  flag1, // Bit indices start from zero
  flag2,
};
This also enables helper operators for this enum type.
Examples:
`flagzz::flag1 | flagzz::flag2` - bitset union
`flagzz::flag1 & ~flagzz::flag2` - bitset difference
Intersection (&) and symmetric difference (^) are also available.
*/
#include "refl.hpp"
#include "types.hpp"
namespace rx {
// An enum type usable as a BitSet element: it must be reflectable so that
// rx::fieldCount<T> (the number of enumerators, i.e. usable bit indices)
// is known, and it must provide at least one enumerator.
template <typename T>
concept BitSetEnum = std::is_enum_v<T> && requires {
  // Nested requirement: actually evaluated. A bare `rx::fieldCount<T> > 0;`
  // expression inside a requires-expression is only checked for validity,
  // never for truth, so a zero-field enum would previously have satisfied
  // the concept.
  requires rx::fieldCount<T> > 0;
};
template <BitSetEnum T> class BitSet;
namespace detail {
// Complemented view of a BitSet, produced by BitSet<T>::operator~ and
// consumed by the non-member `operator&(BitSet, InvertedBitSet)` to express
// the difference `lhs & ~rhs` as a plain AND.
template <BitSetEnum T> class InvertedBitSet final {
  using underlying_type = std::underlying_type_t<T>;

public:
  // Complemented bits of the source value. Public because the consuming
  // operator& is a non-member friend of BitSet, which the class-level
  // `friend BitSet<T>` grant below does not cover.
  underlying_type m_data;

private:
  // Store the bitwise complement (previously the value was stored as-is,
  // which made `lhs & ~rhs` compute an intersection instead of the
  // documented difference).
  constexpr InvertedBitSet(underlying_type data)
      : m_data(static_cast<underlying_type>(~data)) {}
  friend BitSet<T>;
};
} // namespace detail
// Bitset type for enum class with available bits [0, fieldCount)
template <BitSetEnum T> class BitSet final {
public:
  // Underlying type
  using underlying_type = std::underlying_type_t<T>;

private:
  // Underlying value
  underlying_type m_data;
  // Value constructor (the int tag disambiguates from the single-bit ctor)
  constexpr explicit BitSet(int, underlying_type data) noexcept
      : m_data(data) {}

public:
  // Total number of bits in the underlying type.
  static constexpr usz bitmax = sizeof(T) * 8;
  // Number of usable bits: one per enumerator.
  static constexpr usz bitsize =
      static_cast<underlying_type>(rx::fieldCount<T>);
  static_assert(std::is_enum_v<T>,
                "BitSet<> error: invalid type (must be enum)");
  static_assert(bitsize <= bitmax,
                "BitSet<> error: failed to determine enum field count");
  static_assert(bitsize != bitmax || std::is_unsigned_v<underlying_type>,
                "BitSet<> error: invalid field count (sign bit)");
  // Helper function: single-bit mask for the given enumerator.
  static constexpr underlying_type shift(T value) {
    return static_cast<underlying_type>(1)
           << static_cast<underlying_type>(value);
  }
  BitSet() = default;
  // Construct from a single bit
  constexpr BitSet(T bit) noexcept : m_data(shift(bit)) {}
  // Test for empty bitset
  constexpr explicit operator bool() const noexcept { return m_data != 0; }
  // Extract underlying data
  constexpr explicit operator underlying_type() const noexcept {
    return m_data;
  }
  // Build the complement view consumed by `lhs & ~rhs` below.
  constexpr detail::InvertedBitSet<T> operator~() const { return {m_data}; }
  // In-place union (deprecated spelling; prefer |=).
  constexpr BitSet &operator+=(BitSet rhs) {
    m_data |= static_cast<underlying_type>(rhs);
    return *this;
  }
  // In-place difference.
  constexpr BitSet &operator-=(BitSet rhs) {
    m_data &= ~static_cast<underlying_type>(rhs);
    return *this;
  }
  // Copy with the given bits removed.
  constexpr BitSet without(BitSet rhs) const {
    BitSet result = *this;
    result.m_data &= ~static_cast<underlying_type>(rhs);
    return result;
  }
  // Copy with the given bits added.
  constexpr BitSet with(BitSet rhs) const {
    BitSet result = *this;
    result.m_data |= static_cast<underlying_type>(rhs);
    return result;
  }
  // In-place intersection.
  constexpr BitSet &operator&=(BitSet rhs) {
    m_data &= static_cast<underlying_type>(rhs);
    return *this;
  }
  // In-place symmetric difference.
  constexpr BitSet &operator^=(BitSet rhs) {
    m_data ^= static_cast<underlying_type>(rhs);
    return *this;
  }
  // Union (deprecated spelling; prefer |).
  [[deprecated("Use operator|")]] friend constexpr BitSet
  operator+(BitSet lhs, BitSet rhs) {
    return BitSet(0, lhs.m_data | rhs.m_data);
  }
  // Difference.
  friend constexpr BitSet operator-(BitSet lhs, BitSet rhs) {
    return BitSet(0, lhs.m_data & ~rhs.m_data);
  }
  // Union.
  friend constexpr BitSet operator|(BitSet lhs, BitSet rhs) {
    return BitSet(0, lhs.m_data | rhs.m_data);
  }
  // Intersection.
  friend constexpr BitSet operator&(BitSet lhs, BitSet rhs) {
    return BitSet(0, lhs.m_data & rhs.m_data);
  }
  // Difference via a complemented operand (`lhs & ~rhs`).
  // NOTE(review): this ANDs rhs.m_data in directly, so it is only correct
  // if detail::InvertedBitSet stores the complemented bits; it also needs
  // access to that member, which the InvertedBitSet `friend BitSet<T>`
  // grant does not give to this non-member friend — verify against
  // detail::InvertedBitSet.
  friend constexpr BitSet operator&(BitSet lhs, detail::InvertedBitSet<T> rhs) {
    return BitSet(0, lhs.m_data & rhs.m_data);
  }
  // Symmetric difference.
  friend constexpr BitSet operator^(BitSet lhs, BitSet rhs) {
    return BitSet(0, lhs.m_data ^ rhs.m_data);
  }
  constexpr bool operator==(BitSet rhs) const noexcept {
    return m_data == rhs.m_data;
  }
  // Set `bit`; returns whether it was already set.
  constexpr bool test_and_set(T bit) {
    bool r = (m_data & shift(bit)) != 0;
    m_data |= shift(bit);
    return r;
  }
  // Clear `bit`; returns whether it was previously set.
  constexpr bool test_and_reset(T bit) {
    bool r = (m_data & shift(bit)) != 0;
    m_data &= ~shift(bit);
    return r;
  }
  // Toggle `bit`; returns its previous state.
  constexpr bool test_and_complement(T bit) {
    bool r = (m_data & shift(bit)) != 0;
    m_data ^= shift(bit);
    return r;
  }
  // True if any of the given bits are set.
  constexpr bool any_of(BitSet arg) const { return (m_data & arg.m_data) != 0; }
  // True if all of the given bits are set.
  constexpr bool all_of(BitSet arg) const {
    return (m_data & arg.m_data) == arg.m_data;
  }
  // True if none of the given bits are set.
  constexpr bool none_of(BitSet arg) const {
    return (m_data & arg.m_data) == 0;
  }
};
// Helper operators mixing plain enum values with BitSet<T>. Pulled into
// user code via `using namespace rx::bitset;`.
namespace bitset {
// Unary '+' operator: promote plain enum value to bitset value
template <BitSetEnum T>
[[deprecated("Use toBitSet(bit)")]] constexpr BitSet<T> operator+(T bit) {
  return BitSet<T>(bit);
}
// Explicit promotion of a single enum bit to a one-bit BitSet.
template <BitSetEnum T> constexpr BitSet<T> toBitSet(T bit) {
  return BitSet<T>(bit);
}
// Binary '+' operator: bitset union
template <BitSetEnum T, typename U>
  requires(std::is_constructible_v<BitSet<T>, U>)
[[deprecated("Use operator|")]] constexpr BitSet<T> operator+(T lhs,
                                                              const U &rhs) {
  return BitSet<T>(lhs) | BitSet<T>(rhs);
}
// Binary '+' operator: bitset union
template <typename U, BitSetEnum T>
  requires(std::is_constructible_v<BitSet<T>, U> && !std::is_enum_v<U>)
[[deprecated("Use operator|")]] constexpr BitSet<T> operator+(const U &lhs,
                                                              T rhs) {
  return BitSet<T>(lhs) | BitSet<T>(rhs);
}
// Binary '|' operator: bitset union
template <BitSetEnum T, typename U>
  requires(std::is_constructible_v<BitSet<T>, U>)
constexpr BitSet<T> operator|(T lhs, const U &rhs) {
  return BitSet<T>(lhs) | BitSet<T>(rhs);
}
// Binary '|' operator: bitset union
template <typename U, BitSetEnum T>
  requires(std::is_constructible_v<BitSet<T>, U> && !std::is_enum_v<U>)
constexpr BitSet<T> operator|(const U &lhs, T rhs) {
  return BitSet<T>(lhs) | BitSet<T>(rhs);
}
// Binary '-' operator: bitset difference
template <BitSetEnum T, typename U>
  requires(std::is_constructible_v<BitSet<T>, U>)
constexpr BitSet<T> operator-(T lhs, const U &rhs) {
  return BitSet<T>(lhs) - BitSet<T>(rhs);
}
// Binary '-' operator: bitset difference
template <typename U, BitSetEnum T>
  requires(std::is_constructible_v<BitSet<T>, U> && !std::is_enum_v<U>)
constexpr BitSet<T> operator-(const U &lhs, T rhs) {
  return BitSet<T>(lhs) - BitSet<T>(rhs);
}
// Binary '&' operator: bitset intersection
template <BitSetEnum T, typename U>
  requires(std::is_constructible_v<BitSet<T>, U>)
constexpr BitSet<T> operator&(T lhs, const U &rhs) {
  return BitSet<T>(lhs) & BitSet<T>(rhs);
}
// Binary '&' operator: bitset intersection
template <typename U, BitSetEnum T>
  requires(std::is_constructible_v<BitSet<T>, U> && !std::is_enum_v<U>)
constexpr BitSet<T> operator&(const U &lhs, T rhs) {
  return BitSet<T>(lhs) & BitSet<T>(rhs);
}
// Binary '&' operator: bitset difference with a complemented operand
// (`enum & ~enum`). The single BitSetEnum parameter is essential: the
// previous extra `typename U` parameter was non-deducible and without a
// default, so this overload was silently removed from overload resolution
// and `lhs & ~rhs` on plain enums failed to compile.
template <BitSetEnum T>
constexpr BitSet<T> operator&(T lhs, detail::InvertedBitSet<T> rhs) {
  return BitSet<T>(lhs) & rhs;
}
// Binary '^' operator: bitset symmetric difference
template <BitSetEnum T, typename U>
  requires(std::is_constructible_v<BitSet<T>, U>)
constexpr BitSet<T> operator^(T lhs, const U &rhs) {
  return BitSet<T>(lhs) ^ BitSet<T>(rhs);
}
// Binary '^' operator: bitset symmetric difference
template <typename U, BitSetEnum T>
  requires(std::is_constructible_v<BitSet<T>, U> && !std::is_enum_v<U>)
constexpr BitSet<T> operator^(const U &lhs, T rhs) {
  return BitSet<T>(lhs) ^ BitSet<T>(rhs);
}
} // namespace bitset
} // namespace rx
using namespace rx::bitset;

358
rx/include/rx/asm.hpp Normal file
View file

@ -0,0 +1,358 @@
#pragma once
#include "types.hpp"
#include <atomic>
extern bool g_use_rtm;
extern u64 g_rtm_tx_limit1;
#ifdef _M_X64
#ifdef _MSC_VER
extern "C" {
u32 _xbegin();
void _xend();
void _mm_pause();
void _mm_prefetch(const char *, int);
void _m_prefetchw(const volatile void *);
uchar _rotl8(uchar, uchar);
ushort _rotl16(ushort, uchar);
u64 __popcnt64(u64);
s64 __mulh(s64, s64);
u64 __umulh(u64, u64);
s64 _div128(s64, s64, s64, s64 *);
u64 _udiv128(u64, u64, u64, u64 *);
void __debugbreak();
}
#include <intrin.h>
#else
#include <immintrin.h>
#endif
#endif
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
namespace rx {
// Try to prefetch to Level 2 cache since it's not split to data/code on most
// processors
template <typename T> constexpr void prefetch_exec(T func) {
  // No-op during constant evaluation (the reinterpret_cast below is not
  // permitted in constant expressions).
  if (std::is_constant_evaluated()) {
    return;
  }
  // Convert the function pointer into a data pointer for the prefetch hint.
  const u64 value = reinterpret_cast<u64>(func);
  const void *ptr = reinterpret_cast<const void *>(value);
#ifdef _M_X64
  return _mm_prefetch(static_cast<const char *>(ptr), _MM_HINT_T1);
#else
  return __builtin_prefetch(ptr, 0, 2);
#endif
}
// Try to prefetch to Level 1 cache
constexpr void prefetch_read(const void *ptr) {
  // Cache hints only matter at runtime.
  if (std::is_constant_evaluated()) {
    return;
  }
#ifdef _M_X64
  return _mm_prefetch(static_cast<const char *>(ptr), _MM_HINT_T0);
#else
  return __builtin_prefetch(ptr, 0, 3);
#endif
}
// Prefetch a cache line with intent to write.
constexpr void prefetch_write(void *ptr) {
  if (std::is_constant_evaluated()) {
    return;
  }
#if defined(_M_X64) && !defined(__clang__)
  return _m_prefetchw(ptr);
#else
  // GCC/clang: rw = 1 (write), locality = 0.
  return __builtin_prefetch(ptr, 1, 0);
#endif
}
// Rotate an 8-bit value left by n (taken modulo 8).
constexpr u8 rol8(u8 x, u8 n) {
  // Portable formula for constant evaluation.
  if (std::is_constant_evaluated()) {
    return (x << (n & 7)) | (x >> ((-n & 7)));
  }
#ifdef _MSC_VER
  return _rotl8(x, n);
#elif defined(__clang__)
  return __builtin_rotateleft8(x, n);
#elif defined(ARCH_X64)
  // QImode (8-bit) rotate builtin on GCC/x86.
  return __builtin_ia32_rolqi(x, n);
#else
  return (x << (n & 7)) | (x >> ((-n & 7)));
#endif
}
// Rotate a 16-bit value left by n (taken modulo 16).
constexpr u16 rol16(u16 x, u16 n) {
  if (std::is_constant_evaluated()) {
    return (x << (n & 15)) | (x >> ((-n & 15)));
  }
#ifdef _MSC_VER
  return _rotl16(x, static_cast<uchar>(n));
#elif defined(__clang__)
  return __builtin_rotateleft16(x, n);
#elif defined(ARCH_X64)
  // HImode (16-bit) rotate builtin on GCC/x86.
  return __builtin_ia32_rolhi(x, n);
#else
  return (x << (n & 15)) | (x >> ((-n & 15)));
#endif
}
// Rotate a 32-bit value left by n (taken modulo 32).
constexpr u32 rol32(u32 x, u32 n) {
  if (std::is_constant_evaluated()) {
    return (x << (n & 31)) | (x >> (((0 - n) & 31)));
  }
#ifdef _MSC_VER
  return _rotl(x, n);
#elif defined(__clang__)
  return __builtin_rotateleft32(x, n);
#else
  return (x << (n & 31)) | (x >> (((0 - n) & 31)));
#endif
}
// Rotate a 64-bit value left by n (taken modulo 64).
constexpr u64 rol64(u64 x, u64 n) {
  if (std::is_constant_evaluated()) {
    return (x << (n & 63)) | (x >> (((0 - n) & 63)));
  }
#ifdef _MSC_VER
  return _rotl64(x, static_cast<int>(n));
#elif defined(__clang__)
  return __builtin_rotateleft64(x, n);
#else
  return (x << (n & 63)) | (x >> (((0 - n) & 63)));
#endif
}
// Population count (number of set bits) of a 64-bit value.
constexpr u32 popcnt64(u64 v) {
  // On MSVC without SSE4.2 the portable path is always taken (no guarded
  // intrinsic); elsewhere it serves constant evaluation only.
#if !defined(_MSC_VER) || defined(__SSE4_2__)
  if (std::is_constant_evaluated())
#endif
  {
    // Classic SWAR reduction: sum adjacent 1/2/4/8/16/32-bit counters.
    v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
    v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
    v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
    v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
    v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
    return static_cast<u32>((v >> 32) + v);
  }
#if !defined(_MSC_VER) || defined(__SSE4_2__)
#ifdef _MSC_VER
  return static_cast<u32>(__popcnt64(v));
#else
  return __builtin_popcountll(v);
#endif
#endif
}
// Population count of a 128-bit value (sum over both 64-bit halves).
constexpr u32 popcnt128(const u128 &v) {
#ifdef _MSC_VER
  // MSVC: u128 is an emulated struct with explicit lo/hi members.
  return popcnt64(v.lo) + popcnt64(v.hi);
#else
  return popcnt64(v) + popcnt64(v >> 64);
#endif
}
// High 64 bits of the unsigned 64x64 -> 128-bit product.
constexpr u64 umulh64(u64 x, u64 y) {
  // Non-MSVC always widens through u128; MSVC uses it only at compile time.
#ifdef _MSC_VER
  if (std::is_constant_evaluated())
#endif
  {
    return static_cast<u64>((u128{x} * u128{y}) >> 64);
  }
#ifdef _MSC_VER
  return __umulh(x, y);
#endif
}
// High 64 bits of the signed 64x64 -> 128-bit product.
inline s64 mulh64(s64 x, s64 y) {
#ifdef _MSC_VER
  return __mulh(x, y);
#else
  return (s128{x} * s128{y}) >> 64;
#endif
}
// Signed 128/64 -> 64-bit division of the (high:low) dividend, optionally
// reporting the remainder. As with the underlying _div128 intrinsic, the
// quotient is expected to fit in 64 bits.
inline s64 div128(s64 high, s64 low, s64 divisor, s64 *remainder = nullptr) {
#ifdef _MSC_VER
  s64 rem = 0;
  s64 r = _div128(high, low, divisor, &rem);
  if (remainder) {
    *remainder = rem;
  }
#else
  // Reassemble the dividend; `low` is raw bits, hence the unsigned widening.
  const s128 x = (u128{static_cast<u64>(high)} << 64) | u64(low);
  const s128 r = x / divisor;
  if (remainder) {
    *remainder = x % divisor;
  }
#endif
  return r;
}
// Unsigned 128/64 -> 64-bit division of the (high:low) dividend, optionally
// reporting the remainder. As with the underlying _udiv128 intrinsic, the
// quotient is expected to fit in 64 bits.
inline u64 udiv128(u64 high, u64 low, u64 divisor, u64 *remainder = nullptr) {
#ifdef _MSC_VER
  u64 rem = 0;
  u64 r = _udiv128(high, low, divisor, &rem);
  if (remainder) {
    *remainder = rem;
  }
#else
  const u128 x = (u128{high} << 64) | low;
  const u128 r = x / divisor;
  if (remainder) {
    *remainder = x % divisor;
  }
#endif
  return r;
}
#ifdef _MSC_VER
// Division of the emulated u128 by u64 (MSVC has no native __uint128_t).
// The quotient is expected to fit in 64 bits (see _udiv128).
inline u128 operator/(u128 lhs, u64 rhs) {
  u64 rem = 0;
  return _udiv128(lhs.hi, lhs.lo, rhs, &rem);
}
#endif
// Count trailing zero bits of a 128-bit value (yields 128 for arg == 0).
constexpr u32 ctz128(u128 arg) {
#ifdef _MSC_VER
  if (!arg.lo)
    return std::countr_zero(arg.hi) + 64u;
  else
    return std::countr_zero(arg.lo);
#else
  if (u64 lo = static_cast<u64>(arg))
    return std::countr_zero<u64>(lo);
  else
    return std::countr_zero<u64>(arg >> 64) + 64;
#endif
}
// Count leading zero bits of a 128-bit value (yields 128 for arg == 0).
constexpr u32 clz128(u128 arg) {
#ifdef _MSC_VER
  if (arg.hi)
    return std::countl_zero(arg.hi);
  else
    return std::countl_zero(arg.lo) + 64;
#else
  if (u64 hi = static_cast<u64>(arg >> 64))
    return std::countl_zero<u64>(hi);
  else
    return std::countl_zero<u64>(arg) + 64;
#endif
}
// CPU-relax hint for spin-wait loops (x86 `pause` / ARM `yield`).
inline void pause() {
#if defined(ARCH_ARM64)
  __asm__ volatile("yield");
#elif defined(_M_X64)
  _mm_pause();
#elif defined(ARCH_X64)
  __builtin_ia32_pause();
#else
#error "Missing pause() implementation"
#endif
}
// Align to power of 2
// Rounds `value` up to the next multiple of `align`, which must be a power
// of two; `T{0} - align` produces the two's-complement mask ~(align - 1).
template <typename T, typename U>
  requires std::is_unsigned_v<T>
constexpr std::make_unsigned_t<std::common_type_t<T, U>> align(T value,
                                                               U align) {
  return static_cast<std::make_unsigned_t<std::common_type_t<T, U>>>(
      (value + (align - 1)) & (T{0} - align));
}
// General purpose aligned division, the result is rounded up not truncated
// (i.e. ceil(value / align) for unsigned operands).
template <typename T>
  requires std::is_unsigned_v<T>
constexpr T aligned_div(T value, std::type_identity_t<T> align) {
  const T quotient = static_cast<T>(value / align);
  const T remainder = static_cast<T>(value % align);
  // Bump the quotient whenever the division was not exact.
  if (remainder != 0) {
    return static_cast<T>(quotient + 1);
  }
  return quotient;
}
// General purpose aligned division, the result is rounded to nearest
// (ties round towards zero, mirroring the `>` comparison used below).
template <typename T>
  requires std::is_integral_v<T>
constexpr T rounded_div(T value, std::type_identity_t<T> align) {
  if constexpr (std::is_unsigned_v<T>) {
    return static_cast<T>(value / align + T{(value % align) > (align / 2)});
  }
  // Signed: truncating division already rounds toward zero; step one away
  // from zero when the discarded remainder exceeds half the divisor in
  // magnitude. Fixed: the negative branch compared the (non-positive)
  // remainder against +align/2, which holds even for exact divisions
  // (e.g. -20/10 yielded -3), so every negative quotient was pushed down;
  // the remainder must be compared against -(align/2).
  return static_cast<T>(value / align +
                        (value > 0 ? T{(value % align) > (align / 2)}
                                   : 0 - T{(value % align) < (0 - align / 2)}));
}
// Multiplying by ratio, semi-resistant to overflows
// Computes value * numerator / denominator without overflowing where a
// wider intermediate type is available.
template <UnsignedInt T>
constexpr T rational_mul(T value, std::type_identity_t<T> numerator,
                         std::type_identity_t<T> denominator) {
  // Small enough to widen losslessly into u64.
  if constexpr (sizeof(T) <= sizeof(u64) / 2) {
    return static_cast<T>(value * u64{numerator} / u64{denominator});
  }
#if is_u128_emulated
  // Widen through the emulated 128-bit product when available.
  if constexpr (sizeof(T) <= sizeof(u128) / 2) {
    return static_cast<T>(u128_from_mul(value, numerator) / u64{denominator});
  }
#endif
  // Fallback: split into whole and fractional parts to delay overflow
  // (still inexact/overflowing in extreme cases, hence "semi-resistant").
  return static_cast<T>(value / denominator * numerator +
                        (value % denominator) * numerator / denominator);
}
// Saturating unsigned addition: clamps to the maximum value on overflow.
template <UnsignedInt T> constexpr T add_saturate(T addend1, T addend2) {
  const T sum = static_cast<T>(addend1 + addend2);
  // Unsigned addition wrapped around iff the result fell below an operand.
  return sum < addend1 ? T{umax} : sum;
}
// Saturating unsigned subtraction: clamps to zero on underflow.
template <UnsignedInt T> constexpr T sub_saturate(T minuend, T subtrahend) {
  if (minuend < subtrahend) {
    return T{0};
  }
  return static_cast<T>(minuend - subtrahend);
}
// Saturating unsigned multiplication: clamps to the maximum value on
// overflow.
template <UnsignedInt T> constexpr T mul_saturate(T factor1, T factor2) {
  // Overflow test without widening: a * b > max  <=>  max / a < b (a != 0).
  if (factor1 > 0 && T{umax} / factor1 < factor2) {
    return T{umax};
  }
  return static_cast<T>(factor1 * factor2);
}
// Force a write access at `ptr` (an atomic read-modify-write of zero) so
// that any pending write fault for that page is taken here; the value at
// *ptr is left unchanged.
inline void trigger_write_page_fault(void *ptr) {
#if defined(ARCH_X64) && !defined(_MSC_VER)
  // Locked OR with 0: a store for the MMU, a no-op for the data.
  __asm__ volatile("lock orl $0, 0(%0)" ::"r"(ptr));
#elif defined(ARCH_ARM64) && !defined(ANDROID)
  u32 value = 0;
  u32 *u32_ptr = static_cast<u32 *>(ptr);
  // LDSET with 0: atomic OR of zero, again a data no-op with write intent.
  __asm__ volatile("ldset %w0, %w0, %1"
                   : "+r"(value), "=Q"(*u32_ptr)
                   : "r"(value));
#else
  // Portable fallback: atomic fetch_or(0) performs the required write.
  static_cast<std::atomic<std::uint32_t> *>(ptr)->fetch_or(
      0, std::memory_order::relaxed);
#endif
}
} // namespace rx
#ifdef _MSC_VER
using rx::operator/;
#endif

View file

@ -218,13 +218,15 @@ struct std::formatter<T> {
std::string fieldName;
auto underlying = std::to_underlying(value);
// FIXME: requires C++23
// auto underlying = std::to_underlying(value);
auto underlying = static_cast<int>(value);
if (underlying < 0) {
fieldName = queryUnknownField(
underlying, std::integral_constant<std::int64_t, 0>{},
std::make_integer_sequence<std::int64_t, 128>{});
} else if (underlying >= rx::fieldCount<T>) {
} else if (static_cast<std::size_t>(underlying) >= rx::fieldCount<T>) {
fieldName = queryUnknownField(
underlying, std::integral_constant<std::int64_t, rx::fieldCount<T>>{},
std::make_integer_sequence<std::int64_t, 128>{});

2236
rx/include/rx/simd.hpp Normal file

File diff suppressed because it is too large Load diff

1522
rx/include/rx/types.hpp Normal file

File diff suppressed because it is too large Load diff

187
rx/include/rx/v128.hpp Normal file
View file

@ -0,0 +1,187 @@
#pragma once // No BOM and only basic ASCII in this header, or a neko will die
#include "types.hpp"
namespace rx {
// Any trivial 16-byte type: eligible for bit_cast conversion to/from v128.
template <typename T>
concept Vector128 = (sizeof(T) == 16) && (std::is_trivial_v<T>);
// 128-bit vector type
union alignas(16) v128 {
  using enable_bitcopy = std::true_type; // FIXME: remove
  uchar _bytes[16];
  char _chars[16];
  // Array view whose index is XOR-ed with a constant mask M, so the same
  // indexing code can address elements in either significance order.
  template <typename T, usz N, usz M>
  struct masked_array_t // array type accessed as (index ^ M)
  {
    T data[N];
    T &operator[](usz index) { return data[index ^ M]; }
    const T &operator[](usz index) const { return data[index ^ M]; }
  };
  // Index 0 selects the least significant element on little-endian hosts.
  template <typename T, usz N = 16 / sizeof(T)>
  using normal_array_t =
      masked_array_t<T, N,
                     std::endian::little == std::endian::native ? 0 : N - 1>;
  // Reversed view: index 0 selects the opposite end.
  template <typename T, usz N = 16 / sizeof(T)>
  using reversed_array_t =
      masked_array_t<T, N,
                     std::endian::little == std::endian::native ? N - 1 : 0>;
  // Lane views of the 128 bits; `r`-suffixed members index in reverse.
  normal_array_t<u64> _u64;
  normal_array_t<s64> _s64;
  reversed_array_t<u64> u64r;
  reversed_array_t<s64> s64r;
  normal_array_t<u32> _u32;
  normal_array_t<s32> _s32;
  reversed_array_t<u32> u32r;
  reversed_array_t<s32> s32r;
  normal_array_t<u16> _u16;
  normal_array_t<s16> _s16;
  reversed_array_t<u16> u16r;
  reversed_array_t<s16> s16r;
  normal_array_t<u8> _u8;
  normal_array_t<s8> _s8;
  reversed_array_t<u8> u8r;
  reversed_array_t<s8> s8r;
  normal_array_t<f32> _f;
  normal_array_t<f64> _d;
  reversed_array_t<f32> fr;
  reversed_array_t<f64> dr;
  // Whole-register integer views.
  u128 _u;
  s128 _s;
  v128() = default;
  constexpr v128(const v128 &) noexcept = default;
  // Implicit bitwise conversion from any trivial 16-byte type.
  template <Vector128 T>
  constexpr v128(const T &rhs) noexcept : v128(std::bit_cast<v128>(rhs)) {}
  constexpr v128 &operator=(const v128 &) noexcept = default;
  // Implicit bitwise conversion to any trivial 16-byte type.
  template <Vector128 T> constexpr operator T() const noexcept {
    return std::bit_cast<T>(*this);
  }
  // Build from two 64-bit halves (_0 is the low half).
  static v128 from64(u64 _0, u64 _1 = 0) {
    v128 ret;
    ret._u64[0] = _0;
    ret._u64[1] = _1;
    return ret;
  }
  // Same, with the high half given first.
  static v128 from64r(u64 _1, u64 _0 = 0) { return from64(_0, _1); }
  // Broadcast one 64-bit value to both halves.
  static v128 from64p(u64 value) {
    v128 ret;
    ret._u64[0] = value;
    ret._u64[1] = value;
    return ret;
  }
  // Build from four 32-bit lanes (_0 is the lowest lane).
  static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0) {
    v128 ret;
    ret._u32[0] = _0;
    ret._u32[1] = _1;
    ret._u32[2] = _2;
    ret._u32[3] = _3;
    return ret;
  }
  // Same, with the highest lane given first.
  static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0) {
    return from32(_0, _1, _2, _3);
  }
  // Broadcast one 32-bit value to all four lanes.
  static v128 from32p(u32 value) {
    v128 ret;
    ret._u32[0] = value;
    ret._u32[1] = value;
    ret._u32[2] = value;
    ret._u32[3] = value;
    return ret;
  }
  // Broadcast one float to all four lanes.
  static v128 fromf32p(f32 value) {
    v128 ret;
    ret._f[0] = value;
    ret._f[1] = value;
    ret._f[2] = value;
    ret._f[3] = value;
    return ret;
  }
  // Broadcast one 16-bit value to all eight lanes.
  static v128 from16p(u16 value) {
    v128 ret;
    ret._u16[0] = value;
    ret._u16[1] = value;
    ret._u16[2] = value;
    ret._u16[3] = value;
    ret._u16[4] = value;
    ret._u16[5] = value;
    ret._u16[6] = value;
    ret._u16[7] = value;
    return ret;
  }
  // Broadcast one byte to all sixteen lanes.
  static v128 from8p(u8 value) {
    v128 ret;
    std::memset(&ret, value, sizeof(ret));
    return ret;
  }
  // Deliberately uninitialized value (diagnostics suppressed on purpose).
  static v128 undef() {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#elif _MSC_VER
#pragma warning(push)
#pragma warning(disable : 6001)
#endif
    v128 ret;
    return ret;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif _MSC_VER
#pragma warning(pop)
#endif
  }
  // Unaligned load with optional index offset
  static v128 loadu(const void *ptr, usz index = 0) {
    v128 ret;
    std::memcpy(&ret, static_cast<const u8 *>(ptr) + index * sizeof(v128),
                sizeof(v128));
    return ret;
  }
  // Unaligned store with optional index offset
  static void storeu(v128 value, void *ptr, usz index = 0) {
    std::memcpy(static_cast<u8 *>(ptr) + index * sizeof(v128), &value,
                sizeof(v128));
  }
  // Bitwise operations and comparison; defined out-of-line.
  v128 operator|(const v128 &) const;
  v128 operator&(const v128 &) const;
  v128 operator^(const v128 &) const;
  v128 operator~() const;
  bool operator==(const v128 &right) const;
  // Zero the register.
  void clear() { *this = {}; }
};
} // namespace rx
// Hash support for rx::v128: mixes the two 64-bit halves of the register.
template <> struct std::hash<rx::v128> {
  usz operator()(const rx::v128 &key) const {
    const u64 lo = key._u64[0];
    const u64 hi = key._u64[1];
    return lo ^ (hi << 1);
  }
};

View file

@ -5,12 +5,47 @@
#include <thread>
#include <vector>
#ifdef __GNUC__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#ifdef _WIN32
#include <windows.h>
#else
#ifdef __linux__
#include <linux/limits.h>
#include <sys/ptrace.h>
#endif
#include <unistd.h>
#endif
bool rx::isDebuggerPresent() {
#ifdef _WIN32
return ::IsDebuggerPresent();
#elif defined(__APPLE__) || defined(__DragonFly__) || defined(__FreeBSD__) || \
defined(__NetBSD__) || defined(__OpenBSD__)
int mib[] = {
CTL_KERN,
KERN_PROC,
KERN_PROC_PID,
getpid(),
#if defined(__NetBSD__) || defined(__OpenBSD__)
sizeof(struct kinfo_proc),
1,
#endif
};
u_int miblen = std::size(mib);
struct kinfo_proc info;
usz size = sizeof(info);
if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
return false;
}
return info.KP_FLAGS & P_TRACED;
#elif defined(__linux__)
std::ifstream in("/proc/self/status");
std::string line;
while (std::getline(in, line)) {
@ -30,6 +65,7 @@ bool rx::isDebuggerPresent() {
}
return false;
#endif
}
void rx::waitForDebugger() {
@ -49,6 +85,7 @@ void rx::waitForDebugger() {
}
void rx::runDebugger() {
#ifdef __linux__
int pid = ::getpid();
char path[PATH_MAX];
::readlink("/proc/self/exe", path, sizeof(path));
@ -78,19 +115,20 @@ void rx::runDebugger() {
argv.push_back(nullptr);
execv(gdbPath, (char **)argv.data());
}
#else
bool rx::isDebuggerPresent() { return false; }
void rx::waitForDebugger() {}
void rx::runDebugger() {}
#endif
}
// Emit a software breakpoint / debug trap for the current architecture.
void rx::breakpoint() {
#if __has_builtin(__builtin_debugtrap)
  __builtin_debugtrap();
#elif defined(__GNUC__)
#if defined(__i386__) || defined(__x86_64__)
  __asm__ volatile("int3");
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
  __asm__ volatile("brk 0x42");
#endif
  // NOTE(review): on GNUC targets that are neither x86 nor ARM64 this is
  // silently a no-op — confirm that is intended.
#elif defined(_M_X64)
  __debugbreak();
#endif
}

View file

@ -1,4 +1,7 @@
#include "mem.hpp"
#ifdef __linux__
#include <cstdio>
#include <print>
#include <sys/mman.h>
@ -44,3 +47,4 @@ void rx::mem::printStats() {
free(line);
fclose(maps);
}
#endif