diff --git a/.gitignore b/.gitignore
index 8daea8f80..00fb6fe69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@
/ipch
/rpcs3/Debug
/rpcs3/Release
+/llvm_build
/wxWidgets/lib
/bin/rpcs3.ini
diff --git a/.gitmodules b/.gitmodules
index a45f06520..fb181e64b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -8,3 +8,7 @@
[submodule "asmjit"]
path = asmjit
url = https://github.com/kobalicekp/asmjit
+[submodule "llvm"]
+ path = llvm
+ url = https://github.com/llvm-mirror/llvm.git
+ branch = release_35
diff --git a/.travis.yml b/.travis.yml
index 90ddcfd4d..5b47d8dd7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,7 +27,12 @@ before_install:
sudo ./cmake-3.0.0-Linux-i386.sh --skip-license --prefix=/usr;
before_script:
- - git submodule update --init asmjit ffmpeg
+ - git submodule update --init asmjit ffmpeg llvm
+ - cd llvm_build
+ - cmake -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+ - make -j 4
+ - sudo make install
+ - cd ..
- mkdir build
- cd build
- cmake ..
diff --git a/Utilities/BEType.h b/Utilities/BEType.h
index abd557c89..1bbc0a811 100644
--- a/Utilities/BEType.h
+++ b/Utilities/BEType.h
@@ -1,6 +1,6 @@
#pragma once
-union u128
+union _CRT_ALIGN(16) u128
{
u64 _u64[2];
s64 _s64[2];
diff --git a/llvm b/llvm
new file mode 160000
index 000000000..f55c17bc3
--- /dev/null
+++ b/llvm
@@ -0,0 +1 @@
+Subproject commit f55c17bc3395fa7fdbd997d751a55de5228ebd77
diff --git a/llvm_build/llvm_build.vcxproj b/llvm_build/llvm_build.vcxproj
new file mode 100644
index 000000000..ceba2a711
--- /dev/null
+++ b/llvm_build/llvm_build.vcxproj
@@ -0,0 +1,68 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}
+ MakeFileProj
+
+
+
+ Makefile
+ true
+ v120
+
+
+ Makefile
+ false
+ v120
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:build /p:Configuration=Debug
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:rebuild /p:Configuration=Debug
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:clean /p:Configuration=Debug
+
+
+
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:build /p:Configuration=Release
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:rebuild /p:Configuration=Release
+
+ cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_INCLUDE_UTILS=OFF -DWITH_POLLY=OFF ../llvm
+msbuild.exe ALL_BUILD.vcxproj /t:clean /p:Configuration=Release
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/llvm_build/llvm_build.vcxproj.filters b/llvm_build/llvm_build.vcxproj.filters
new file mode 100644
index 000000000..6a1782f7d
--- /dev/null
+++ b/llvm_build/llvm_build.vcxproj.filters
@@ -0,0 +1,17 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
\ No newline at end of file
diff --git a/rpcs3.sln b/rpcs3.sln
index 24c63c95c..473c4354c 100644
--- a/rpcs3.sln
+++ b/rpcs3.sln
@@ -1,6 +1,6 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 2013
-VisualStudioVersion = 12.0.21005.1
+# Visual Studio Express 2013 for Windows Desktop
+VisualStudioVersion = 12.0.30723.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rpcs3", "rpcs3\rpcs3.vcxproj", "{70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}"
ProjectSection(ProjectDependencies) = postProject
@@ -138,6 +138,13 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "asmjit", "asmjit", "{E2A982F2-4B1A-48B1-8D77-A17A589C58D7}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "emucore", "rpcs3\emucore.vcxproj", "{C4A10229-4712-4BD2-B63E-50D93C67A038}"
+ ProjectSection(ProjectDependencies) = postProject
+ {8BC303AB-25BE-4276-8E57-73F171B2D672} = {8BC303AB-25BE-4276-8E57-73F171B2D672}
+ EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "llvm", "llvm", "{C8068CE9-D626-4FEA-BEE7-893F06A25BF3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "llvm_build", "llvm_build\llvm_build.vcxproj", "{8BC303AB-25BE-4276-8E57-73F171B2D672}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -297,6 +304,12 @@ Global
{C4A10229-4712-4BD2-B63E-50D93C67A038}.Debug|x64.Build.0 = Debug|x64
{C4A10229-4712-4BD2-B63E-50D93C67A038}.Release|x64.ActiveCfg = Release|x64
{C4A10229-4712-4BD2-B63E-50D93C67A038}.Release|x64.Build.0 = Release|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - MemLeak|x64.ActiveCfg = Debug|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug - MemLeak|x64.Build.0 = Debug|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug|x64.ActiveCfg = Debug|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Debug|x64.Build.0 = Debug|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release|x64.ActiveCfg = Release|x64
+ {8BC303AB-25BE-4276-8E57-73F171B2D672}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -325,5 +338,6 @@ Global
{23E1C437-A951-5943-8639-A17F3CF2E606} = {5812E712-6213-4372-B095-9EB9BAA1F2DF}
{74827EBD-93DC-5110-BA95-3F2AB029B6B0} = {5812E712-6213-4372-B095-9EB9BAA1F2DF}
{AC40FF01-426E-4838-A317-66354CEFAE88} = {E2A982F2-4B1A-48B1-8D77-A17A589C58D7}
+ {8BC303AB-25BE-4276-8E57-73F171B2D672} = {C8068CE9-D626-4FEA-BEE7-893F06A25BF3}
EndGlobalSection
EndGlobal
diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt
index 59f146c6d..b6b79361c 100644
--- a/rpcs3/CMakeLists.txt
+++ b/rpcs3/CMakeLists.txt
@@ -55,6 +55,7 @@ find_package(GLEW REQUIRED)
find_package(OpenGL REQUIRED)
find_package(ZLIB REQUIRED)
find_package(OpenAL REQUIRED)
+find_package(LLVM REQUIRED CONFIG)
include("${wxWidgets_USE_FILE}")
@@ -69,6 +70,7 @@ endif()
include_directories(
${wxWidgets_INCLUDE_DIRS}
${OPENAL_INCLUDE_DIR}
+${LLVM_INCLUDE_DIRS}
"${RPCS3_SRC_DIR}/../ffmpeg/${PLATFORM_ARCH}/include"
"${RPCS3_SRC_DIR}"
"${RPCS3_SRC_DIR}/Loader"
@@ -77,6 +79,9 @@ ${OPENAL_INCLUDE_DIR}
"${RPCS3_SRC_DIR}/../asmjit/src/asmjit"
)
+add_definitions(${LLVM_DEFINITIONS})
+llvm_map_components_to_libnames(LLVM_LIBS jit vectorize x86codegen x86disassembler)
+
link_directories("${RPCS3_SRC_DIR}/../ffmpeg/${PLATFORM_ARCH}/lib")
get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
@@ -97,10 +102,13 @@ RPCS3_SRC
"${RPCS3_SRC_DIR}/../Utilities/*"
)
+list(REMOVE_ITEM RPCS3_SRC ${RPCS3_SRC_DIR}/../Utilities/simpleini/ConvertUTF.c)
+set_source_files_properties(${RPCS3_SRC_DIR}/Emu/Cell/PPULLVMRecompiler.cpp PROPERTIES COMPILE_FLAGS -fno-rtti)
+
add_executable(rpcs3 ${RPCS3_SRC})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${CMAKE_CURRENT_BINARY_DIR}/../asmjit/") #hack because the asmjit cmake file force fno exceptions
-target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES} libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a ${ZLIB_LIBRARIES})
+target_link_libraries(rpcs3 asmjit.a ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${GLEW_LIBRARY} ${OPENGL_LIBRARIES} libavformat.a libavcodec.a libavutil.a libswresample.a libswscale.a ${ZLIB_LIBRARIES} ${LLVM_LIBS})
set_target_properties(rpcs3 PROPERTIES COTIRE_CXX_PREFIX_HEADER_INIT "${RPCS3_SRC_DIR}/stdafx.h")
cotire(rpcs3)
diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h
index 2e8fda1d9..cdb7a2d2c 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.h
+++ b/rpcs3/Emu/Cell/PPUInterpreter.h
@@ -1,6 +1,12 @@
#pragma once
#include "Emu/Cell/PPUOpcodes.h"
+#include "Emu/SysCalls/SysCalls.h"
+#include "rpcs3/Ini.h"
+#include "Emu/System.h"
+#include "Emu/SysCalls/Static.h"
+#include "Emu/SysCalls/Modules.h"
+#include "Emu/Memory/Memory.h"
#include "Emu/SysCalls/lv2/sys_time.h"
#include
@@ -51,6 +57,7 @@ u64 rotr64(const u64 x, const u8 n) { return (x >> n) | (x << (64 - n)); }
class PPUInterpreter : public PPUOpcodes
{
+ friend class PPULLVMRecompiler;
private:
PPUThread& CPU;
diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp
new file mode 100644
index 000000000..5d66f575a
--- /dev/null
+++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp
@@ -0,0 +1,4988 @@
+#include "stdafx.h"
+#include "Utilities/Log.h"
+#include "Emu/Cell/PPULLVMRecompiler.h"
+#include "Emu/Memory/Memory.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/MC/MCDisassembler.h"
+
+using namespace llvm;
+
+u64 PPULLVMRecompiler::s_rotate_mask[64][64];
+bool PPULLVMRecompiler::s_rotate_mask_inited = false;
+
+PPULLVMRecompiler::PPULLVMRecompiler()
+ : ThreadBase("PPULLVMRecompiler")
+ , m_revision(0) {
+ InitializeNativeTarget();
+ InitializeNativeTargetAsmPrinter();
+ InitializeNativeTargetDisassembler();
+
+ m_llvm_context = new LLVMContext();
+ m_ir_builder = new IRBuilder<>(*m_llvm_context);
+ m_module = new llvm::Module("Module", *m_llvm_context);
+ m_fpm = new FunctionPassManager(m_module);
+
+ EngineBuilder engine_builder(m_module);
+ engine_builder.setMCPU(sys::getHostCPUName());
+ engine_builder.setEngineKind(EngineKind::JIT);
+ engine_builder.setOptLevel(CodeGenOpt::Default);
+ m_execution_engine = engine_builder.create();
+
+ m_fpm->add(new DataLayoutPass(m_module));
+ m_fpm->add(createNoAAPass());
+ m_fpm->add(createBasicAliasAnalysisPass());
+ m_fpm->add(createNoTargetTransformInfoPass());
+ m_fpm->add(createEarlyCSEPass());
+ m_fpm->add(createTailCallEliminationPass());
+ m_fpm->add(createReassociatePass());
+ m_fpm->add(createInstructionCombiningPass());
+ m_fpm->add(new DominatorTreeWrapperPass());
+ m_fpm->add(new MemoryDependenceAnalysis());
+ m_fpm->add(createGVNPass());
+ m_fpm->add(createInstructionCombiningPass());
+ m_fpm->add(new MemoryDependenceAnalysis());
+ m_fpm->add(createDeadStoreEliminationPass());
+ m_fpm->add(new LoopInfo());
+ m_fpm->add(new ScalarEvolution());
+ m_fpm->add(createSLPVectorizerPass());
+ m_fpm->add(createInstructionCombiningPass());
+ m_fpm->add(createCFGSimplificationPass());
+ m_fpm->doInitialization();
+
+ if (!s_rotate_mask_inited) {
+ InitRotateMask();
+ s_rotate_mask_inited = true;
+ }
+}
+
+PPULLVMRecompiler::~PPULLVMRecompiler() {
+ Stop();
+
+ delete m_execution_engine;
+ delete m_fpm;
+ delete m_ir_builder;
+ delete m_llvm_context;
+}
+
+std::pair PPULLVMRecompiler::GetExecutable(u32 address) {
+ std::lock_guard lock(m_compiled_shared_lock);
+
+ auto compiled = m_compiled_shared.lower_bound(std::make_pair(address, 0));
+ if (compiled != m_compiled_shared.end() && compiled->first.first == address) {
+ compiled->second.second++;
+ return std::make_pair(compiled->second.first, compiled->first.second);
+ }
+
+ return std::make_pair(nullptr, 0);
+}
+
+void PPULLVMRecompiler::ReleaseExecutable(u32 address, u32 revision) {
+ std::lock_guard lock(m_compiled_shared_lock);
+
+ auto compiled = m_compiled_shared.find(std::make_pair(address, revision));
+ if (compiled != m_compiled_shared.end()) {
+ compiled->second.second--;
+ }
+}
+
+void PPULLVMRecompiler::RequestCompilation(u32 address) {
+ {
+ std::lock_guard lock(m_uncompiled_shared_lock);
+ m_uncompiled_shared.push_back(address);
+ }
+
+ if (!IsAlive()) {
+ Start();
+ }
+
+ Notify();
+}
+
+u32 PPULLVMRecompiler::GetCurrentRevision() {
+ return m_revision.load(std::memory_order_relaxed);
+}
+
+void PPULLVMRecompiler::Task() {
+ auto start = std::chrono::high_resolution_clock::now();
+
+ while (!TestDestroy() && !Emu.IsStopped()) {
+ // Wait a few ms for something to happen
+ auto idling_start = std::chrono::high_resolution_clock::now();
+ WaitForAnySignal(250);
+ auto idling_end = std::chrono::high_resolution_clock::now();
+ m_idling_time += std::chrono::duration_cast(idling_end - idling_start);
+
+ // Update the set of blocks that have been hit with the set of blocks that have been requested for compilation.
+ {
+ std::lock_guard lock(m_uncompiled_shared_lock);
+ for (auto i = m_uncompiled_shared.begin(); i != m_uncompiled_shared.end(); i++) {
+ m_hit_blocks.insert(*i);
+ }
+ }
+
+ u32 num_compiled = 0;
+ while (!TestDestroy() && !Emu.IsStopped()) {
+ u32 address;
+
+ {
+ std::lock_guard lock(m_uncompiled_shared_lock);
+
+ auto i = m_uncompiled_shared.begin();
+ if (i != m_uncompiled_shared.end()) {
+ address = *i;
+ m_uncompiled_shared.erase(i);
+ } else {
+ break;
+ }
+ }
+
+ m_hit_blocks.insert(address);
+ if (NeedsCompiling(address)) {
+ Compile(address);
+ num_compiled++;
+ }
+ }
+
+ if (num_compiled == 0) {
+ // If we get here, it means the recompilation thread is idling.
+ // We use this oppurtunity to optimize the code.
+ RemoveUnusedOldVersions();
+ for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
+ if (NeedsCompiling(i->first.first)) {
+ Compile(i->first.first);
+ num_compiled++;
+ }
+ }
+ }
+ }
+
+ std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+ m_total_time = std::chrono::duration_cast(end - start);
+
+ std::string error;
+ raw_fd_ostream log_file("PPULLVMRecompiler.log", error, sys::fs::F_Text);
+ log_file << "Total time = " << m_total_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent compiling = " << m_compilation_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent building IR = " << m_ir_build_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent optimizing = " << m_optimizing_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent translating = " << m_translation_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent idling = " << m_idling_time.count() / 1000000 << "ms\n";
+ log_file << " Time spent doing misc tasks = " << (m_total_time.count() - m_idling_time.count() - m_compilation_time.count()) / 1000000 << "ms\n";
+ log_file << "Revision = " << m_revision << "\n";
+ log_file << "\nInterpreter fallback stats:\n";
+ for (auto i = m_interpreter_fallback_stats.begin(); i != m_interpreter_fallback_stats.end(); i++) {
+ log_file << i->first << " = " << i->second << "\n";
+ }
+
+ log_file << "\nDisassembly:\n";
+ //auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr);
+ for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
+ log_file << fmt::Format("%s: Size = %u bytes, Number of instructions = %u\n", i->second.llvm_function->getName().str().c_str(), i->second.size, i->second.num_instructions);
+
+ //uint8_t * fn_ptr = (uint8_t *)i->second.executable;
+ //for (size_t pc = 0; pc < i->second.size;) {
+ // char str[1024];
+
+ // auto size = LLVMDisasmInstruction(disassembler, fn_ptr + pc, i->second.size - pc, (uint64_t)(fn_ptr + pc), str, sizeof(str));
+ // log_file << str << '\n';
+ // pc += size;
+ //}
+ }
+
+ //LLVMDisasmDispose(disassembler);
+
+ //log_file << "\nLLVM IR:\n" << *m_module;
+
+ LOG_NOTICE(PPU, "PPU LLVM compiler thread exiting.");
+}
+
+void PPULLVMRecompiler::Decode(const u32 code) {
+ (*PPU_instr::main_list)(this, code);
+}
+
+void PPULLVMRecompiler::NULL_OP() {
+ InterpreterCall("NULL_OP", &PPUInterpreter::NULL_OP);
+}
+
+void PPULLVMRecompiler::NOP() {
+ InterpreterCall("NOP", &PPUInterpreter::NOP);
+}
+
+void PPULLVMRecompiler::TDI(u32 to, u32 ra, s32 simm16) {
+ InterpreterCall("TDI", &PPUInterpreter::TDI, to, ra, simm16);
+}
+
+void PPULLVMRecompiler::TWI(u32 to, u32 ra, s32 simm16) {
+ InterpreterCall("TWI", &PPUInterpreter::TWI, to, ra, simm16);
+}
+
+void PPULLVMRecompiler::MFVSCR(u32 vd) {
+ auto vscr_i32 = GetVscr();
+ auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128));
+ SetVr(vd, vscr_i128);
+}
+
+void PPULLVMRecompiler::MTVSCR(u32 vb) {
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0));
+ vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001);
+ SetVscr(vscr_i32);
+}
+
+void PPULLVMRecompiler::VADDCUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+
+ va_v4i32 = m_ir_builder->CreateNot(va_v4i32);
+ auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32);
+ auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmpv4i32);
+}
+
+void PPULLVMRecompiler::VADDFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32);
+ SetVr(vd, sum_v4f32);
+}
+
+void PPULLVMRecompiler::VADDSBS(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8);
+ SetVr(vd, sum_v16i8);
+
+ // TODO: Set VSCR.SAT
+}
+
+void PPULLVMRecompiler::VADDSHS(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), va_v8i16, vb_v8i16);
+ SetVr(vd, sum_v8i16);
+
+ // TODO: Set VSCR.SAT
+}
+
+void PPULLVMRecompiler::VADDSWS(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+
+ // It looks like x86 does not have an instruction to add 32 bit intergers with signed/unsigned saturation.
+ // To implement add with saturation, we first determine what the result would be if the operation were to cause
+ // an overflow. If two -ve numbers are being added and cause an overflow, the result would be 0x80000000.
+ // If two +ve numbers are being added and cause an overflow, the result would be 0x7FFFFFFF. Addition of a -ve
+ // number and a +ve number cannot cause overflow. So the result in case of an overflow is 0x7FFFFFFF + sign bit
+ // of any one of the operands.
+ auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31);
+ tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF)));
+ auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands
+ // have the same sign, we bitwise XOR both the operands. If the sign bit of the result is 0 then the operands have the
+ // same sign and so may cause an overflow. We invert the result so that the sign bit is 1 when the operands have the
+ // same sign.
+ auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32);
+ tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32);
+
+ // Perform the sum.
+ auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32);
+ auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // If an overflow occurs, then the sign of the sum will be different from the sign of the operands. So, we xor the
+ // result with one of the operands. The sign bit of the result will be 1 if the sign bit of the sum and the sign bit of the
+ // result is different. This result is again ANDed with tmp3 (the sign bit of tmp3 is 1 only if the operands have the same
+ // sign and so can cause an overflow).
+ auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, sum_v4i32);
+ tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32);
+ tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31);
+ auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise.
+ auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8);
+ SetVr(vd, res_v16i8);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VADDUBM(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8);
+ SetVr(vd, sum_v16i8);
+}
+
+void PPULLVMRecompiler::VADDUBS(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8);
+ SetVr(vd, sum_v16i8);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VADDUHM(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16);
+ SetVr(vd, sum_v8i16);
+}
+
+void PPULLVMRecompiler::VADDUHS(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16);
+ SetVr(vd, sum_v8i16);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VADDUWM(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32);
+ SetVr(vd, sum_v4i32);
+}
+
+void PPULLVMRecompiler::VADDUWS(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32);
+ auto cmp_v4i1 = m_ir_builder->CreateICmpULT(sum_v4i32, va_v4i32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ auto res_v4i32 = m_ir_builder->CreateOr(sum_v4i32, cmp_v4i32);
+ SetVr(vd, res_v4i32);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VAND(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VANDC(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ vb_v4i32 = m_ir_builder->CreateNot(vb_v4i32);
+ auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VAVGSB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16);
+ sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt16(1)));
+ auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1);
+ auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ SetVr(vd, avg_v16i8);
+}
+
+void PPULLVMRecompiler::VAVGSH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
+ auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
+ auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32);
+ sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(1)));
+ auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1);
+ auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
+ SetVr(vd, avg_v8i16);
+}
+
+void PPULLVMRecompiler::VAVGSW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
+ auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
+ auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64);
+ sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1)));
+ auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1);
+ auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, avg_v4i32);
+}
+
+void PPULLVMRecompiler::VAVGUB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8);
+ SetVr(vd, avg_v16i8);
+}
+
+void PPULLVMRecompiler::VAVGUH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16);
+ SetVr(vd, avg_v8i16);
+}
+
+void PPULLVMRecompiler::VAVGUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto va_v4i64 = m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
+ auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4));
+ auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64);
+ sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1)));
+ auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1);
+ auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, avg_v4i32);
+}
+
+void PPULLVMRecompiler::VCFSX(u32 vd, u32 uimm5, u32 vb) {
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4));
+
+ if (uimm5) {
+ float scale = (float)((u64)1 << uimm5);
+ res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale)));
+ }
+
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VCFUX(u32 vd, u32 uimm5, u32 vb) {
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4));
+
+ if (uimm5) {
+ float scale = (float)((u64)1 << uimm5);
+ res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale)));
+ }
+
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VCMPBFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32);
+ vb_v4f32 = m_ir_builder->CreateFNeg(vb_v4f32);
+ auto cmp_lt_v4i1 = m_ir_builder->CreateFCmpOLT(va_v4f32, vb_v4f32);
+ auto cmp_gt_v4i32 = m_ir_builder->CreateZExt(cmp_gt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ auto cmp_lt_v4i32 = m_ir_builder->CreateZExt(cmp_lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ cmp_gt_v4i32 = m_ir_builder->CreateShl(cmp_gt_v4i32, 31);
+ cmp_lt_v4i32 = m_ir_builder->CreateShl(cmp_lt_v4i32, 30);
+ auto res_v4i32 = m_ir_builder->CreateOr(cmp_gt_v4i32, cmp_lt_v4i32);
+ SetVr(vd, res_v4i32);
+
+ // TODO: Implement NJ mode
+}
+
+void PPULLVMRecompiler::VCMPBFP_(u32 vd, u32 va, u32 vb) {
+ VCMPBFP(vd, va, vb);
+
+ auto vd_v16i8 = GetVrAsIntVec(vd, 8);
+ u32 mask_v16i32[16] = {3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ vd_v16i8 = m_ir_builder->CreateShuffleVector(vd_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32));
+ auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0));
+ auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0));
+ SetCrField(6, nullptr, nullptr, cmp_i1, nullptr);
+}
+
+void PPULLVMRecompiler::VCMPEQFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) {
+ VCMPEQFP(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPEQUB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8);
+ auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ SetVr(vd, cmp_v16i8);
+}
+
+void PPULLVMRecompiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) {
+ VCMPEQUB(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPEQUH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16);
+ auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8));
+ SetVr(vd, cmp_v8i16);
+}
+
+void PPULLVMRecompiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) {
+ VCMPEQUH(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPEQUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) {
+ VCMPEQUW(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGEFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) {
+ VCMPGEFP(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) {
+ VCMPGTFP(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTSB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8);
+ auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ SetVr(vd, cmp_v16i8);
+}
+
+void PPULLVMRecompiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) {
+ VCMPGTSB(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTSH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16);
+ auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8));
+ SetVr(vd, cmp_v8i16);
+}
+
+void PPULLVMRecompiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) {
+ VCMPGTSH(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTSW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto cmp_v4i1 = m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) {
+ VCMPGTSW(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTUB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8);
+ auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ SetVr(vd, cmp_v16i8);
+}
+
+void PPULLVMRecompiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) {
+ VCMPGTUB(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTUH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16);
+ auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8));
+ SetVr(vd, cmp_v8i16);
+}
+
+void PPULLVMRecompiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) {
+ VCMPGTUH(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCMPGTUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto cmp_v4i1 = m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmp_v4i32);
+}
+
+void PPULLVMRecompiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) {
+ VCMPGTUW(vd, va, vb);
+ SetCr6AfterVectorCompare(vd);
+}
+
+void PPULLVMRecompiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) {
+ InterpreterCall("VCTSXS", &PPUInterpreter::VCTSXS, vd, uimm5, vb);
+}
+
+void PPULLVMRecompiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) {
+ InterpreterCall("VCTUXS", &PPUInterpreter::VCTUXS, vd, uimm5, vb);
+}
+
+void PPULLVMRecompiler::VEXPTEFP(u32 vd, u32 vb) {
+ InterpreterCall("VEXPTEFP", &PPUInterpreter::VEXPTEFP, vd, vb);
+}
+
+void PPULLVMRecompiler::VLOGEFP(u32 vd, u32 vb) {
+ InterpreterCall("VLOGEFP", &PPUInterpreter::VLOGEFP, vd, vb);
+}
+
+void PPULLVMRecompiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto vc_v4f32 = GetVrAsFloatVec(vc);
+ auto res_v4f32 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32);
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VMAXFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32);
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VMAXSB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VMAXSH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VMAXSW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VMAXUB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VMAXUH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VMAXUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) {
+ InterpreterCall("VMHADDSHS", &PPUInterpreter::VMHADDSHS, vd, va, vb, vc);
+}
+
+void PPULLVMRecompiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) {
+ InterpreterCall("VMHRADDSHS", &PPUInterpreter::VMHRADDSHS, vd, va, vb, vc);
+}
+
+void PPULLVMRecompiler::VMINFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32);
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VMINSB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VMINSH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VMINSW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VMINUB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VMINUH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VMINUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) {
+ InterpreterCall("VMLADDUHM", &PPUInterpreter::VMLADDUHM, vd, va, vb, vc);
+}
+
+void PPULLVMRecompiler::VMRGHB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ u32 mask_v16i32[16] = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15};
+ auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32));
+ SetVr(vd, vd_v16i8);
+}
+
+void PPULLVMRecompiler::VMRGHH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ u32 mask_v8i32[8] = {12, 4, 13, 5, 14, 6, 15, 7};
+ auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
+ SetVr(vd, vd_v8i16);
+}
+
+void PPULLVMRecompiler::VMRGHW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ u32 mask_v4i32[4] = {6, 2, 7, 3};
+ auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
+ SetVr(vd, vd_v4i32);
+}
+
+void PPULLVMRecompiler::VMRGLB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ u32 mask_v16i32[16] = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7};
+ auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32));
+ SetVr(vd, vd_v16i8);
+}
+
+void PPULLVMRecompiler::VMRGLH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ u32 mask_v8i32[8] = {8, 0, 9, 1, 10, 2, 11, 3};
+ auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
+ SetVr(vd, vd_v8i16);
+}
+
+void PPULLVMRecompiler::VMRGLW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ u32 mask_v4i32[4] = {4, 0, 5, 1};
+ auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
+ SetVr(vd, vd_v4i32);
+}
+
+void PPULLVMRecompiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16);
+
+ auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ u32 mask1_v4i32[4] = {0, 4, 8, 12};
+ auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
+ auto tmp1_v4i32 = m_ir_builder->CreateSExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask2_v4i32[4] = {1, 5, 9, 13};
+ auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
+ auto tmp2_v4i32 = m_ir_builder->CreateSExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask3_v4i32[4] = {2, 6, 10, 14};
+ auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32));
+ auto tmp3_v4i32 = m_ir_builder->CreateSExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask4_v4i32[4] = {3, 7, 11, 15};
+ auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32));
+ auto tmp4_v4i32 = m_ir_builder->CreateSExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+
+ auto vc_v4i32 = GetVrAsIntVec(vc, 32);
+ auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
+
+ SetVr(vd, res_v4i32);
+
+ // TODO: Try to optimize with horizontal add
+}
+
+void PPULLVMRecompiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto vc_v4i32 = GetVrAsIntVec(vc, 32);
+ auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) {
+ InterpreterCall("VMSUMSHS", &PPUInterpreter::VMSUMSHS, vd, va, vb, vc);
+}
+
+void PPULLVMRecompiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16);
+
+ auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16));
+ u32 mask1_v4i32[4] = {0, 4, 8, 12};
+ auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
+ auto tmp1_v4i32 = m_ir_builder->CreateZExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask2_v4i32[4] = {1, 5, 9, 13};
+ auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
+ auto tmp2_v4i32 = m_ir_builder->CreateZExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask3_v4i32[4] = {2, 6, 10, 14};
+ auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32));
+ auto tmp3_v4i32 = m_ir_builder->CreateZExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ u32 mask4_v4i32[4] = {3, 7, 11, 15};
+ auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32));
+ auto tmp4_v4i32 = m_ir_builder->CreateZExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+
+ auto vc_v4i32 = GetVrAsIntVec(vc, 32);
+ auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
+
+ SetVr(vd, res_v4i32);
+
+ // TODO: Try to optimize with horizontal add
+}
+
+void PPULLVMRecompiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
+ auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
+ auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32);
+
+ auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8));
+ u32 mask1_v4i32[4] = {0, 2, 4, 6};
+ auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
+ u32 mask2_v4i32[4] = {1, 3, 5, 7};
+ auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
+
+ auto vc_v4i32 = GetVrAsIntVec(vc, 32);
+ auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
+ res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
+
+ SetVr(vd, res_v4i32);
+
+ // TODO: Try to optimize with horizontal add
+}
+
+void PPULLVMRecompiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) {
+ InterpreterCall("VMSUMUHS", &PPUInterpreter::VMSUMUHS, vd, va, vb, vc);
+}
+
+void PPULLVMRecompiler::VMULESB(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULESB", &PPUInterpreter::VMULESB, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULESH(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULESH", &PPUInterpreter::VMULESH, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULEUB(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULEUB", &PPUInterpreter::VMULEUB, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULEUH(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULEUH", &PPUInterpreter::VMULEUH, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULOSB(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULOSB", &PPUInterpreter::VMULOSB, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULOSH(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULOSH", &PPUInterpreter::VMULOSH, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULOUB(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULOUB", &PPUInterpreter::VMULOUB, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VMULOUH(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VMULOUH", &PPUInterpreter::VMULOUH, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto vc_v4f32 = GetVrAsFloatVec(vc);
+ vc_v4f32 = m_ir_builder->CreateFNeg(vc_v4f32);
+ auto res_v4f32 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32);
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VNOR(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16);
+ res_v8i16 = m_ir_builder->CreateNot(res_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VOR(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto vc_v16i8 = GetVrAsIntVec(vc, 8);
+
+ auto thrity_one_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(31));
+ vc_v16i8 = m_ir_builder->CreateAnd(vc_v16i8, thrity_one_v16i8);
+
+ auto fifteen_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(15));
+ auto vc_le15_v16i8 = m_ir_builder->CreateSub(fifteen_v16i8, vc_v16i8);
+ auto res_va_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), va_v16i8, vc_le15_v16i8);
+
+ auto vc_gt15_v16i8 = m_ir_builder->CreateSub(thrity_one_v16i8, vc_v16i8);
+ auto cmp_i1 = m_ir_builder->CreateICmpUGT(vc_gt15_v16i8, fifteen_v16i8);
+ auto cmp_i8 = m_ir_builder->CreateSExt(cmp_i1, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ vc_gt15_v16i8 = m_ir_builder->CreateOr(cmp_i8, vc_gt15_v16i8);
+ auto res_vb_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vb_v16i8, vc_gt15_v16i8);
+
+ auto res_v16i8 = m_ir_builder->CreateOr(res_vb_v16i8, res_va_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VPKPX(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKPX", &PPUInterpreter::VPKPX, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKSHSS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKSHSS", &PPUInterpreter::VPKSHSS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKSHUS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKSHUS", &PPUInterpreter::VPKSHUS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKSWSS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKSWSS", &PPUInterpreter::VPKSWSS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKSWUS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKSWUS", &PPUInterpreter::VPKSWUS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKUHUM(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKUHUM", &PPUInterpreter::VPKUHUM, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKUHUS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKUHUS", &PPUInterpreter::VPKUHUS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKUWUM(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKUWUM", &PPUInterpreter::VPKUWUM, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VPKUWUS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VPKUWUS", &PPUInterpreter::VPKUWUS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VREFP(u32 vd, u32 vb) {
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32);
+ SetVr(vd, res_v4f32);
+}
+
+void PPULLVMRecompiler::VRFIM(u32 vd, u32 vb) {
+ InterpreterCall("VRFIM", &PPUInterpreter::VRFIM, vd, vb);
+}
+
+void PPULLVMRecompiler::VRFIN(u32 vd, u32 vb) {
+ InterpreterCall("VRFIN", &PPUInterpreter::VRFIN, vd, vb);
+}
+
+void PPULLVMRecompiler::VRFIP(u32 vd, u32 vb) {
+ InterpreterCall("VRFIP", &PPUInterpreter::VRFIP, vd, vb);
+}
+
+void PPULLVMRecompiler::VRFIZ(u32 vd, u32 vb) {
+ InterpreterCall("VRFIZ", &PPUInterpreter::VRFIZ, vd, vb);
+}
+
+void PPULLVMRecompiler::VRLB(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VRLB", &PPUInterpreter::VRLB, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VRLH(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VRLH", &PPUInterpreter::VRLH, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VRLW(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VRLW", &PPUInterpreter::VRLW, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VRSQRTEFP(u32 vd, u32 vb) {
+ InterpreterCall("VRSQRTEFP", &PPUInterpreter::VRSQRTEFP, vd, vb);
+}
+
+void PPULLVMRecompiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto vc_v4i32 = GetVrAsIntVec(vc, 32);
+ vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, vc_v4i32);
+ vc_v4i32 = m_ir_builder->CreateNot(vc_v4i32);
+ va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vc_v4i32);
+ auto vd_v4i32 = m_ir_builder->CreateOr(va_v4i32, vb_v4i32);
+ SetVr(vd, vd_v4i32);
+}
+
+void PPULLVMRecompiler::VSL(u32 vd, u32 va, u32 vb) {
+ auto va_i128 = GetVr(va);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0));
+ sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7);
+ auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128));
+ va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128);
+ SetVr(vd, va_i128);
+}
+
+void PPULLVMRecompiler::VSLB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7)));
+ auto res_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ sh = 16 - sh;
+ u32 mask_v16i32[16] = {sh, sh + 1, sh + 2, sh + 3, sh + 4, sh + 5, sh + 6, sh + 7, sh + 8, sh + 9, sh + 10, sh + 11, sh + 12, sh + 13, sh + 14, sh + 15};
+ auto vd_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32));
+ SetVr(vd, vd_v16i8);
+}
+
+void PPULLVMRecompiler::VSLH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF)));
+ auto res_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VSLO(u32 vd, u32 va, u32 vb) {
+ auto va_i128 = GetVr(va);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0));
+ sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78);
+ auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128));
+ va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128);
+ SetVr(vd, va_i128);
+}
+
+void PPULLVMRecompiler::VSLW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F)));
+ auto res_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) {
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16));
+ auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5));
+ auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, undef_v16i8, mask_v16i32);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) {
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8));
+ auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5));
+ auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, undef_v8i16, mask_v8i32);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VSPLTISB(u32 vd, s32 simm5) {
+ auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5));
+ SetVr(vd, vd_v16i8);
+}
+
+void PPULLVMRecompiler::VSPLTISH(u32 vd, s32 simm5) {
+ auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5));
+ SetVr(vd, vd_v8i16);
+}
+
+void PPULLVMRecompiler::VSPLTISW(u32 vd, s32 simm5) {
+ auto vd_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5));
+ SetVr(vd, vd_v4i32);
+}
+
+void PPULLVMRecompiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) {
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5));
+ auto res_v4i32 = m_ir_builder->CreateShuffleVector(vb_v4i32, undef_v4i32, mask_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VSR(u32 vd, u32 va, u32 vb) {
+ auto va_i128 = GetVr(va);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0));
+ sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7);
+ auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128));
+ va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128);
+ SetVr(vd, va_i128);
+}
+
+void PPULLVMRecompiler::VSRAB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7)));
+ auto res_v16i8 = m_ir_builder->CreateAShr(va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VSRAH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF)));
+ auto res_v8i16 = m_ir_builder->CreateAShr(va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VSRAW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F)));
+ auto res_v4i32 = m_ir_builder->CreateAShr(va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VSRB(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7)));
+ auto res_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8);
+ SetVr(vd, res_v16i8);
+}
+
+void PPULLVMRecompiler::VSRH(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF)));
+ auto res_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::VSRO(u32 vd, u32 va, u32 vb) {
+ auto va_i128 = GetVr(va);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0));
+ sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78);
+ auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128));
+ va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128);
+ SetVr(vd, va_i128);
+}
+
+void PPULLVMRecompiler::VSRW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F)));
+ auto res_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32);
+ SetVr(vd, res_v4i32);
+}
+
+void PPULLVMRecompiler::VSUBCUW(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+
+ auto cmpv4i1 = m_ir_builder->CreateICmpUGE(va_v4i32, vb_v4i32);
+ auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ SetVr(vd, cmpv4i32);
+}
+
+void PPULLVMRecompiler::VSUBFP(u32 vd, u32 va, u32 vb) {
+ auto va_v4f32 = GetVrAsFloatVec(va);
+ auto vb_v4f32 = GetVrAsFloatVec(vb);
+ auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32);
+ SetVr(vd, diff_v4f32);
+}
+
+void PPULLVMRecompiler::VSUBSBS(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8);
+ SetVr(vd, diff_v16i8);
+
+ // TODO: Set VSCR.SAT
+}
+
+void PPULLVMRecompiler::VSUBSHS(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), va_v8i16, vb_v8i16);
+ SetVr(vd, diff_v8i16);
+
+ // TODO: Set VSCR.SAT
+}
+
+void PPULLVMRecompiler::VSUBSWS(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+
+ // See the comments for VADDSWS for a detailed description of how this works
+
+ // Find the result in case of an overflow
+ auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31);
+ tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF)));
+ auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // Find the elements that can overflow (elements with opposite sign bits)
+ auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32);
+
+ // Perform the sub
+ auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32);
+ auto diff_v16i8 = m_ir_builder->CreateBitCast(diff_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // Find the elements that overflowed
+ auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, diff_v4i32);
+ tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32);
+ tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31);
+ auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
+
+ // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise.
+ auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), diff_v16i8, tmp1_v16i8, tmp3_v16i8);
+ SetVr(vd, res_v16i8);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VSUBUBM(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8);
+ SetVr(vd, diff_v16i8);
+}
+
+void PPULLVMRecompiler::VSUBUBS(u32 vd, u32 va, u32 vb) {
+ auto va_v16i8 = GetVrAsIntVec(va, 8);
+ auto vb_v16i8 = GetVrAsIntVec(vb, 8);
+ auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8);
+ SetVr(vd, diff_v16i8);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VSUBUHM(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16);
+ SetVr(vd, diff_v8i16);
+}
+
+void PPULLVMRecompiler::VSUBUHS(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16);
+ SetVr(vd, diff_v8i16);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VSUBUWM(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32);
+ SetVr(vd, diff_v4i32);
+}
+
+void PPULLVMRecompiler::VSUBUWS(u32 vd, u32 va, u32 vb) {
+ auto va_v4i32 = GetVrAsIntVec(va, 32);
+ auto vb_v4i32 = GetVrAsIntVec(vb, 32);
+ auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32);
+ auto cmp_v4i1 = m_ir_builder->CreateICmpULE(diff_v4i32, va_v4i32);
+ auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
+ auto res_v4i32 = m_ir_builder->CreateAnd(diff_v4i32, cmp_v4i32);
+ SetVr(vd, res_v4i32);
+
+ // TODO: Set SAT
+}
+
+void PPULLVMRecompiler::VSUMSWS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VSUMSWS", &PPUInterpreter::VSUMSWS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VSUM2SWS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VSUM2SWS", &PPUInterpreter::VSUM2SWS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VSUM4SBS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VSUM4SBS", &PPUInterpreter::VSUM4SBS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VSUM4SHS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VSUM4SHS", &PPUInterpreter::VSUM4SHS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VSUM4UBS(u32 vd, u32 va, u32 vb) {
+ InterpreterCall("VSUM4UBS", &PPUInterpreter::VSUM4UBS, vd, va, vb);
+}
+
+void PPULLVMRecompiler::VUPKHPX(u32 vd, u32 vb) {
+ InterpreterCall("VUPKHPX", &PPUInterpreter::VUPKHPX, vd, vb);
+}
+
+void PPULLVMRecompiler::VUPKHSB(u32 vd, u32 vb) {
+ InterpreterCall("VUPKHSB", &PPUInterpreter::VUPKHSB, vd, vb);
+}
+
+void PPULLVMRecompiler::VUPKHSH(u32 vd, u32 vb) {
+ InterpreterCall("VUPKHSH", &PPUInterpreter::VUPKHSH, vd, vb);
+}
+
+void PPULLVMRecompiler::VUPKLPX(u32 vd, u32 vb) {
+ InterpreterCall("VUPKLPX", &PPUInterpreter::VUPKLPX, vd, vb);
+}
+
+void PPULLVMRecompiler::VUPKLSB(u32 vd, u32 vb) {
+ InterpreterCall("VUPKLSB", &PPUInterpreter::VUPKLSB, vd, vb);
+}
+
+void PPULLVMRecompiler::VUPKLSH(u32 vd, u32 vb) {
+ InterpreterCall("VUPKLSH", &PPUInterpreter::VUPKLSH, vd, vb);
+}
+
+void PPULLVMRecompiler::VXOR(u32 vd, u32 va, u32 vb) {
+ auto va_v8i16 = GetVrAsIntVec(va, 16);
+ auto vb_v8i16 = GetVrAsIntVec(vb, 16);
+ auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16);
+ SetVr(vd, res_v8i16);
+}
+
+void PPULLVMRecompiler::MULLI(u32 rd, u32 ra, s32 simm16) {
+ auto ra_i64 = GetGpr(ra);
+ auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16));
+ SetGpr(rd, res_i64);
+ //InterpreterCall("MULLI", &PPUInterpreter::MULLI, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::SUBFIC(u32 rd, u32 ra, s32 simm16) {
+ auto ra_i64 = GetGpr(ra);
+ ra_i64 = m_ir_builder->CreateNeg(ra_i64);
+ auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16));
+ auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0});
+ auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
+ SetGpr(rd, diff_i64);
+ SetXerCa(carry_i1);
+ //InterpreterCall("SUBFIC", &PPUInterpreter::SUBFIC, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) {
+ Value * ra_i64;
+ if (l == 0) {
+ ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty());
+ } else {
+ ra_i64 = GetGpr(ra);
+ }
+
+ SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16));
+ //InterpreterCall("CMPLI", &PPUInterpreter::CMPLI, crfd, l, ra, uimm16);
+}
+
+void PPULLVMRecompiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) {
+ Value * ra_i64;
+ if (l == 0) {
+ ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty());
+ } else {
+ ra_i64 = GetGpr(ra);
+ }
+
+ SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16));
+ //InterpreterCall("CMPI", &PPUInterpreter::CMPI, crfd, l, ra, simm16);
+}
+
+void PPULLVMRecompiler::ADDIC(u32 rd, u32 ra, s32 simm16) {
+ auto ra_i64 = GetGpr(ra);
+ auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64);
+ auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0});
+ auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
+ SetGpr(rd, sum_i64);
+ SetXerCa(carry_i1);
+ //InterpreterCall("ADDIC", &PPUInterpreter::ADDIC, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::ADDIC_(u32 rd, u32 ra, s32 simm16) {
+ ADDIC(rd, ra, simm16);
+ SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0));
+ //InterpreterCall("ADDIC_", &PPUInterpreter::ADDIC_, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::ADDI(u32 rd, u32 ra, s32 simm16) {
+ if (ra == 0) {
+ SetGpr(rd, m_ir_builder->getInt64((s64)simm16));
+ } else {
+ auto ra_i64 = GetGpr(ra);
+ auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16));
+ SetGpr(rd, sum_i64);
+ }
+ //InterpreterCall("ADDI", &PPUInterpreter::ADDI, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::ADDIS(u32 rd, u32 ra, s32 simm16) {
+ if (ra == 0) {
+ SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16));
+ } else {
+ auto ra_i64 = GetGpr(ra);
+ auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16));
+ SetGpr(rd, sum_i64);
+ }
+ //InterpreterCall("ADDIS", &PPUInterpreter::ADDIS, rd, ra, simm16);
+}
+
+void PPULLVMRecompiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) {
+ auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, bd));
+ CreateBranch(CheckBranchCondition(bo, bi), target_i64, lk ? true : false);
+ //m_hit_branch_instruction = true;
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address));
+ //InterpreterCall("BC", &PPUInterpreter::BC, bo, bi, bd, aa, lk);
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
+ //m_ir_builder->CreateRetVoid();
+}
+
+void PPULLVMRecompiler::SC(u32 sc_code) {
+ InterpreterCall("SC", &PPUInterpreter::SC, sc_code);
+}
+
+void PPULLVMRecompiler::B(s32 ll, u32 aa, u32 lk) {
+ auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_current_instruction_address, ll));
+ CreateBranch(nullptr, target_i64, lk ? true : false);
+ //m_hit_branch_instruction = true;
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address));
+ //InterpreterCall("B", &PPUInterpreter::B, ll, aa, lk);
+ //m_ir_builder->CreateRetVoid();
+}
+
+void PPULLVMRecompiler::MCRF(u32 crfd, u32 crfs) {
+ if (crfd != crfs) {
+ auto cr_i32 = GetCr();
+ auto crf_i32 = GetNibble(cr_i32, crfs);
+ cr_i32 = SetNibble(cr_i32, crfd, crf_i32);
+ SetCr(cr_i32);
+ }
+ //InterpreterCall("MCRF", &PPUInterpreter::MCRF, crfd, crfs);
+}
+
+void PPULLVMRecompiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) {
+ auto lr_i64 = GetLr();
+ lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL);
+ CreateBranch(CheckBranchCondition(bo, bi), lr_i64, lk ? true : false);
+ //m_hit_branch_instruction = true;
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address));
+ //InterpreterCall("BCLR", &PPUInterpreter::BCLR, bo, bi, bh, lk);
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
+ //m_ir_builder->CreateRetVoid();
+}
+
+void PPULLVMRecompiler::CRNOR(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32);
+ res_i32 = m_ir_builder->CreateXor(res_i32, 1);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRNOR", &PPUInterpreter::CRNOR, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CRANDC(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1);
+ res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRANDC", &PPUInterpreter::CRANDC, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::ISYNC() {
+ m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence));
+ //InterpreterCall("ISYNC", &PPUInterpreter::ISYNC);
+}
+
+void PPULLVMRecompiler::CRXOR(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRXOR", &PPUInterpreter::CRXOR, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CRNAND(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32);
+ res_i32 = m_ir_builder->CreateXor(res_i32, 1);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRNAND", &PPUInterpreter::CRNAND, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CRAND(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRAND", &PPUInterpreter::CRAND, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CREQV(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32);
+ res_i32 = m_ir_builder->CreateXor(res_i32, 1);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CREQV", &PPUInterpreter::CREQV, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CRORC(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1);
+ res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CRORC", &PPUInterpreter::CRORC, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::CROR(u32 crbd, u32 crba, u32 crbb) {
+ auto cr_i32 = GetCr();
+ auto ba_i32 = GetBit(cr_i32, crba);
+ auto bb_i32 = GetBit(cr_i32, crbb);
+ auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32);
+ cr_i32 = SetBit(cr_i32, crbd, res_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("CROR", &PPUInterpreter::CROR, crbd, crba, crbb);
+}
+
+void PPULLVMRecompiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) {
+ auto ctr_i64 = GetCtr();
+ ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL);
+ CreateBranch(CheckBranchCondition(bo, bi), ctr_i64, lk ? true : false);
+ //m_hit_branch_instruction = true;
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address));
+ //InterpreterCall("BCCTR", &PPUInterpreter::BCCTR, bo, bi, bh, lk);
+ //SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
+ //m_ir_builder->CreateRetVoid();
+}
+
+void PPULLVMRecompiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32);
+ rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64);
+ auto ra_i64 = GetGpr(ra);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ u64 mask = s_rotate_mask[32 + mb][32 + me];
+ res_i64 = m_ir_builder->CreateAnd(res_i64, mask);
+ ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask);
+ res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLWIMI", &PPUInterpreter::RLWIMI, ra, rs, sh, mb, me, rc);
+}
+
+void PPULLVMRecompiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32);
+ rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLWINM", &PPUInterpreter::RLWINM, ra, rs, sh, mb, me, rc);
+}
+
+void PPULLVMRecompiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32);
+ rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64);
+ auto rb_i64 = GetGpr(rb);
+ auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x1F);
+ auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(32), shl_i64);
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64);
+ auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLWNM", &PPUInterpreter::RLWNM, ra, rs, rb, mb, me, rc);
+}
+
+void PPULLVMRecompiler::ORI(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16);
+ SetGpr(ra, res_i64);
+ //InterpreterCall("ORI", &PPUInterpreter::ORI, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::ORIS(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16);
+ SetGpr(ra, res_i64);
+ //InterpreterCall("ORIS", &PPUInterpreter::ORIS, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::XORI(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16);
+ SetGpr(ra, res_i64);
+ //InterpreterCall("XORI", &PPUInterpreter::XORI, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::XORIS(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16);
+ SetGpr(ra, res_i64);
+ //InterpreterCall("XORIS", &PPUInterpreter::XORIS, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::ANDI_(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16);
+ SetGpr(ra, res_i64);
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ //InterpreterCall("ANDI_", &PPUInterpreter::ANDI_, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16);
+ SetGpr(ra, res_i64);
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ //InterpreterCall("ANDIS_", &PPUInterpreter::ANDIS_, ra, rs, uimm16);
+}
+
+void PPULLVMRecompiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63]);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLDICL", &PPUInterpreter::RLDICL, ra, rs, sh, mb, rc);
+}
+
+void PPULLVMRecompiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][me]);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLDICR", &PPUInterpreter::RLDICR, ra, rs, sh, me, rc);
+}
+
+void PPULLVMRecompiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63 - sh]);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLDIC", &PPUInterpreter::RLDIC, ra, rs, sh, mb, rc);
+}
+
+void PPULLVMRecompiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto ra_i64 = GetGpr(ra);
+ auto res_i64 = rs_i64;
+ if (sh) {
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh);
+ res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+ }
+
+ u64 mask = s_rotate_mask[mb][63 - sh];
+ res_i64 = m_ir_builder->CreateAnd(res_i64, mask);
+ ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask);
+ res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLDIMI", &PPUInterpreter::RLDIMI, ra, rs, sh, mb, rc);
+}
+
+void PPULLVMRecompiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rb_i64 = GetGpr(rb);
+ auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F);
+ auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(64), shl_i64);
+ auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64);
+ auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64);
+ auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64);
+
+ if (is_r) {
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][m_eb]);
+ } else {
+ res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[m_eb][63]);
+ }
+
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("RLDC_LR", &PPUInterpreter::RLDC_LR, ra, rs, rb, m_eb, is_r, rc);
+}
+
+void PPULLVMRecompiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) {
+ Value * ra_i64;
+ Value * rb_i64;
+ if (l == 0) {
+ ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty());
+ rb_i64 = m_ir_builder->CreateSExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty());
+ } else {
+ ra_i64 = GetGpr(ra);
+ rb_i64 = GetGpr(rb);
+ }
+
+ SetCrFieldSignedCmp(crfd, ra_i64, rb_i64);
+ //InterpreterCall("CMP", &PPUInterpreter::CMP, crfd, l, ra, rb);
+}
+
+void PPULLVMRecompiler::TW(u32 to, u32 ra, u32 rb) {
+ InterpreterCall("TW", &PPUInterpreter::TW, to, ra, rb);
+}
+
+void PPULLVMRecompiler::LVSL(u32 vd, u32 ra, u32 rb) {
+ static const u128 s_lvsl_values[] = {
+ {0x08090A0B0C0D0E0F, 0x0001020304050607},
+ {0x090A0B0C0D0E0F10, 0x0102030405060708},
+ {0x0A0B0C0D0E0F1011, 0x0203040506070809},
+ {0x0B0C0D0E0F101112, 0x030405060708090A},
+ {0x0C0D0E0F10111213, 0x0405060708090A0B},
+ {0x0D0E0F1011121314, 0x05060708090A0B0C},
+ {0x0E0F101112131415, 0x060708090A0B0C0D},
+ {0x0F10111213141516, 0x0708090A0B0C0D0E},
+ {0x1011121314151617, 0x08090A0B0C0D0E0F},
+ {0x1112131415161718, 0x090A0B0C0D0E0F10},
+ {0x1213141516171819, 0x0A0B0C0D0E0F1011},
+ {0x131415161718191A, 0x0B0C0D0E0F101112},
+ {0x1415161718191A1B, 0x0C0D0E0F10111213},
+ {0x15161718191A1B1C, 0x0D0E0F1011121314},
+ {0x161718191A1B1C1D, 0x0E0F101112131415},
+ {0x1718191A1B1C1D1E, 0x0F10111213141516},
+ };
+
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF);
+ auto lvsl_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsl_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo());
+ lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64);
+ auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16);
+ SetVr(vd, val_v16i8);
+
+ //InterpreterCall("LVSL", &PPUInterpreter::LVSL, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::LVEBX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto val_i8 = ReadMemory(addr_i64, 8);
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64);
+ auto vd_v16i8 = GetVrAsIntVec(vd, 8);
+ vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64);
+ SetVr(vd, vd_v16i8);
+
+ //InterpreterCall("LVEBX", &PPUInterpreter::LVEBX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ ra_i64 = m_ir_builder->CreateNeg(ra_i64);
+ auto rb_i64 = GetGpr(rb);
+ auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64);
+ auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, {0});
+ auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
+ SetGpr(rd, diff_i64);
+ SetXerCa(carry_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0));
+ }
+
+ if (oe) {
+ // TODO: Implement this
+ }
+ //InterpreterCall("SUBFC", &PPUInterpreter::SUBFC, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64);
+ auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0});
+ auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
+ SetGpr(rd, sum_i64);
+ SetXerCa(carry_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0));
+ }
+
+ if (oe) {
+ // TODO: Implement this
+ }
+ //InterpreterCall("ADDC", &PPUInterpreter::ADDC, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::MULHDU(u32 rd, u32 ra, u32 rb, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128));
+ auto rb_i128 = m_ir_builder->CreateZExt(rb_i64, m_ir_builder->getIntNTy(128));
+ auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128);
+ prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64);
+ auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty());
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("MULHDU", &PPUInterpreter::MULHDU, rd, ra, rb, rc);
+}
+
+void PPULLVMRecompiler::MULHWU(u32 rd, u32 ra, u32 rb, bool rc) {
+ auto ra_i32 = GetGpr(ra, 32);
+ auto rb_i32 = GetGpr(rb, 32);
+ auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty());
+ auto rb_i64 = m_ir_builder->CreateZExt(rb_i32, m_ir_builder->getInt64Ty());
+ auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64);
+ prod_i64 = m_ir_builder->CreateLShr(prod_i64, 32);
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("MULHWU", &PPUInterpreter::MULHWU, rd, ra, rb, rc);
+}
+
+void PPULLVMRecompiler::MFOCRF(u32 a, u32 rd, u32 crm) {
+ auto cr_i32 = GetCr();
+ auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, cr_i64);
+ //InterpreterCall("MFOCRF", &PPUInterpreter::MFOCRF, a, rd, crm);
+}
+
+void PPULLVMRecompiler::LWARX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_ADDR));
+ auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8);
+
+ auto resv_val_i32 = ReadMemory(addr_i64, 32, 4, false, false);
+ auto resv_val_i64 = m_ir_builder->CreateZExt(resv_val_i32, m_ir_builder->getInt64Ty());
+ auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_VALUE));
+ auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8);
+
+ resv_val_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), resv_val_i32);
+ resv_val_i64 = m_ir_builder->CreateZExt(resv_val_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, resv_val_i64);
+ //InterpreterCall("LWARX", &PPUInterpreter::LWARX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LDX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LDX", &PPUInterpreter::LDX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LWZX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LWZX", &PPUInterpreter::LWZX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::SLW(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F);
+ auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty());
+ auto res_i64 = m_ir_builder->CreateShl(rs_i64, rb_i64);
+ auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty());
+ res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty());
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SLW", &PPUInterpreter::SLW, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::CNTLZW(u32 ra, u32 rs, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false));
+ auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty());
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("CNTLZW", &PPUInterpreter::CNTLZW, ra, rs, rc);
+}
+
+void PPULLVMRecompiler::SLD(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128));
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F);
+ auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128));
+ auto res_i128 = m_ir_builder->CreateShl(rs_i128, rb_i128);
+ auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty());
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SLD", &PPUInterpreter::SLD, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::AND(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("AND", &PPUInterpreter::AND, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) {
+ Value * ra_i64;
+ Value * rb_i64;
+ if (l == 0) {
+ ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty());
+ rb_i64 = m_ir_builder->CreateZExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty());
+ } else {
+ ra_i64 = GetGpr(ra);
+ rb_i64 = GetGpr(rb);
+ }
+
+ SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64);
+ //InterpreterCall("CMPL", &PPUInterpreter::CMPL, crfd, l, ra, rb);
+}
+
+void PPULLVMRecompiler::LVSR(u32 vd, u32 ra, u32 rb) {
+ static const u128 s_lvsr_values[] = {
+ {0x18191A1B1C1D1E1F, 0x1011121314151617},
+ {0x1718191A1B1C1D1E, 0x0F10111213141516},
+ {0x161718191A1B1C1D, 0x0E0F101112131415},
+ {0x15161718191A1B1C, 0x0D0E0F1011121314},
+ {0x1415161718191A1B, 0x0C0D0E0F10111213},
+ {0x131415161718191A, 0x0B0C0D0E0F101112},
+ {0x1213141516171819, 0x0A0B0C0D0E0F1011},
+ {0x1112131415161718, 0x090A0B0C0D0E0F10},
+ {0x1011121314151617, 0x08090A0B0C0D0E0F},
+ {0x0F10111213141516, 0x0708090A0B0C0D0E},
+ {0x0E0F101112131415, 0x060708090A0B0C0D},
+ {0x0D0E0F1011121314, 0x05060708090A0B0C},
+ {0x0C0D0E0F10111213, 0x0405060708090A0B},
+ {0x0B0C0D0E0F101112, 0x030405060708090A},
+ {0x0A0B0C0D0E0F1011, 0x0203040506070809},
+ {0x090A0B0C0D0E0F10, 0x0102030405060708},
+ };
+
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF);
+ auto lvsr_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsr_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo());
+ lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64);
+ auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16);
+ SetVr(vd, val_v16i8);
+
+ //InterpreterCall("LVSR", &PPUInterpreter::LVSR, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::LVEHX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL);
+ auto val_i16 = ReadMemory(addr_i64, 16, 2);
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateLShr(index_i64, 1);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64);
+ auto vd_v8i16 = GetVrAsIntVec(vd, 16);
+ vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64);
+ SetVr(vd, vd_v8i16);
+
+ //InterpreterCall("LVEHX", &PPUInterpreter::LVEHX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto diff_i64 = m_ir_builder->CreateSub(rb_i64, ra_i64);
+ SetGpr(rd, diff_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0));
+ }
+
+ if (oe) {
+ // TODO: Implement this
+ }
+ //InterpreterCall("SUBF", &PPUInterpreter::SUBF, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::LDUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LDUX", &PPUInterpreter::LDUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::DCBST(u32 ra, u32 rb) {
+ // TODO: Implement this
+ //InterpreterCall("DCBST", &PPUInterpreter::DCBST, ra, rb);
+}
+
+void PPULLVMRecompiler::LWZUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LWZUX", &PPUInterpreter::LWZUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::CNTLZD(u32 ra, u32 rs, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto res_i64 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false));
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("CNTLZD", &PPUInterpreter::CNTLZD, ra, rs, rc);
+}
+
+void PPULLVMRecompiler::ANDC(u32 ra, u32 rs, u32 rb, bool rc) {
+ InterpreterCall("ANDC", &PPUInterpreter::ANDC, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::TD(u32 to, u32 ra, u32 rb) {
+ InterpreterCall("TD", &PPUInterpreter::TD, to, ra, rb);
+}
+
+void PPULLVMRecompiler::LVEWX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL);
+ auto val_i32 = ReadMemory(addr_i64, 32, 4);
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateLShr(index_i64, 2);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64);
+ auto vd_v4i32 = GetVrAsIntVec(vd, 32);
+ vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64);
+ SetVr(vd, vd_v4i32);
+
+ //InterpreterCall("LVEWX", &PPUInterpreter::LVEWX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::MULHD(u32 rd, u32 ra, u32 rb, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128));
+ auto rb_i128 = m_ir_builder->CreateSExt(rb_i64, m_ir_builder->getIntNTy(128));
+ auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128);
+ prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64);
+ auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty());
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("MULHD", &PPUInterpreter::MULHD, rd, ra, rb, rc);
+}
+
+void PPULLVMRecompiler::MULHW(u32 rd, u32 ra, u32 rb, bool rc) {
+ auto ra_i32 = GetGpr(ra, 32);
+ auto rb_i32 = GetGpr(rb, 32);
+ auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty());
+ auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty());
+ auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64);
+ prod_i64 = m_ir_builder->CreateAShr(prod_i64, 32);
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("MULHW", &PPUInterpreter::MULHW, rd, ra, rb, rc);
+}
+
+void PPULLVMRecompiler::LDARX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto resv_addr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_ADDR));
+ auto resv_addr_i64_ptr = m_ir_builder->CreateBitCast(resv_addr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(addr_i64, resv_addr_i64_ptr, 8);
+
+ auto resv_val_i64 = ReadMemory(addr_i64, 64, 8, false);
+ auto resv_val_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, R_VALUE));
+ auto resv_val_i64_ptr = m_ir_builder->CreateBitCast(resv_val_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(resv_val_i64, resv_val_i64_ptr, 8);
+
+ resv_val_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), resv_val_i64);
+ SetGpr(rd, resv_val_i64);
+ //InterpreterCall("LDARX", &PPUInterpreter::LDARX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::DCBF(u32 ra, u32 rb) {
+ // TODO: Implement this
+ //InterpreterCall("DCBF", &PPUInterpreter::DCBF, ra, rb);
+}
+
+void PPULLVMRecompiler::LBZX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i8 = ReadMemory(addr_i64, 8);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LBZX", &PPUInterpreter::LBZX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LVX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL);
+ auto mem_i128 = ReadMemory(addr_i64, 128, 16);
+ SetVr(vd, mem_i128);
+ //InterpreterCall("LVX", &PPUInterpreter::LVX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::NEG(u32 rd, u32 ra, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64);
+ SetGpr(rd, diff_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0));
+ }
+
+ if (oe) {
+ // TODO: Implement this
+ }
+ //InterpreterCall("NEG", &PPUInterpreter::NEG, rd, ra, oe, rc);
+}
+
+void PPULLVMRecompiler::LBZUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i8 = ReadMemory(addr_i64, 8);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LBZUX", &PPUInterpreter::LBZUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::NOR(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64);
+ res_i64 = m_ir_builder->CreateXor(res_i64, (s64)-1);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("NOR", &PPUInterpreter::NOR, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::STVEBX(u32 vs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64);
+ auto vs_v16i8 = GetVrAsIntVec(vs, 8);
+ auto val_i8 = m_ir_builder->CreateExtractElement(vs_v16i8, index_i64);
+ WriteMemory(addr_i64, val_i8);
+ //InterpreterCall("STVEBX", &PPUInterpreter::STVEBX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ InterpreterCall("SUBFE", &PPUInterpreter::SUBFE, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ InterpreterCall("ADDE", &PPUInterpreter::ADDE, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::MTOCRF(u32 l, u32 crm, u32 rs) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto cr_i32 = GetCr();
+ u32 mask = 0;
+
+ for (u32 i = 0; i < 8; i++) {
+ if (crm & (1 << i)) {
+ mask |= 0xF << ((7 - i) * 4);
+ if (l) {
+ break;
+ }
+ }
+ }
+
+ cr_i32 = m_ir_builder->CreateAnd(cr_i32, ~mask);
+ rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask);
+ cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32);
+ SetCr(cr_i32);
+ //InterpreterCall("MTOCRF", &PPUInterpreter::MTOCRF, l, crm, rs);
+}
+
+void PPULLVMRecompiler::STDX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 64));
+ //InterpreterCall("STDX", &PPUInterpreter::STDX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STWCX_(u32 rs, u32 ra, u32 rb) {
+ InterpreterCall("STWCX_", &PPUInterpreter::STWCX_, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STWX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 32));
+ //InterpreterCall("STWX", &PPUInterpreter::STWX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STVEHX(u32 vs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL);
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateLShr(index_i64, 1);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64);
+ auto vs_v8i16 = GetVrAsIntVec(vs, 16);
+ auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64);
+ WriteMemory(addr_i64, val_i16, 2);
+ //InterpreterCall("STVEHX", &PPUInterpreter::STVEHX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::STDUX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 64));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STDUX", &PPUInterpreter::STDUX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STWUX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 32));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STWUX", &PPUInterpreter::STWUX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STVEWX(u32 vs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL);
+ auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf);
+ index_i64 = m_ir_builder->CreateLShr(index_i64, 2);
+ index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64);
+ auto vs_v4i32 = GetVrAsIntVec(vs, 32);
+ auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64);
+ WriteMemory(addr_i64, val_i32, 4);
+ //InterpreterCall("STVEWX", &PPUInterpreter::STVEWX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::ADDZE(u32 rd, u32 ra, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto ca_i64 = GetXerCa();
+ auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64);
+ auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, {0});
+ auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, {1});
+ SetGpr(rd, sum_i64);
+ SetXerCa(carry_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("ADDZE", &PPUInterpreter::ADDZE, rd, ra, oe, rc);
+}
+
+void PPULLVMRecompiler::SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) {
+ InterpreterCall("SUBFZE", &PPUInterpreter::SUBFZE, rd, ra, oe, rc);
+}
+
+void PPULLVMRecompiler::STDCX_(u32 rs, u32 ra, u32 rb) {
+ InterpreterCall("STDCX_", &PPUInterpreter::STDCX_, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STBX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 8));
+ //InterpreterCall("STBX", &PPUInterpreter::STBX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STVX(u32 vs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL);
+ WriteMemory(addr_i64, GetVr(vs), 16);
+ //InterpreterCall("STVX", &PPUInterpreter::STVX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::SUBFME(u32 rd, u32 ra, u32 oe, bool rc) {
+ InterpreterCall("SUBFME", &PPUInterpreter::SUBFME, rd, ra, oe, rc);
+}
+
+void PPULLVMRecompiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64);
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ //InterpreterCall("MULLD", &PPUInterpreter::MULLD, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::ADDME(u32 rd, u32 ra, u32 oe, bool rc) {
+ InterpreterCall("ADDME", &PPUInterpreter::ADDME, rd, ra, oe, rc);
+}
+
+void PPULLVMRecompiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i32 = GetGpr(ra, 32);
+ auto rb_i32 = GetGpr(rb, 32);
+ auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty());
+ auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty());
+ auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64);
+ SetGpr(rd, prod_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ //InterpreterCall("MULLW", &PPUInterpreter::MULLW, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::DCBTST(u32 ra, u32 rb, u32 th) {
+ // TODO: Implement this
+ //InterpreterCall("DCBTST", &PPUInterpreter::DCBTST, ra, rb, th);
+}
+
+void PPULLVMRecompiler::STBUX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 8));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STBUX", &PPUInterpreter::STBUX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64);
+ SetGpr(rd, sum_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0));
+ }
+
+ if (oe) {
+ // TODO: Implement this
+ }
+ //InterpreterCall("ADD", &PPUInterpreter::ADD, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::DCBT(u32 ra, u32 rb, u32 th) {
+ // TODO: Implement this using prefetch
+ //InterpreterCall("DCBT", &PPUInterpreter::DCBT, ra, rb, th);
+}
+
+void PPULLVMRecompiler::LHZX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LHZX", &PPUInterpreter::LHZX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::EQV(u32 ra, u32 rs, u32 rb, bool rc) {
+ InterpreterCall("EQV", &PPUInterpreter::EQV, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::ECIWX(u32 rd, u32 ra, u32 rb) {
+ InterpreterCall("ECIWX", &PPUInterpreter::ECIWX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LHZUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LHZUX", &PPUInterpreter::LHZUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::XOR(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("XOR", &PPUInterpreter::XOR, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::MFSPR(u32 rd, u32 spr) {
+ Value * rd_i64;
+ auto n = (spr >> 5) | ((spr & 0x1f) << 5);
+
+ switch (n) {
+ case 0x001:
+ rd_i64 = GetXer();
+ break;
+ case 0x008:
+ rd_i64 = GetLr();
+ break;
+ case 0x009:
+ rd_i64 = GetCtr();
+ break;
+ case 0x100:
+ rd_i64 = GetUsprg0();
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ SetGpr(rd, rd_i64);
+ //InterpreterCall("MFSPR", &PPUInterpreter::MFSPR, rd, spr);
+}
+
+void PPULLVMRecompiler::LWAX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LWAX", &PPUInterpreter::LWAX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::DST(u32 ra, u32 rb, u32 strm, u32 t) {
+ InterpreterCall("DST", &PPUInterpreter::DST, ra, rb, strm, t);
+}
+
+void PPULLVMRecompiler::LHAX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LHAX", &PPUInterpreter::LHAX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LVXL(u32 vd, u32 ra, u32 rb) {
+ LVX(vd, ra, rb);
+ //InterpreterCall("LVXL", &PPUInterpreter::LVXL, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::MFTB(u32 rd, u32 spr) {
+ static Function * s_get_time_fn = nullptr;
+
+ if (s_get_time_fn == nullptr) {
+ s_get_time_fn = (Function *)m_module->getOrInsertFunction("get_time", m_ir_builder->getInt64Ty(), nullptr);
+ s_get_time_fn->setCallingConv(CallingConv::X86_64_Win64);
+ m_execution_engine->addGlobalMapping(s_get_time_fn, (void *)get_time);
+ }
+
+ auto tb_i64 = (Value *)m_ir_builder->CreateCall(s_get_time_fn);
+
+ u32 n = (spr >> 5) | ((spr & 0x1f) << 5);
+ if (n == 0x10D) {
+ tb_i64 = m_ir_builder->CreateLShr(tb_i64, 32);
+ }
+
+ SetGpr(rd, tb_i64);
+}
+
+void PPULLVMRecompiler::LWAUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LWAUX", &PPUInterpreter::LWAUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) {
+ InterpreterCall("DSTST", &PPUInterpreter::DSTST, ra, rb, strm, t);
+}
+
+void PPULLVMRecompiler::LHAUX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LHAUX", &PPUInterpreter::LHAUX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::STHX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 16));
+ //InterpreterCall("STHX", &PPUInterpreter::STHX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::ORC(u32 ra, u32 rs, u32 rb, bool rc) {
+ InterpreterCall("ORC", &PPUInterpreter::ORC, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::ECOWX(u32 rs, u32 ra, u32 rb) {
+ InterpreterCall("ECOWX", &PPUInterpreter::ECOWX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STHUX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 16));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STHUX", &PPUInterpreter::STHUX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::OR(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("OR", &PPUInterpreter::OR, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64);
+ SetGpr(rd, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ // TODO make sure an exception does not occur on divide by 0 and overflow
+ //InterpreterCall("DIVDU", &PPUInterpreter::DIVDU, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i32 = GetGpr(ra, 32);
+ auto rb_i32 = GetGpr(rb, 32);
+ auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32);
+ auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ // TODO make sure an exception does not occur on divide by 0 and overflow
+ //InterpreterCall("DIVWU", &PPUInterpreter::DIVWU, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::MTSPR(u32 spr, u32 rs) {
+ auto rs_i64 = GetGpr(rs);
+ auto n = (spr >> 5) | ((spr & 0x1f) << 5);
+
+ switch (n) {
+ case 0x001:
+ SetXer(rs_i64);
+ break;
+ case 0x008:
+ SetLr(rs_i64);
+ break;
+ case 0x009:
+ SetCtr(rs_i64);
+ break;
+ case 0x100:
+ SetUsprg0(rs_i64);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ //InterpreterCall("MTSPR", &PPUInterpreter::MTSPR, spr, rs);
+}
+
+void PPULLVMRecompiler::NAND(u32 ra, u32 rs, u32 rb, bool rc) {
+ InterpreterCall("NAND", &PPUInterpreter::NAND, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::STVXL(u32 vs, u32 ra, u32 rb) {
+ STVX(vs, ra, rb);
+ //InterpreterCall("STVXL", &PPUInterpreter::STVXL, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i64 = GetGpr(ra);
+ auto rb_i64 = GetGpr(rb);
+ auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64);
+ SetGpr(rd, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ // TODO make sure an exception does not occur on divide by 0 and overflow
+ //InterpreterCall("DIVD", &PPUInterpreter::DIVD, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) {
+ auto ra_i32 = GetGpr(ra, 32);
+ auto rb_i32 = GetGpr(rb, 32);
+ auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32);
+ auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ // TODO implement oe
+ // TODO make sure an exception does not occur on divide by 0 and overflow
+ //InterpreterCall("DIVW", &PPUInterpreter::DIVW, rd, ra, rb, oe, rc);
+}
+
+void PPULLVMRecompiler::LVLX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto eb_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF);
+ eb_i64 = m_ir_builder->CreateShl(eb_i64, 3);
+ auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128));
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL);
+ auto mem_i128 = ReadMemory(addr_i64, 128, 16);
+ mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128);
+ SetVr(vd, mem_i128);
+ //InterpreterCall("LVLX", &PPUInterpreter::LVLX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::LDBRX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i64 = ReadMemory(addr_i64, 64, 0, false);
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LDBRX", &PPUInterpreter::LDBRX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LSWX(u32 rd, u32 ra, u32 rb) {
+ InterpreterCall("LSWX", &PPUInterpreter::LSWX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LWBRX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32, 0, false);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LWBRX", &PPUInterpreter::LWBRX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::LFSX(u32 frd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ SetFpr(frd, mem_i32);
+ //InterpreterCall("LFSX", &PPUInterpreter::LFSX, frd, ra, rb);
+}
+
+void PPULLVMRecompiler::SRW(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F);
+ auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty());
+ auto res_i64 = m_ir_builder->CreateLShr(rs_i64, rb_i64);
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRW", &PPUInterpreter::SRW, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::SRD(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128));
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F);
+ auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128));
+ auto res_i128 = m_ir_builder->CreateLShr(rs_i128, rb_i128);
+ auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty());
+ SetGpr(ra, res_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRD", &PPUInterpreter::SRD, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::LVRX(u32 vd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto eb_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), addr_i64);
+ eb_i64 = m_ir_builder->CreateAnd(eb_i64, 0xF);
+ eb_i64 = m_ir_builder->CreateShl(eb_i64, 3);
+ auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128));
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL);
+ auto mem_i128 = ReadMemory(addr_i64, 128, 16);
+ mem_i128 = m_ir_builder->CreateLShr(mem_i128, eb_i128);
+ auto cmp_i1 = m_ir_builder->CreateICmpNE(eb_i64, m_ir_builder->getInt64(0));
+ auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128));
+ mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128);
+ SetVr(vd, mem_i128);
+
+ //InterpreterCall("LVRX", &PPUInterpreter::LVRX, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::LSWI(u32 rd, u32 ra, u32 nb) {
+ auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0);
+
+ nb = nb ? nb : 32;
+ for (u32 i = 0; i < nb; i += 4) {
+ auto val_i32 = ReadMemory(addr_i64, 32, 0, true, false);
+
+ if (i + 4 <= nb) {
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4));
+ } else {
+ u32 mask = 0xFFFFFFFF << ((4 - (nb - i)) * 8);
+ val_i32 = m_ir_builder->CreateAnd(val_i32, mask);
+ }
+
+ auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, val_i64);
+ rd = (rd + 1) % 32;
+ }
+
+ //InterpreterCall("LSWI", &PPUInterpreter::LSWI, rd, ra, nb);
+}
+
+void PPULLVMRecompiler::LFSUX(u32 frd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ SetFpr(frd, mem_i32);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LFSUX", &PPUInterpreter::LFSUX, frd, ra, rb);
+}
+
+void PPULLVMRecompiler::SYNC(u32 l) {
+ m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence));
+ //InterpreterCall("SYNC", &PPUInterpreter::SYNC, l);
+}
+
+void PPULLVMRecompiler::LFDX(u32 frd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetFpr(frd, mem_i64);
+ //InterpreterCall("LFDX", &PPUInterpreter::LFDX, frd, ra, rb);
+}
+
+void PPULLVMRecompiler::LFDUX(u32 frd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetFpr(frd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LFDUX", &PPUInterpreter::LFDUX, frd, ra, rb);
+}
+
+void PPULLVMRecompiler::STVLX(u32 vs, u32 ra, u32 rb) {
+ InterpreterCall("STVLX", &PPUInterpreter::STVLX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::STSWX(u32 rs, u32 ra, u32 rb) {
+ InterpreterCall("STSWX", &PPUInterpreter::STSWX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STWBRX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 32), 0, false);
+ //InterpreterCall("STWBRX", &PPUInterpreter::STWBRX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::STFSX(u32 frs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty());
+ WriteMemory(addr_i64, frs_i32);
+ //InterpreterCall("STFSX", &PPUInterpreter::STFSX, frs, ra, rb);
+}
+
+void PPULLVMRecompiler::STVRX(u32 vs, u32 ra, u32 rb) {
+ InterpreterCall("STVRX", &PPUInterpreter::STVRX, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::STFSUX(u32 frs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty());
+ WriteMemory(addr_i64, frs_i32);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STFSUX", &PPUInterpreter::STFSUX, frs, ra, rb);
+}
+
+void PPULLVMRecompiler::STSWI(u32 rd, u32 ra, u32 nb) {
+ auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0);
+
+ nb = nb ? nb : 32;
+ for (u32 i = 0; i < nb; i += 4) {
+ auto val_i32 = GetGpr(rd, 32);
+
+ if (i + 4 <= nb) {
+ WriteMemory(addr_i64, val_i32, 0, true, false);
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4));
+ rd = (rd + 1) % 32;
+ } else {
+ u32 n = nb - i;
+ if (n >= 2) {
+ auto val_i16 = m_ir_builder->CreateLShr(val_i32, 16);
+ val_i16 = m_ir_builder->CreateTrunc(val_i16, m_ir_builder->getInt16Ty());
+ WriteMemory(addr_i64, val_i16);
+
+ if (n == 3) {
+ auto val_i8 = m_ir_builder->CreateLShr(val_i32, 8);
+ val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty());
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(2));
+ WriteMemory(addr_i64, val_i8);
+ }
+ } else {
+ auto val_i8 = m_ir_builder->CreateLShr(val_i32, 24);
+ val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty());
+ WriteMemory(addr_i64, val_i8);
+ }
+ }
+ }
+
+ //InterpreterCall("STSWI", &PPUInterpreter::STSWI, rd, ra, nb);
+}
+
+void PPULLVMRecompiler::STFDX(u32 frs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty());
+ WriteMemory(addr_i64, frs_i64);
+ //InterpreterCall("STFDX", &PPUInterpreter::STFDX, frs, ra, rb);
+}
+
+void PPULLVMRecompiler::STFDUX(u32 frs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty());
+ WriteMemory(addr_i64, frs_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STFDUX", &PPUInterpreter::STFDUX, frs, ra, rb);
+}
+
+void PPULLVMRecompiler::LVLXL(u32 vd, u32 ra, u32 rb) {
+ LVLX(vd, ra, rb);
+ //InterpreterCall("LVLXL", &PPUInterpreter::LVLXL, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::LHBRX(u32 rd, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i16 = ReadMemory(addr_i64, 16, 0, false);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LHBRX", &PPUInterpreter::LHBRX, rd, ra, rb);
+}
+
+void PPULLVMRecompiler::SRAW(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ rs_i64 = m_ir_builder->CreateShl(rs_i64, 32);
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F);
+ auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty());
+ auto res_i64 = m_ir_builder->CreateAShr(rs_i64, rb_i64);
+ auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32);
+ SetGpr(ra, ra_i64);
+
+ auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty());
+ auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0));
+ auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0));
+ auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1);
+ SetXerCa(ca_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRAW", &PPUInterpreter::SRAW, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::SRAD(u32 ra, u32 rs, u32 rb, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128));
+ rs_i128 = m_ir_builder->CreateShl(rs_i128, 64);
+ auto rb_i8 = GetGpr(rb, 8);
+ rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F);
+ auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128));
+ auto res_i128 = m_ir_builder->CreateAShr(rs_i128, rb_i128);
+ auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64);
+ auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty());
+ SetGpr(ra, ra_i64);
+
+ auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty());
+ auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0));
+ auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0));
+ auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1);
+ SetXerCa(ca_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRAD", &PPUInterpreter::SRAD, ra, rs, rb, rc);
+}
+
+void PPULLVMRecompiler::LVRXL(u32 vd, u32 ra, u32 rb) {
+ LVRX(vd, ra, rb);
+ //InterpreterCall("LVRXL", &PPUInterpreter::LVRXL, vd, ra, rb);
+}
+
+void PPULLVMRecompiler::DSS(u32 strm, u32 a) {
+ InterpreterCall("DSS", &PPUInterpreter::DSS, strm, a);
+}
+
+void PPULLVMRecompiler::SRAWI(u32 ra, u32 rs, u32 sh, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty());
+ rs_i64 = m_ir_builder->CreateShl(rs_i64, 32);
+ auto res_i64 = m_ir_builder->CreateAShr(rs_i64, sh);
+ auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32);
+ SetGpr(ra, ra_i64);
+
+ auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty());
+ auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0));
+ auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0));
+ auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1);
+ SetXerCa(ca_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRAWI", &PPUInterpreter::SRAWI, ra, rs, sh, rc);
+}
+
+void PPULLVMRecompiler::SRADI1(u32 ra, u32 rs, u32 sh, bool rc) {
+ auto rs_i64 = GetGpr(rs);
+ auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128));
+ rs_i128 = m_ir_builder->CreateShl(rs_i128, 64);
+ auto res_i128 = m_ir_builder->CreateAShr(rs_i128, sh);
+ auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64);
+ auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty());
+ SetGpr(ra, ra_i64);
+
+ auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty());
+ auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0));
+ auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0));
+ auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1);
+ SetXerCa(ca_i1);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0));
+ }
+
+ //InterpreterCall("SRADI1", &PPUInterpreter::SRADI1, ra, rs, sh, rc);
+}
+
+void PPULLVMRecompiler::SRADI2(u32 ra, u32 rs, u32 sh, bool rc) {
+ SRADI1(ra, rs, sh, rc);
+ //InterpreterCall("SRADI2", &PPUInterpreter::SRADI2, ra, rs, sh, rc);
+}
+
+void PPULLVMRecompiler::EIEIO() {
+ m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence));
+ //InterpreterCall("EIEIO", &PPUInterpreter::EIEIO);
+}
+
+void PPULLVMRecompiler::STVLXL(u32 vs, u32 ra, u32 rb) {
+ STVLX(vs, ra, rb);
+ //InterpreterCall("STVLXL", &PPUInterpreter::STVLXL, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::STHBRX(u32 rs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 16), 0, false);
+ //InterpreterCall("STHBRX", &PPUInterpreter::STHBRX, rs, ra, rb);
+}
+
+void PPULLVMRecompiler::EXTSH(u32 ra, u32 rs, bool rc) {
+ auto rs_i16 = GetGpr(rs, 16);
+ auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty());
+ SetGpr(ra, rs_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("EXTSH", &PPUInterpreter::EXTSH, ra, rs, rc);
+}
+
+void PPULLVMRecompiler::STVRXL(u32 vs, u32 ra, u32 rb) {
+ STVRX(vs, ra, rb);
+ //InterpreterCall("STVRXL", &PPUInterpreter::STVRXL, vs, ra, rb);
+}
+
+void PPULLVMRecompiler::EXTSB(u32 ra, u32 rs, bool rc) {
+ auto rs_i8 = GetGpr(rs, 8);
+ auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty());
+ SetGpr(ra, rs_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("EXTSB", &PPUInterpreter::EXTSB, ra, rs, rc);
+}
+
+void PPULLVMRecompiler::STFIWX(u32 frs, u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty());
+ auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty());
+ WriteMemory(addr_i64, frs_i32);
+ //InterpreterCall("STFIWX", &PPUInterpreter::STFIWX, frs, ra, rb);
+}
+
+void PPULLVMRecompiler::EXTSW(u32 ra, u32 rs, bool rc) {
+ auto rs_i32 = GetGpr(rs, 32);
+ auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty());
+ SetGpr(ra, rs_i64);
+
+ if (rc) {
+ SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0));
+ }
+ //InterpreterCall("EXTSW", &PPUInterpreter::EXTSW, ra, rs, rc);
+}
+
+void PPULLVMRecompiler::ICBI(u32 ra, u32 rs) {
+ InterpreterCall("ICBI", &PPUInterpreter::ICBI, ra, rs);
+}
+
+void PPULLVMRecompiler::DCBZ(u32 ra, u32 rb) {
+ auto addr_i64 = GetGpr(rb);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, ~(127ULL));
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0)));
+ auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy());
+
+ std::vector types = {(Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty()};
+ m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types),
+ addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true));
+ //InterpreterCall("DCBZ", &PPUInterpreter::DCBZ, ra, rb);L
+}
+
+void PPULLVMRecompiler::LWZ(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LWZ", &PPUInterpreter::LWZ, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LWZU(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LWZU", &PPUInterpreter::LWZU, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LBZ(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i8 = ReadMemory(addr_i64, 8);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LBZ", &PPUInterpreter::LBZ, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LBZU(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i8 = ReadMemory(addr_i64, 8);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LBZU", &PPUInterpreter::LBZU, rd, ra, d);
+}
+
+void PPULLVMRecompiler::STW(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 32));
+ //InterpreterCall("STW", &PPUInterpreter::STW, rs, ra, d);
+}
+
+void PPULLVMRecompiler::STWU(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 32));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STWU", &PPUInterpreter::STWU, rs, ra, d);
+}
+
+void PPULLVMRecompiler::STB(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 8));
+ //InterpreterCall("STB", &PPUInterpreter::STB, rs, ra, d);
+}
+
+void PPULLVMRecompiler::STBU(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 8));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STBU", &PPUInterpreter::STBU, rs, ra, d);
+}
+
+void PPULLVMRecompiler::LHZ(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LHZ", &PPUInterpreter::LHZ, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LHZU(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LHZU", &PPUInterpreter::LHZU, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LHA(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LHA", &PPUInterpreter::LHA, rd, ra, d);
+}
+
+void PPULLVMRecompiler::LHAU(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i16 = ReadMemory(addr_i64, 16);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LHAU", &PPUInterpreter::LHAU, rd, ra, d);
+}
+
+void PPULLVMRecompiler::STH(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 16));
+ //InterpreterCall("STH", &PPUInterpreter::STH, rs, ra, d);
+}
+
+void PPULLVMRecompiler::STHU(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 16));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STHU", &PPUInterpreter::STHU, rs, ra, d);
+}
+
+void PPULLVMRecompiler::LMW(u32 rd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra));
+ }
+
+ for (u32 i = rd; i < 32; i++) {
+ auto val_i32 = ReadMemory(addr_i64, 32);
+ auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty());
+ SetGpr(i, val_i64);
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4));
+ }
+
+ //InterpreterCall("LMW", &PPUInterpreter::LMW, rd, ra, d);
+}
+
+void PPULLVMRecompiler::STMW(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra));
+ }
+
+ for (u32 i = rs; i < 32; i++) {
+ auto val_i32 = GetGpr(i, 32);
+ WriteMemory(addr_i64, val_i32);
+ addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4));
+ }
+
+ //InterpreterCall("STMW", &PPUInterpreter::STMW, rs, ra, d);
+}
+
+void PPULLVMRecompiler::LFS(u32 frd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ SetFpr(frd, mem_i32);
+ //InterpreterCall("LFS", &PPUInterpreter::LFS, frd, ra, d);
+}
+
+void PPULLVMRecompiler::LFSU(u32 frd, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ SetFpr(frd, mem_i32);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LFSU", &PPUInterpreter::LFSU, frd, ra, ds);
+}
+
+void PPULLVMRecompiler::LFD(u32 frd, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetFpr(frd, mem_i64);
+ //InterpreterCall("LFD", &PPUInterpreter::LFD, frd, ra, d);
+}
+
+void PPULLVMRecompiler::LFDU(u32 frd, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetFpr(frd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LFDU", &PPUInterpreter::LFDU, frd, ra, ds);
+}
+
+void PPULLVMRecompiler::STFS(u32 frs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty());
+ WriteMemory(addr_i64, frs_i32);
+ //InterpreterCall("STFS", &PPUInterpreter::STFS, frs, ra, d);
+}
+
+void PPULLVMRecompiler::STFSU(u32 frs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty());
+ WriteMemory(addr_i64, frs_i32);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STFSU", &PPUInterpreter::STFSU, frs, ra, d);
+}
+
+void PPULLVMRecompiler::STFD(u32 frs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty());
+ WriteMemory(addr_i64, frs_i64);
+ //InterpreterCall("STFD", &PPUInterpreter::STFD, frs, ra, d);
+}
+
+void PPULLVMRecompiler::STFDU(u32 frs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty());
+ WriteMemory(addr_i64, frs_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STFDU", &PPUInterpreter::STFDU, frs, ra, d);
+}
+
+void PPULLVMRecompiler::LD(u32 rd, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LD", &PPUInterpreter::LD, rd, ra, ds);
+}
+
+void PPULLVMRecompiler::LDU(u32 rd, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ auto mem_i64 = ReadMemory(addr_i64, 64);
+ SetGpr(rd, mem_i64);
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("LDU", &PPUInterpreter::LDU, rd, ra, ds);
+}
+
+void PPULLVMRecompiler::LWA(u32 rd, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ auto mem_i32 = ReadMemory(addr_i64, 32);
+ auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty());
+ SetGpr(rd, mem_i64);
+ //InterpreterCall("LWA", &PPUInterpreter::LWA, rd, ra, ds);
+}
+
+void PPULLVMRecompiler::FDIVS(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FDIVS", &PPUInterpreter::FDIVS, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FSUBS(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FSUBS", &PPUInterpreter::FSUBS, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FADDS(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FADDS", &PPUInterpreter::FADDS, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FSQRTS(u32 frd, u32 frb, bool rc) {
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FSQRTS", &PPUInterpreter::FSQRTS, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FRES(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FRES", &PPUInterpreter::FRES, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FMULS(u32 frd, u32 fra, u32 frc, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rc_f64 = GetFpr(frc);
+ auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMULS", &PPUInterpreter::FMULS, frd, fra, frc, rc);
+}
+
+void PPULLVMRecompiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMADDS", &PPUInterpreter::FMADDS, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ rb_f64 = m_ir_builder->CreateFNeg(rb_f64);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMSUBS", &PPUInterpreter::FMSUBS, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ rb_f64 = m_ir_builder->CreateFNeg(rb_f64);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ res_f64 = m_ir_builder->CreateFNeg(res_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNMSUBS", &PPUInterpreter::FNMSUBS, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ res_f64 = m_ir_builder->CreateFNeg(res_f64);
+ auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy());
+ res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNMADDS", &PPUInterpreter::FNMADDS, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::STD(u32 rs, u32 ra, s32 d) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d);
+ if (ra) {
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+ }
+
+ WriteMemory(addr_i64, GetGpr(rs, 64));
+ //InterpreterCall("STD", &PPUInterpreter::STD, rs, ra, d);
+}
+
+void PPULLVMRecompiler::STDU(u32 rs, u32 ra, s32 ds) {
+ auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds);
+ auto ra_i64 = GetGpr(ra);
+ addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64);
+
+ WriteMemory(addr_i64, GetGpr(rs, 64));
+ SetGpr(ra, addr_i64);
+ //InterpreterCall("STDU", &PPUInterpreter::STDU, rs, ra, ds);
+}
+
+void PPULLVMRecompiler::MTFSB1(u32 crbd, bool rc) {
+ InterpreterCall("MTFSB1", &PPUInterpreter::MTFSB1, crbd, rc);
+}
+
+void PPULLVMRecompiler::MCRFS(u32 crbd, u32 crbs) {
+ InterpreterCall("MCRFS", &PPUInterpreter::MCRFS, crbd, crbs);
+}
+
+void PPULLVMRecompiler::MTFSB0(u32 crbd, bool rc) {
+ InterpreterCall("MTFSB0", &PPUInterpreter::MTFSB0, crbd, rc);
+}
+
+void PPULLVMRecompiler::MTFSFI(u32 crfd, u32 i, bool rc) {
+ InterpreterCall("MTFSFI", &PPUInterpreter::MTFSFI, crfd, i, rc);
+}
+
+void PPULLVMRecompiler::MFFS(u32 frd, bool rc) {
+ InterpreterCall("MFFS", &PPUInterpreter::MFFS, frd, rc);
+}
+
+void PPULLVMRecompiler::MTFSF(u32 flm, u32 frb, bool rc) {
+ InterpreterCall("MTFSF", &PPUInterpreter::MTFSF, flm, frb, rc);
+}
+
+void PPULLVMRecompiler::FCMPU(u32 crfd, u32 fra, u32 frb) {
+ InterpreterCall("FCMPU", &PPUInterpreter::FCMPU, crfd, fra, frb);
+}
+
+void PPULLVMRecompiler::FRSP(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FRSP", &PPUInterpreter::FRSP, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FCTIW(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FCTIW", &PPUInterpreter::FCTIW, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FCTIWZ(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FCTIWZ", &PPUInterpreter::FCTIWZ, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FDIV(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FDIV", &PPUInterpreter::FDIV, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FSUB(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FSUB", &PPUInterpreter::FSUB, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FADD(u32 frd, u32 fra, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FADD", &PPUInterpreter::FADD, frd, fra, frb, rc);
+}
+
+void PPULLVMRecompiler::FSQRT(u32 frd, u32 frb, bool rc) {
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FSQRT", &PPUInterpreter::FSQRT, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ InterpreterCall("FSEL", &PPUInterpreter::FSEL, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FMUL(u32 frd, u32 fra, u32 frc, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rc_f64 = GetFpr(frc);
+ auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMUL", &PPUInterpreter::FMUL, frd, fra, frc, rc);
+}
+
+void PPULLVMRecompiler::FRSQRTE(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FRSQRTE", &PPUInterpreter::FRSQRTE, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ rb_f64 = m_ir_builder->CreateFNeg(rb_f64);
+ auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMSUB", &PPUInterpreter::FMSUB, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FMADD", &PPUInterpreter::FMADD, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ rc_f64 = m_ir_builder->CreateFNeg(rc_f64);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNMSUB", &PPUInterpreter::FNMSUB, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {
+ auto ra_f64 = GetFpr(fra);
+ auto rb_f64 = GetFpr(frb);
+ auto rc_f64 = GetFpr(frc);
+ rb_f64 = m_ir_builder->CreateFNeg(rb_f64);
+ rc_f64 = m_ir_builder->CreateFNeg(rc_f64);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNMADD", &PPUInterpreter::FNMADD, frd, fra, frc, frb, rc);
+}
+
+void PPULLVMRecompiler::FCMPO(u32 crfd, u32 fra, u32 frb) {
+ InterpreterCall("FCMPO", &PPUInterpreter::FCMPO, crfd, fra, frb);
+}
+
+void PPULLVMRecompiler::FNEG(u32 frd, u32 frb, bool rc) {
+ auto rb_f64 = GetFpr(frb);
+ rb_f64 = m_ir_builder->CreateFNeg(rb_f64);
+ SetFpr(frd, rb_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNEG", &PPUInterpreter::FNEG, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FMR(u32 frd, u32 frb, bool rc) {
+ SetFpr(frd, GetFpr(frb));
+ // TODO: Set flags
+ //InterpreterCall("FMR", &PPUInterpreter::FMR, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FNABS(u32 frd, u32 frb, bool rc) {
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64);
+ res_f64 = m_ir_builder->CreateFNeg(res_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FNABS", &PPUInterpreter::FNABS, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FABS(u32 frd, u32 frb, bool rc) {
+ auto rb_f64 = GetFpr(frb);
+ auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64);
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FABS", &PPUInterpreter::FABS, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FCTID(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FCTID", &PPUInterpreter::FCTID, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FCTIDZ(u32 frd, u32 frb, bool rc) {
+ InterpreterCall("FCTIDZ", &PPUInterpreter::FCTIDZ, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::FCFID(u32 frd, u32 frb, bool rc) {
+ auto rb_i64 = GetFpr(frb, 64, true);
+ auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy());
+ SetFpr(frd, res_f64);
+
+ // TODO: Set flags
+ //InterpreterCall("FCFID", &PPUInterpreter::FCFID, frd, frb, rc);
+}
+
+void PPULLVMRecompiler::UNK(const u32 code, const u32 opcode, const u32 gcode) {
+ //InterpreterCall("UNK", &PPUInterpreter::UNK, code, opcode, gcode);
+}
+
+BasicBlock * PPULLVMRecompiler::GetBlockInFunction(u32 address, Function * function, bool create_if_not_exist) {
+ auto block_name = fmt::Format("instr_0x%X", address);
+ BasicBlock * block = nullptr;
+ for (auto i = function->getBasicBlockList().begin(); i != function->getBasicBlockList().end(); i++) {
+ if (i->getName() == block_name) {
+ block = &(*i);
+ break;
+ }
+ }
+
+ if (!block && create_if_not_exist) {
+ block = BasicBlock::Create(m_ir_builder->getContext(), block_name, function);
+ }
+
+ return block;
+}
+
+void PPULLVMRecompiler::Compile(u32 address) {
+ auto compilation_start = std::chrono::high_resolution_clock::now();
+
+ // Get the revision number for this section
+ u32 revision = 0;
+ auto compiled = m_compiled.lower_bound(std::make_pair(address, 0));
+ if (compiled != m_compiled.end() && compiled->first.first == address) {
+ revision = ~(compiled->first.second);
+ revision++;
+ }
+
+ auto ir_build_start = std::chrono::high_resolution_clock::now();
+
+ // Create a function for this section
+ auto function_name = fmt::Format("fn_0x%X_%u", address, revision);
+ m_current_function = (Function *)m_module->getOrInsertFunction(function_name, m_ir_builder->getVoidTy(),
+ m_ir_builder->getInt8PtrTy() /*ppu_state*/,
+ m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
+ m_current_function->setCallingConv(CallingConv::X86_64_Win64);
+ auto arg_i = m_current_function->arg_begin();
+ arg_i->setName("ppu_state");
+ (++arg_i)->setName("interpreter");
+
+ // Add an entry block that branches to the first instruction
+ m_ir_builder->SetInsertPoint(BasicBlock::Create(m_ir_builder->getContext(), "entry", m_current_function));
+ m_ir_builder->CreateBr(GetBlockInFunction(address, m_current_function, true));
+
+ // Convert each block in this section to LLVM IR
+ m_num_instructions = 0;
+ m_current_function_uncompiled_blocks_list.clear();
+ m_current_function_unhit_blocks_list.clear();
+ m_current_function_uncompiled_blocks_list.push_back(address);
+ while (!m_current_function_uncompiled_blocks_list.empty()) {
+ m_current_instruction_address = m_current_function_uncompiled_blocks_list.front();
+ auto block = GetBlockInFunction(m_current_instruction_address, m_current_function, true);
+ m_hit_branch_instruction = false;
+ m_ir_builder->SetInsertPoint(block);
+ m_current_function_uncompiled_blocks_list.pop_front();
+
+ while (!m_hit_branch_instruction) {
+ if (!block->getInstList().empty()) {
+ break;
+ }
+
+ u32 instr = vm::read32(m_current_instruction_address);
+ Decode(instr);
+ m_num_instructions++;
+
+ m_current_instruction_address += 4;
+ if (!m_hit_branch_instruction) {
+ block = GetBlockInFunction(m_current_instruction_address, m_current_function, true);
+ m_ir_builder->CreateBr(block);
+ m_ir_builder->SetInsertPoint(block);
+ }
+ }
+ }
+
+ auto ir_build_end = std::chrono::high_resolution_clock::now();
+ m_ir_build_time += std::chrono::duration_cast(ir_build_end - ir_build_start);
+
+ // Optimize this function
+ auto optimize_start = std::chrono::high_resolution_clock::now();
+ m_fpm->run(*m_current_function);
+ auto optimize_end = std::chrono::high_resolution_clock::now();
+ m_optimizing_time += std::chrono::duration_cast(optimize_end - optimize_start);
+
+ // Translate to machine code
+ auto translate_start = std::chrono::high_resolution_clock::now();
+ MachineCodeInfo mci;
+ m_execution_engine->runJITOnFunction(m_current_function, &mci);
+ auto translate_end = std::chrono::high_resolution_clock::now();
+ m_translation_time += std::chrono::duration_cast(translate_end - translate_start);
+
+ // Add the executable to private and shared data stores
+ ExecutableInfo executable_info;
+ executable_info.executable = (Executable)mci.address();
+ executable_info.size = mci.size();
+ executable_info.num_instructions = m_num_instructions;
+ executable_info.unhit_blocks_list = std::move(m_current_function_unhit_blocks_list);
+ executable_info.llvm_function = m_current_function;
+ m_compiled[std::make_pair(address, ~revision)] = executable_info;
+
+ {
+ std::lock_guard lock(m_compiled_shared_lock);
+ m_compiled_shared[std::make_pair(address, ~revision)] = std::make_pair(executable_info.executable, 0);
+ }
+
+ if (revision) {
+ m_revision.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ auto compilation_end = std::chrono::high_resolution_clock::now();
+ m_compilation_time += std::chrono::duration_cast(compilation_end - compilation_start);
+}
+
+void PPULLVMRecompiler::RemoveUnusedOldVersions() {
+ u32 num_removed = 0;
+ u32 prev_address = 0;
+ for (auto i = m_compiled.begin(); i != m_compiled.end(); i++) {
+ u32 current_address = i->first.first;
+ if (prev_address == current_address) {
+ bool erase_this_entry = false;
+
+ {
+ std::lock_guard lock(m_compiled_shared_lock);
+ auto j = m_compiled_shared.find(i->first);
+ if (j->second.second == 0) {
+ m_compiled_shared.erase(j);
+ erase_this_entry = true;
+ }
+ }
+
+ if (erase_this_entry) {
+ auto tmp = i;
+ i--;
+ m_execution_engine->freeMachineCodeForFunction(tmp->second.llvm_function);
+ tmp->second.llvm_function->eraseFromParent();
+ m_compiled.erase(tmp);
+ num_removed++;
+ }
+ }
+
+ prev_address = current_address;
+ }
+
+ if (num_removed > 0) {
+ LOG_NOTICE(PPU, "Removed %u old versions", num_removed);
+ }
+}
+
+bool PPULLVMRecompiler::NeedsCompiling(u32 address) {
+ auto i = m_compiled.lower_bound(std::make_pair(address, 0));
+ if (i != m_compiled.end() && i->first.first == address) {
+ if (i->second.num_instructions >= 300) {
+ // This section has reached its limit. Don't allow further expansion.
+ return false;
+ }
+
+ // If any of the unhit blocks in this function have been hit, then recompile this section
+ for (auto j = i->second.unhit_blocks_list.begin(); j != i->second.unhit_blocks_list.end(); j++) {
+ if (m_hit_blocks.find(*j) != m_hit_blocks.end()) {
+ return true;
+ }
+ }
+
+ return false;
+ } else {
+ // This section has not been encountered before
+ return true;
+ }
+}
+
+Value * PPULLVMRecompiler::GetPPUState() {
+ return m_current_function->arg_begin();
+}
+
+Value * PPULLVMRecompiler::GetInterpreter() {
+ auto i = m_current_function->arg_begin();
+ i++;
+ return i;
+}
+
+Value * PPULLVMRecompiler::GetBit(Value * val, u32 n) {
+ Value * bit;
+
+#ifdef PPU_LLVM_RECOMPILER_USE_BMI
+ if (val->getType()->isIntegerTy(32)) {
+ bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32(1 << (31- n)));
+ } else if (val->getType()->isIntegerTy(64)) {
+ bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)1 << (63 - n)));
+ } else {
+#endif
+ if (val->getType()->getIntegerBitWidth() != (n + 1)) {
+ bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1);
+ }
+
+ bit = m_ir_builder->CreateAnd(bit, 1);
+#ifdef PPU_LLVM_RECOMPILER_USE_BMI
+ }
+#endif
+
+ return bit;
+}
+
+Value * PPULLVMRecompiler::ClrBit(Value * val, u32 n) {
+ return m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1)));
+}
+
+Value * PPULLVMRecompiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) {
+ if (doClear) {
+ val = ClrBit(val, n);
+ }
+
+ if (bit->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) {
+ bit = m_ir_builder->CreateZExt(bit, val->getType());
+ } else if (bit->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) {
+ bit = m_ir_builder->CreateTrunc(bit, val->getType());
+ }
+
+ if (val->getType()->getIntegerBitWidth() != (n + 1)) {
+ bit = m_ir_builder->CreateShl(bit, bit->getType()->getIntegerBitWidth() - n - 1);
+ }
+
+ return m_ir_builder->CreateOr(val, bit);
+}
+
+Value * PPULLVMRecompiler::GetNibble(Value * val, u32 n) {
+ Value * nibble;
+
+#ifdef PPU_LLVM_RECOMPILER_USE_BMI
+ if (val->getType()->isIntegerTy(32)) {
+ nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32((u64)0xF << ((7 - n) * 4)));
+ } else if (val->getType()->isIntegerTy(64)) {
+ nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)0xF << ((15 - n) * 4)));
+ } else {
+#endif
+ if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) {
+ val = m_ir_builder->CreateLShr(val, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4);
+ }
+
+ nibble = m_ir_builder->CreateAnd(val, 0xF);
+#ifdef PPU_LLVM_RECOMPILER_USE_BMI
+ }
+#endif
+
+ return nibble;
+}
+
+Value * PPULLVMRecompiler::ClrNibble(Value * val, u32 n) {
+ return m_ir_builder->CreateAnd(val, ~((u64)0xF << ((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4)));
+}
+
+Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) {
+ if (doClear) {
+ val = ClrNibble(val, n);
+ }
+
+ if (nibble->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) {
+ nibble = m_ir_builder->CreateZExt(nibble, val->getType());
+ } else if (nibble->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) {
+ nibble = m_ir_builder->CreateTrunc(nibble, val->getType());
+ }
+
+ if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) {
+ nibble = m_ir_builder->CreateShl(nibble, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4);
+ }
+
+ return m_ir_builder->CreateOr(val, nibble);
+}
+
+Value * PPULLVMRecompiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) {
+ if (doClear) {
+ val = ClrNibble(val, n);
+ }
+
+ if (b0) {
+ val = SetBit(val, n * 4, b0, false);
+ }
+
+ if (b1) {
+ val = SetBit(val, (n * 4) + 1, b1, false);
+ }
+
+ if (b2) {
+ val = SetBit(val, (n * 4) + 2, b2, false);
+ }
+
+ if (b3) {
+ val = SetBit(val, (n * 4) + 3, b3, false);
+ }
+
+ return val;
+}
+
+Value * PPULLVMRecompiler::GetPc() {
+ auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, PC));
+ auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4);
+}
+
+void PPULLVMRecompiler::SetPc(Value * val_ix) {
+ auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, PC));
+ auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty());
+ m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4);
+}
+
+Value * PPULLVMRecompiler::GetGpr(u32 r, u32 num_bits) {
+ auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, GPR[r]));
+ auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8);
+}
+
+void PPULLVMRecompiler::SetGpr(u32 r, Value * val_x64) {
+ auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, GPR[r]));
+ auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty());
+ m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetCr() {
+ auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CR));
+ auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4);
+}
+
+Value * PPULLVMRecompiler::GetCrField(u32 n) {
+ return GetNibble(GetCr(), n);
+}
+
+void PPULLVMRecompiler::SetCr(Value * val_x32) {
+ auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty());
+ auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CR));
+ auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4);
+}
+
+void PPULLVMRecompiler::SetCrField(u32 n, Value * field) {
+ SetCr(SetNibble(GetCr(), n, field));
+}
+
+void PPULLVMRecompiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) {
+ SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3));
+}
+
+void PPULLVMRecompiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) {
+ auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b);
+ auto gt_i1 = m_ir_builder->CreateICmpSGT(a, b);
+ auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b);
+ auto cr_i32 = GetCr();
+ cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo());
+ SetCr(cr_i32);
+}
+
+void PPULLVMRecompiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) {
+ auto lt_i1 = m_ir_builder->CreateICmpULT(a, b);
+ auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b);
+ auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b);
+ auto cr_i32 = GetCr();
+ cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo());
+ SetCr(cr_i32);
+}
+
+void PPULLVMRecompiler::SetCr6AfterVectorCompare(u32 vr) {
+ auto vr_v16i8 = GetVrAsIntVec(vr, 8);
+ auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8);
+ auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0));
+ auto cmp1_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0xFFFF));
+ auto cr_i32 = GetCr();
+ cr_i32 = SetNibble(cr_i32, 6, cmp1_i1, nullptr, cmp0_i1, nullptr);
+ SetCr(cr_i32);
+}
+
+Value * PPULLVMRecompiler::GetLr() {
+ auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, LR));
+ auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8);
+}
+
+void PPULLVMRecompiler::SetLr(Value * val_x64) {
+ auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty());
+ auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, LR));
+ auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetCtr() {
+ auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CTR));
+ auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8);
+}
+
+void PPULLVMRecompiler::SetCtr(Value * val_x64) {
+ auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty());
+ auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, CTR));
+ auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetXer() {
+ auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, XER));
+ auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetXerCa() {
+ return GetBit(GetXer(), 34);
+}
+
+Value * PPULLVMRecompiler::GetXerSo() {
+ return GetBit(GetXer(), 32);
+}
+
+void PPULLVMRecompiler::SetXer(Value * val_x64) {
+ auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty());
+ auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, XER));
+ auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8);
+}
+
+void PPULLVMRecompiler::SetXerCa(Value * ca) {
+ auto xer_i64 = GetXer();
+ xer_i64 = SetBit(xer_i64, 34, ca);
+ SetXer(xer_i64);
+}
+
+void PPULLVMRecompiler::SetXerSo(Value * so) {
+ auto xer_i64 = GetXer();
+ xer_i64 = SetBit(xer_i64, 32, so);
+ SetXer(xer_i64);
+}
+
+Value * PPULLVMRecompiler::GetUsprg0() {
+ auto usrpg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, USPRG0));
+ auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usrpg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(usprg0_i64_ptr, 8);
+}
+
+void PPULLVMRecompiler::SetUsprg0(Value * val_x64) {
+ auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty());
+ auto usprg0_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, USPRG0));
+ auto usprg0_i64_ptr = m_ir_builder->CreateBitCast(usprg0_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i64, usprg0_i64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetFpr(u32 r, u32 bits, bool as_int) {
+ auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, FPR[r]));
+ if (!as_int) {
+ auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo());
+ auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8);
+ if (bits == 32) {
+ return m_ir_builder->CreateFPTrunc(r_f64, m_ir_builder->getFloatTy());
+ } else {
+ return r_f64;
+ }
+ } else {
+ auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo());
+ auto r_i64 = m_ir_builder->CreateAlignedLoad(r_i64_ptr, 8);
+ if (bits == 32) {
+ return m_ir_builder->CreateTrunc(r_i64, m_ir_builder->getInt32Ty());
+ } else {
+ return r_i64;
+ }
+ }
+}
+
+void PPULLVMRecompiler::SetFpr(u32 r, Value * val) {
+ auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, FPR[r]));
+ auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo());
+
+ Value* val_f64;
+ if (val->getType()->isDoubleTy() || val->getType()->isIntegerTy(64)) {
+ val_f64 = m_ir_builder->CreateBitCast(val, m_ir_builder->getDoubleTy());
+ } else if (val->getType()->isFloatTy() || val->getType()->isIntegerTy(32)) {
+ auto val_f32 = m_ir_builder->CreateBitCast(val, m_ir_builder->getFloatTy());
+ val_f64 = m_ir_builder->CreateFPExt(val_f32, m_ir_builder->getDoubleTy());
+ } else {
+ assert(0);
+ }
+
+ m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8);
+}
+
+Value * PPULLVMRecompiler::GetVscr() {
+ auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VSCR));
+ auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4);
+}
+
+void PPULLVMRecompiler::SetVscr(Value * val_x32) {
+ auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty());
+ auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VSCR));
+ auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4);
+}
+
+Value * PPULLVMRecompiler::GetVr(u32 vr) {
+ auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr]));
+ auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16);
+}
+
+Value * PPULLVMRecompiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) {
+ auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr]));
+ auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo());
+ auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16);
+}
+
+Value * PPULLVMRecompiler::GetVrAsFloatVec(u32 vr) {
+ auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr]));
+ auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo());
+ auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16);
+}
+
+Value * PPULLVMRecompiler::GetVrAsDoubleVec(u32 vr) {
+ auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr]));
+ auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo());
+ auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo());
+ return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16);
+}
+
+void PPULLVMRecompiler::SetVr(u32 vr, Value * val_x128) {
+ auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(GetPPUState(), (unsigned int)offsetof(PPUThread, VPR[vr]));
+ auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo());
+ auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128));
+ m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16);
+}
+
+Value * PPULLVMRecompiler::CheckBranchCondition(u32 bo, u32 bi) {
+ bool bo0 = bo & 0x10 ? true : false;
+ bool bo1 = bo & 0x08 ? true : false;
+ bool bo2 = bo & 0x04 ? true : false;
+ bool bo3 = bo & 0x02 ? true : false;
+
+ auto ctr_i64 = GetCtr();
+ if (!bo2) {
+ ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1));
+ SetCtr(ctr_i64);
+ }
+
+ Value * ctr_ok_i1 = nullptr;
+ if (!bo2) {
+ // TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3.
+ ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0));
+ if (bo3) {
+ ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3));
+ }
+ }
+
+ Value * cond_ok_i1 = nullptr;
+ if (!bo0) {
+ auto cr_bi_i32 = GetBit(GetCr(), bi);
+ cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty());
+ if (!bo1) {
+ cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1));
+ }
+ }
+
+ Value * cmp_i1 = nullptr;
+ if (ctr_ok_i1 && cond_ok_i1) {
+ cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1);
+ } else if (ctr_ok_i1) {
+ cmp_i1 = ctr_ok_i1;
+ } else if (cond_ok_i1) {
+ cmp_i1 = cond_ok_i1;
+ }
+
+ return cmp_i1;
+}
+
+void PPULLVMRecompiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk) {
+ if (lk) {
+ SetLr(m_ir_builder->getInt64(m_current_instruction_address + 4));
+ }
+
+ auto current_block = m_ir_builder->GetInsertBlock();
+ BasicBlock * target_block = nullptr;
+ if (dyn_cast(target_i64)) {
+ // Target address is an immediate value.
+ u32 target_address = (u32)(dyn_cast(target_i64)->getLimitedValue());
+ target_block = GetBlockInFunction(target_address, m_current_function);
+ if (!target_block) {
+ target_block = GetBlockInFunction(target_address, m_current_function, true);
+ if ((m_hit_blocks.find(target_address) != m_hit_blocks.end() || !cmp_i1) && m_num_instructions < 300) {
+ // Target block has either been hit or this is an unconditional branch.
+ m_current_function_uncompiled_blocks_list.push_back(target_address);
+ m_hit_blocks.insert(target_address);
+ } else {
+ // Target block has not been encountered yet and this is not an unconditional branch
+ m_ir_builder->SetInsertPoint(target_block);
+ SetPc(target_i64);
+ m_ir_builder->CreateRetVoid();
+ m_current_function_unhit_blocks_list.push_back(target_address);
+ }
+ }
+ } else {
+ // Target addres is in a register
+ target_block = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function);
+ m_ir_builder->SetInsertPoint(target_block);
+ SetPc(target_i64);
+ m_ir_builder->CreateRetVoid();
+ }
+
+ if (cmp_i1) {
+ // Conditional branch
+ auto next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function);
+ if (!next_block) {
+ next_block = GetBlockInFunction(m_current_instruction_address + 4, m_current_function, true);
+ if (m_hit_blocks.find(m_current_instruction_address + 4) != m_hit_blocks.end() && m_num_instructions < 300) {
+ // Next block has already been hit.
+ m_current_function_uncompiled_blocks_list.push_back(m_current_instruction_address + 4);
+ } else {
+ // Next block has not been encountered yet
+ m_ir_builder->SetInsertPoint(next_block);
+ SetPc(m_ir_builder->getInt32(m_current_instruction_address + 4));
+ m_ir_builder->CreateRetVoid();
+ m_current_function_unhit_blocks_list.push_back(m_current_instruction_address + 4);
+ }
+ }
+
+ m_ir_builder->SetInsertPoint(current_block);
+ m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block);
+ } else {
+ // Unconditional branch
+ m_ir_builder->SetInsertPoint(current_block);
+ m_ir_builder->CreateBr(target_block);
+ }
+
+ m_hit_branch_instruction = true;
+}
+
+Value * PPULLVMRecompiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) {
+ if (bits != 32 || could_be_mmio == false) {
+ auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0)));
+ auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getIntNTy(bits)->getPointerTo());
+ auto val_ix = (Value *)m_ir_builder->CreateLoad(eaddr_ix_ptr, alignment);
+ if (bits > 8 && bswap) {
+ val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getIntNTy(bits)), val_ix);
+ }
+
+ return val_ix;
+ } else {
+ BasicBlock * next_block = nullptr;
+ for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) {
+ if (&(*i) == m_ir_builder->GetInsertBlock()) {
+ i++;
+ if (i != m_current_function->end()) {
+ next_block = &(*i);
+ }
+
+ break;
+ }
+ }
+
+ auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR));
+ auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
+
+ m_ir_builder->SetInsertPoint(then_bb);
+ auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0)));
+ auto eaddr_i32_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getInt32Ty()->getPointerTo());
+ auto val_then_i32 = (Value *)m_ir_builder->CreateAlignedLoad(eaddr_i32_ptr, alignment);
+ if (bswap) {
+ val_then_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_then_i32);
+ }
+
+ m_ir_builder->CreateBr(merge_bb);
+
+ m_ir_builder->SetInsertPoint(else_bb);
+ auto val_else_i32 = Call("vm_read32", (u32(*)(u64))vm::read32, addr_i64);
+ if (!bswap) {
+ val_else_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_else_i32);
+ }
+ m_ir_builder->CreateBr(merge_bb);
+
+ m_ir_builder->SetInsertPoint(merge_bb);
+ auto phi = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 2);
+ phi->addIncoming(val_then_i32, then_bb);
+ phi->addIncoming(val_else_i32, else_bb);
+ return phi;
+ }
+}
+
+void PPULLVMRecompiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool bswap, bool could_be_mmio) {
+ addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF);
+ if (val_ix->getType()->getIntegerBitWidth() != 32 || could_be_mmio == false) {
+ if (val_ix->getType()->getIntegerBitWidth() > 8 && bswap) {
+ val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, val_ix->getType()), val_ix);
+ }
+
+ auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0)));
+ auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment);
+ } else {
+ BasicBlock * next_block = nullptr;
+ for (auto i = m_current_function->begin(); i != m_current_function->end(); i++) {
+ if (&(*i) == m_ir_builder->GetInsertBlock()) {
+ i++;
+ if (i != m_current_function->end()) {
+ next_block = &(*i);
+ }
+
+ break;
+ }
+ }
+
+ auto cmp_i1 = m_ir_builder->CreateICmpULT(addr_i64, m_ir_builder->getInt64(RAW_SPU_BASE_ADDR));
+ auto then_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ auto else_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ auto merge_bb = BasicBlock::Create(m_ir_builder->getContext(), "", m_current_function, next_block);
+ m_ir_builder->CreateCondBr(cmp_i1, then_bb, else_bb);
+
+ m_ir_builder->SetInsertPoint(then_bb);
+ Value * val_then_i32 = val_ix;
+ if (bswap) {
+ val_then_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_then_i32);
+ }
+
+ auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0)));
+ auto eaddr_i32_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getInt32Ty()->getPointerTo());
+ m_ir_builder->CreateAlignedStore(val_then_i32, eaddr_i32_ptr, alignment);
+ m_ir_builder->CreateBr(merge_bb);
+
+ m_ir_builder->SetInsertPoint(else_bb);
+ Value * val_else_i32 = val_ix;
+ if (!bswap) {
+ val_else_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_else_i32);
+ }
+
+ Call("vm_write32", (void(*)(u64, u32))vm::write32, addr_i64, val_else_i32);
+ m_ir_builder->CreateBr(merge_bb);
+
+ m_ir_builder->SetInsertPoint(merge_bb);
+ }
+}
+
+template
+Value * PPULLVMRecompiler::InterpreterCall(const char * name, Func function, Args... args) {
+ auto i = m_interpreter_fallback_stats.find(name);
+ if (i == m_interpreter_fallback_stats.end()) {
+ i = m_interpreter_fallback_stats.insert(m_interpreter_fallback_stats.end(), std::make_pair(name, 0));
+ }
+
+ i->second++;
+
+ return Call(name, function, GetInterpreter(), m_ir_builder->getInt32(args)...);
+}
+
+template
+Type * PPULLVMRecompiler::CppToLlvmType() {
+ if (std::is_void::value) {
+ return m_ir_builder->getVoidTy();
+ } else if (std::is_same::value || std::is_same::value) {
+ return m_ir_builder->getInt64Ty();
+ } else if (std::is_same::value || std::is_same::value) {
+ return m_ir_builder->getInt32Ty();
+ } else if (std::is_same::value || std::is_same::value) {
+ return m_ir_builder->getInt16Ty();
+ } else if (std::is_same::value || std::is_same::value) {
+ return m_ir_builder->getInt8Ty();
+ } else if (std::is_same::value) {
+ return m_ir_builder->getFloatTy();
+ } else if (std::is_same::value) {
+ return m_ir_builder->getDoubleTy();
+ } else if (std::is_pointer::value) {
+ return m_ir_builder->getInt8PtrTy();
+ } else {
+ assert(0);
+ }
+
+ return nullptr;
+}
+
+template
+Value * PPULLVMRecompiler::Call(const char * name, Func function, Args... args) {
+ auto fn = m_module->getFunction(name);
+ if (!fn) {
+ std::vector fn_args_type = {args->getType()...};
+ auto fn_type = FunctionType::get(CppToLlvmType(), fn_args_type, false);
+ fn = cast(m_module->getOrInsertFunction(name, fn_type));
+ fn->setCallingConv(CallingConv::X86_64_Win64);
+ m_execution_engine->addGlobalMapping(fn, (void *&)function);
+ }
+
+ std::vector fn_args = {args...};
+ return m_ir_builder->CreateCall(fn, fn_args);
+}
+
+void PPULLVMRecompiler::InitRotateMask() {
+ for (u32 mb = 0; mb < 64; mb++) {
+ for (u32 me = 0; me < 64; me++) {
+ u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 0 : (u64)-1 >> (me + 1));
+ s_rotate_mask[mb][me] = mb > me ? ~mask : mask;
+ }
+ }
+}
+
+u32 PPULLVMEmulator::s_num_instances = 0;
+std::mutex PPULLVMEmulator::s_recompiler_mutex;
+PPULLVMRecompiler * PPULLVMEmulator::s_recompiler = nullptr;
+
+PPULLVMEmulator::PPULLVMEmulator(PPUThread & ppu)
+ : m_ppu(ppu)
+ , m_interpreter(new PPUInterpreter(ppu))
+ , m_decoder(m_interpreter)
+ , m_last_instr_was_branch(true)
+ , m_last_cache_clear_time(std::chrono::high_resolution_clock::now())
+ , m_recompiler_revision(0) {
+ std::lock_guard lock(s_recompiler_mutex);
+
+ s_num_instances++;
+ if (!s_recompiler) {
+ s_recompiler = new PPULLVMRecompiler();
+ s_recompiler->RunAllTests(&m_ppu, m_interpreter);
+ }
+}
+
+PPULLVMEmulator::~PPULLVMEmulator() {
+ for (auto iter = m_address_to_executable.begin(); iter != m_address_to_executable.end(); iter++) {
+ s_recompiler->ReleaseExecutable(iter->first, iter->second.revision);
+ }
+
+ std::lock_guard lock(s_recompiler_mutex);
+
+ s_num_instances--;
+ if (s_recompiler && s_num_instances == 0) {
+ delete s_recompiler;
+ s_recompiler = nullptr;
+ }
+}
+
+u8 PPULLVMEmulator::DecodeMemory(const u32 address) {
+ auto now = std::chrono::high_resolution_clock::now();
+
+ if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 1000) {
+ bool clear_all = false;
+
+ u32 revision = s_recompiler->GetCurrentRevision();
+ if (m_recompiler_revision != revision) {
+ m_recompiler_revision = revision;
+ clear_all = true;
+ }
+
+ for (auto iter = m_address_to_executable.begin(); iter != m_address_to_executable.end();) {
+ auto tmp = iter;
+ iter++;
+ if (tmp->second.num_hits == 0 || clear_all) {
+ m_address_to_executable.erase(tmp);
+ s_recompiler->ReleaseExecutable(tmp->first, tmp->second.revision);
+ } else {
+ tmp->second.num_hits = 0;
+ }
+ }
+
+ m_last_cache_clear_time = now;
+ }
+
+ auto address_to_executable_iter = m_address_to_executable.find(address);
+ if (address_to_executable_iter == m_address_to_executable.end()) {
+ auto executable_and_revision = s_recompiler->GetExecutable(address);
+ if (executable_and_revision.first) {
+ ExecutableInfo executable_info;
+ executable_info.executable = executable_and_revision.first;
+ executable_info.revision = executable_and_revision.second;
+ executable_info.num_hits = 0;
+
+ address_to_executable_iter = m_address_to_executable.insert(m_address_to_executable.end(), std::make_pair(address, executable_info));
+ m_uncompiled.erase(address);
+ } else {
+ if (m_last_instr_was_branch) {
+ auto uncompiled_iter = m_uncompiled.find(address);
+ if (uncompiled_iter != m_uncompiled.end()) {
+ uncompiled_iter->second++;
+ if ((uncompiled_iter->second % 1000) == 0) {
+ s_recompiler->RequestCompilation(address);
+ }
+ } else {
+ m_uncompiled[address] = 0;
+ }
+ }
+ }
+ }
+
+ u8 ret = 0;
+ if (address_to_executable_iter != m_address_to_executable.end()) {
+ address_to_executable_iter->second.executable(&m_ppu, m_interpreter);
+ address_to_executable_iter->second.num_hits++;
+ m_last_instr_was_branch = true;
+ } else {
+ ret = m_decoder.DecodeMemory(address);
+ m_last_instr_was_branch = m_ppu.m_is_branch;
+ }
+
+ return ret;
+}
diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h
new file mode 100644
index 000000000..218dad972
--- /dev/null
+++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h
@@ -0,0 +1,789 @@
+#ifndef PPU_LLVM_RECOMPILER_H
+#define PPU_LLVM_RECOMPILER_H
+
+#include "Emu/Cell/PPUDecoder.h"
+#include "Emu/Cell/PPUThread.h"
+#include "Emu/Cell/PPUInterpreter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/PassManager.h"
+
+struct PPUState;
+
+/// PPU recompiler that uses LLVM for code generation and optimization
+class PPULLVMRecompiler : public ThreadBase, protected PPUOpcodes, protected PPCDecoder {
+public:
+ typedef void(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter);
+
+ PPULLVMRecompiler();
+
+ PPULLVMRecompiler(const PPULLVMRecompiler & other) = delete;
+ PPULLVMRecompiler(PPULLVMRecompiler && other) = delete;
+
+ virtual ~PPULLVMRecompiler();
+
+ PPULLVMRecompiler & operator = (const PPULLVMRecompiler & other) = delete;
+ PPULLVMRecompiler & operator = (PPULLVMRecompiler && other) = delete;
+
+ /// Get the executable for the code starting at address
+ std::pair GetExecutable(u32 address);
+
+ /// Release an executable earlier obtained through GetExecutable
+ void ReleaseExecutable(u32 address, u32 revision);
+
+ /// Request the code at the sepcified address to be compiled
+ void RequestCompilation(u32 address);
+
+ /// Get the current revision
+ u32 GetCurrentRevision();
+
+ /// Execute all tests
+ void RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter);
+
+ void Task() override;
+
+protected:
+ void Decode(const u32 code) override;
+
+ void NULL_OP() override;
+ void NOP() override;
+
+ void TDI(u32 to, u32 ra, s32 simm16) override;
+ void TWI(u32 to, u32 ra, s32 simm16) override;
+
+ void MFVSCR(u32 vd) override;
+ void MTVSCR(u32 vb) override;
+ void VADDCUW(u32 vd, u32 va, u32 vb) override;
+ void VADDFP(u32 vd, u32 va, u32 vb) override;
+ void VADDSBS(u32 vd, u32 va, u32 vb) override;
+ void VADDSHS(u32 vd, u32 va, u32 vb) override;
+ void VADDSWS(u32 vd, u32 va, u32 vb) override;
+ void VADDUBM(u32 vd, u32 va, u32 vb) override;
+ void VADDUBS(u32 vd, u32 va, u32 vb) override;
+ void VADDUHM(u32 vd, u32 va, u32 vb) override;
+ void VADDUHS(u32 vd, u32 va, u32 vb) override;
+ void VADDUWM(u32 vd, u32 va, u32 vb) override;
+ void VADDUWS(u32 vd, u32 va, u32 vb) override;
+ void VAND(u32 vd, u32 va, u32 vb) override;
+ void VANDC(u32 vd, u32 va, u32 vb) override;
+ void VAVGSB(u32 vd, u32 va, u32 vb) override;
+ void VAVGSH(u32 vd, u32 va, u32 vb) override;
+ void VAVGSW(u32 vd, u32 va, u32 vb) override;
+ void VAVGUB(u32 vd, u32 va, u32 vb) override;
+ void VAVGUH(u32 vd, u32 va, u32 vb) override;
+ void VAVGUW(u32 vd, u32 va, u32 vb) override;
+ void VCFSX(u32 vd, u32 uimm5, u32 vb) override;
+ void VCFUX(u32 vd, u32 uimm5, u32 vb) override;
+ void VCMPBFP(u32 vd, u32 va, u32 vb) override;
+ void VCMPBFP_(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQFP(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQFP_(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUB(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUB_(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUH(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUH_(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUW(u32 vd, u32 va, u32 vb) override;
+ void VCMPEQUW_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGEFP(u32 vd, u32 va, u32 vb) override;
+ void VCMPGEFP_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTFP(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTFP_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSB(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSB_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSH(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSH_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSW(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTSW_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUB(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUB_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUH(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUH_(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUW(u32 vd, u32 va, u32 vb) override;
+ void VCMPGTUW_(u32 vd, u32 va, u32 vb) override;
+ void VCTSXS(u32 vd, u32 uimm5, u32 vb) override;
+ void VCTUXS(u32 vd, u32 uimm5, u32 vb) override;
+ void VEXPTEFP(u32 vd, u32 vb) override;
+ void VLOGEFP(u32 vd, u32 vb) override;
+ void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) override;
+ void VMAXFP(u32 vd, u32 va, u32 vb) override;
+ void VMAXSB(u32 vd, u32 va, u32 vb) override;
+ void VMAXSH(u32 vd, u32 va, u32 vb) override;
+ void VMAXSW(u32 vd, u32 va, u32 vb) override;
+ void VMAXUB(u32 vd, u32 va, u32 vb) override;
+ void VMAXUH(u32 vd, u32 va, u32 vb) override;
+ void VMAXUW(u32 vd, u32 va, u32 vb) override;
+ void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMINFP(u32 vd, u32 va, u32 vb) override;
+ void VMINSB(u32 vd, u32 va, u32 vb) override;
+ void VMINSH(u32 vd, u32 va, u32 vb) override;
+ void VMINSW(u32 vd, u32 va, u32 vb) override;
+ void VMINUB(u32 vd, u32 va, u32 vb) override;
+ void VMINUH(u32 vd, u32 va, u32 vb) override;
+ void VMINUW(u32 vd, u32 va, u32 vb) override;
+ void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMRGHB(u32 vd, u32 va, u32 vb) override;
+ void VMRGHH(u32 vd, u32 va, u32 vb) override;
+ void VMRGHW(u32 vd, u32 va, u32 vb) override;
+ void VMRGLB(u32 vd, u32 va, u32 vb) override;
+ void VMRGLH(u32 vd, u32 va, u32 vb) override;
+ void VMRGLW(u32 vd, u32 va, u32 vb) override;
+ void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VMULESB(u32 vd, u32 va, u32 vb) override;
+ void VMULESH(u32 vd, u32 va, u32 vb) override;
+ void VMULEUB(u32 vd, u32 va, u32 vb) override;
+ void VMULEUH(u32 vd, u32 va, u32 vb) override;
+ void VMULOSB(u32 vd, u32 va, u32 vb) override;
+ void VMULOSH(u32 vd, u32 va, u32 vb) override;
+ void VMULOUB(u32 vd, u32 va, u32 vb) override;
+ void VMULOUH(u32 vd, u32 va, u32 vb) override;
+ void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) override;
+ void VNOR(u32 vd, u32 va, u32 vb) override;
+ void VOR(u32 vd, u32 va, u32 vb) override;
+ void VPERM(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VPKPX(u32 vd, u32 va, u32 vb) override;
+ void VPKSHSS(u32 vd, u32 va, u32 vb) override;
+ void VPKSHUS(u32 vd, u32 va, u32 vb) override;
+ void VPKSWSS(u32 vd, u32 va, u32 vb) override;
+ void VPKSWUS(u32 vd, u32 va, u32 vb) override;
+ void VPKUHUM(u32 vd, u32 va, u32 vb) override;
+ void VPKUHUS(u32 vd, u32 va, u32 vb) override;
+ void VPKUWUM(u32 vd, u32 va, u32 vb) override;
+ void VPKUWUS(u32 vd, u32 va, u32 vb) override;
+ void VREFP(u32 vd, u32 vb) override;
+ void VRFIM(u32 vd, u32 vb) override;
+ void VRFIN(u32 vd, u32 vb) override;
+ void VRFIP(u32 vd, u32 vb) override;
+ void VRFIZ(u32 vd, u32 vb) override;
+ void VRLB(u32 vd, u32 va, u32 vb) override;
+ void VRLH(u32 vd, u32 va, u32 vb) override;
+ void VRLW(u32 vd, u32 va, u32 vb) override;
+ void VRSQRTEFP(u32 vd, u32 vb) override;
+ void VSEL(u32 vd, u32 va, u32 vb, u32 vc) override;
+ void VSL(u32 vd, u32 va, u32 vb) override;
+ void VSLB(u32 vd, u32 va, u32 vb) override;
+ void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) override;
+ void VSLH(u32 vd, u32 va, u32 vb) override;
+ void VSLO(u32 vd, u32 va, u32 vb) override;
+ void VSLW(u32 vd, u32 va, u32 vb) override;
+ void VSPLTB(u32 vd, u32 uimm5, u32 vb) override;
+ void VSPLTH(u32 vd, u32 uimm5, u32 vb) override;
+ void VSPLTISB(u32 vd, s32 simm5) override;
+ void VSPLTISH(u32 vd, s32 simm5) override;
+ void VSPLTISW(u32 vd, s32 simm5) override;
+ void VSPLTW(u32 vd, u32 uimm5, u32 vb) override;
+ void VSR(u32 vd, u32 va, u32 vb) override;
+ void VSRAB(u32 vd, u32 va, u32 vb) override;
+ void VSRAH(u32 vd, u32 va, u32 vb) override;
+ void VSRAW(u32 vd, u32 va, u32 vb) override;
+ void VSRB(u32 vd, u32 va, u32 vb) override;
+ void VSRH(u32 vd, u32 va, u32 vb) override;
+ void VSRO(u32 vd, u32 va, u32 vb) override;
+ void VSRW(u32 vd, u32 va, u32 vb) override;
+ void VSUBCUW(u32 vd, u32 va, u32 vb) override;
+ void VSUBFP(u32 vd, u32 va, u32 vb) override;
+ void VSUBSBS(u32 vd, u32 va, u32 vb) override;
+ void VSUBSHS(u32 vd, u32 va, u32 vb) override;
+ void VSUBSWS(u32 vd, u32 va, u32 vb) override;
+ void VSUBUBM(u32 vd, u32 va, u32 vb) override;
+ void VSUBUBS(u32 vd, u32 va, u32 vb) override;
+ void VSUBUHM(u32 vd, u32 va, u32 vb) override;
+ void VSUBUHS(u32 vd, u32 va, u32 vb) override;
+ void VSUBUWM(u32 vd, u32 va, u32 vb) override;
+ void VSUBUWS(u32 vd, u32 va, u32 vb) override;
+ void VSUMSWS(u32 vd, u32 va, u32 vb) override;
+ void VSUM2SWS(u32 vd, u32 va, u32 vb) override;
+ void VSUM4SBS(u32 vd, u32 va, u32 vb) override;
+ void VSUM4SHS(u32 vd, u32 va, u32 vb) override;
+ void VSUM4UBS(u32 vd, u32 va, u32 vb) override;
+ void VUPKHPX(u32 vd, u32 vb) override;
+ void VUPKHSB(u32 vd, u32 vb) override;
+ void VUPKHSH(u32 vd, u32 vb) override;
+ void VUPKLPX(u32 vd, u32 vb) override;
+ void VUPKLSB(u32 vd, u32 vb) override;
+ void VUPKLSH(u32 vd, u32 vb) override;
+ void VXOR(u32 vd, u32 va, u32 vb) override;
+ void MULLI(u32 rd, u32 ra, s32 simm16) override;
+ void SUBFIC(u32 rd, u32 ra, s32 simm16) override;
+ void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) override;
+ void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) override;
+ void ADDIC(u32 rd, u32 ra, s32 simm16) override;
+ void ADDIC_(u32 rd, u32 ra, s32 simm16) override;
+ void ADDI(u32 rd, u32 ra, s32 simm16) override;
+ void ADDIS(u32 rd, u32 ra, s32 simm16) override;
+ void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) override;
+ void SC(u32 sc_code) override;
+ void B(s32 ll, u32 aa, u32 lk) override;
+ void MCRF(u32 crfd, u32 crfs) override;
+ void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) override;
+ void CRNOR(u32 bt, u32 ba, u32 bb) override;
+ void CRANDC(u32 bt, u32 ba, u32 bb) override;
+ void ISYNC() override;
+ void CRXOR(u32 bt, u32 ba, u32 bb) override;
+ void CRNAND(u32 bt, u32 ba, u32 bb) override;
+ void CRAND(u32 bt, u32 ba, u32 bb) override;
+ void CREQV(u32 bt, u32 ba, u32 bb) override;
+ void CRORC(u32 bt, u32 ba, u32 bb) override;
+ void CROR(u32 bt, u32 ba, u32 bb) override;
+ void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) override;
+ void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override;
+ void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) override;
+ void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, bool rc) override;
+ void ORI(u32 rs, u32 ra, u32 uimm16) override;
+ void ORIS(u32 rs, u32 ra, u32 uimm16) override;
+ void XORI(u32 ra, u32 rs, u32 uimm16) override;
+ void XORIS(u32 ra, u32 rs, u32 uimm16) override;
+ void ANDI_(u32 ra, u32 rs, u32 uimm16) override;
+ void ANDIS_(u32 ra, u32 rs, u32 uimm16) override;
+ void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override;
+ void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) override;
+ void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override;
+ void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) override;
+ void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) override;
+ void CMP(u32 crfd, u32 l, u32 ra, u32 rb) override;
+ void TW(u32 to, u32 ra, u32 rb) override;
+ void LVSL(u32 vd, u32 ra, u32 rb) override;
+ void LVEBX(u32 vd, u32 ra, u32 rb) override;
+ void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) override;
+ void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) override;
+ void MFOCRF(u32 a, u32 rd, u32 crm) override;
+ void LWARX(u32 rd, u32 ra, u32 rb) override;
+ void LDX(u32 ra, u32 rs, u32 rb) override;
+ void LWZX(u32 rd, u32 ra, u32 rb) override;
+ void SLW(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void CNTLZW(u32 ra, u32 rs, bool rc) override;
+ void SLD(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void AND(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void CMPL(u32 bf, u32 l, u32 ra, u32 rb) override;
+ void LVSR(u32 vd, u32 ra, u32 rb) override;
+ void LVEHX(u32 vd, u32 ra, u32 rb) override;
+ void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void LDUX(u32 rd, u32 ra, u32 rb) override;
+ void DCBST(u32 ra, u32 rb) override;
+ void LWZUX(u32 rd, u32 ra, u32 rb) override;
+ void CNTLZD(u32 ra, u32 rs, bool rc) override;
+ void ANDC(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void TD(u32 to, u32 ra, u32 rb) override;
+ void LVEWX(u32 vd, u32 ra, u32 rb) override;
+ void MULHD(u32 rd, u32 ra, u32 rb, bool rc) override;
+ void MULHW(u32 rd, u32 ra, u32 rb, bool rc) override;
+ void LDARX(u32 rd, u32 ra, u32 rb) override;
+ void DCBF(u32 ra, u32 rb) override;
+ void LBZX(u32 rd, u32 ra, u32 rb) override;
+ void LVX(u32 vd, u32 ra, u32 rb) override;
+ void NEG(u32 rd, u32 ra, u32 oe, bool rc) override;
+ void LBZUX(u32 rd, u32 ra, u32 rb) override;
+ void NOR(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void STVEBX(u32 vs, u32 ra, u32 rb) override;
+ void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void MTOCRF(u32 l, u32 crm, u32 rs) override;
+ void STDX(u32 rs, u32 ra, u32 rb) override;
+ void STWCX_(u32 rs, u32 ra, u32 rb) override;
+ void STWX(u32 rs, u32 ra, u32 rb) override;
+ void STVEHX(u32 vs, u32 ra, u32 rb) override;
+ void STDUX(u32 rs, u32 ra, u32 rb) override;
+ void STWUX(u32 rs, u32 ra, u32 rb) override;
+ void STVEWX(u32 vs, u32 ra, u32 rb) override;
+ void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) override;
+ void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) override;
+ void STDCX_(u32 rs, u32 ra, u32 rb) override;
+ void STBX(u32 rs, u32 ra, u32 rb) override;
+ void STVX(u32 vs, u32 ra, u32 rb) override;
+ void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) override;
+ void ADDME(u32 rd, u32 ra, u32 oe, bool rc) override;
+ void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void DCBTST(u32 ra, u32 rb, u32 th) override;
+ void STBUX(u32 rs, u32 ra, u32 rb) override;
+ void ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void DCBT(u32 ra, u32 rb, u32 th) override;
+ void LHZX(u32 rd, u32 ra, u32 rb) override;
+ void EQV(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void ECIWX(u32 rd, u32 ra, u32 rb) override;
+ void LHZUX(u32 rd, u32 ra, u32 rb) override;
+ void XOR(u32 rs, u32 ra, u32 rb, bool rc) override;
+ void MFSPR(u32 rd, u32 spr) override;
+ void LWAX(u32 rd, u32 ra, u32 rb) override;
+ void DST(u32 ra, u32 rb, u32 strm, u32 t) override;
+ void LHAX(u32 rd, u32 ra, u32 rb) override;
+ void LVXL(u32 vd, u32 ra, u32 rb) override;
+ void MFTB(u32 rd, u32 spr) override;
+ void LWAUX(u32 rd, u32 ra, u32 rb) override;
+ void DSTST(u32 ra, u32 rb, u32 strm, u32 t) override;
+ void LHAUX(u32 rd, u32 ra, u32 rb) override;
+ void STHX(u32 rs, u32 ra, u32 rb) override;
+ void ORC(u32 rs, u32 ra, u32 rb, bool rc) override;
+ void ECOWX(u32 rs, u32 ra, u32 rb) override;
+ void STHUX(u32 rs, u32 ra, u32 rb) override;
+ void OR(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void MTSPR(u32 spr, u32 rs) override;
+ //DCBI
+ void NAND(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void STVXL(u32 vs, u32 ra, u32 rb) override;
+ void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) override;
+ void LVLX(u32 vd, u32 ra, u32 rb) override;
+ void LDBRX(u32 rd, u32 ra, u32 rb) override;
+ void LSWX(u32 rd, u32 ra, u32 rb) override;
+ void LWBRX(u32 rd, u32 ra, u32 rb) override;
+ void LFSX(u32 frd, u32 ra, u32 rb) override;
+ void SRW(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void SRD(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void LVRX(u32 vd, u32 ra, u32 rb) override;
+ void LSWI(u32 rd, u32 ra, u32 nb) override;
+ void LFSUX(u32 frd, u32 ra, u32 rb) override;
+ void SYNC(u32 l) override;
+ void LFDX(u32 frd, u32 ra, u32 rb) override;
+ void LFDUX(u32 frd, u32 ra, u32 rb) override;
+ void STVLX(u32 vs, u32 ra, u32 rb) override;
+ void STSWX(u32 rs, u32 ra, u32 rb) override;
+ void STWBRX(u32 rs, u32 ra, u32 rb) override;
+ void STFSX(u32 frs, u32 ra, u32 rb) override;
+ void STVRX(u32 vs, u32 ra, u32 rb) override;
+ void STFSUX(u32 frs, u32 ra, u32 rb) override;
+ void STSWI(u32 rd, u32 ra, u32 nb) override;
+ void STFDX(u32 frs, u32 ra, u32 rb) override;
+ void STFDUX(u32 frs, u32 ra, u32 rb) override;
+ void LVLXL(u32 vd, u32 ra, u32 rb) override;
+ void LHBRX(u32 rd, u32 ra, u32 rb) override;
+ void SRAW(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void SRAD(u32 ra, u32 rs, u32 rb, bool rc) override;
+ void LVRXL(u32 vd, u32 ra, u32 rb) override;
+ void DSS(u32 strm, u32 a) override;
+ void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) override;
+ void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) override;
+ void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) override;
+ void EIEIO() override;
+ void STVLXL(u32 vs, u32 ra, u32 rb) override;
+ void STHBRX(u32 rs, u32 ra, u32 rb) override;
+ void EXTSH(u32 ra, u32 rs, bool rc) override;
+ void STVRXL(u32 sd, u32 ra, u32 rb) override;
+ void EXTSB(u32 ra, u32 rs, bool rc) override;
+ void STFIWX(u32 frs, u32 ra, u32 rb) override;
+ void EXTSW(u32 ra, u32 rs, bool rc) override;
+ void ICBI(u32 ra, u32 rb) override;
+ void DCBZ(u32 ra, u32 rb) override;
+ void LWZ(u32 rd, u32 ra, s32 d) override;
+ void LWZU(u32 rd, u32 ra, s32 d) override;
+ void LBZ(u32 rd, u32 ra, s32 d) override;
+ void LBZU(u32 rd, u32 ra, s32 d) override;
+ void STW(u32 rs, u32 ra, s32 d) override;
+ void STWU(u32 rs, u32 ra, s32 d) override;
+ void STB(u32 rs, u32 ra, s32 d) override;
+ void STBU(u32 rs, u32 ra, s32 d) override;
+ void LHZ(u32 rd, u32 ra, s32 d) override;
+ void LHZU(u32 rd, u32 ra, s32 d) override;
+ void LHA(u32 rs, u32 ra, s32 d) override;
+ void LHAU(u32 rs, u32 ra, s32 d) override;
+ void STH(u32 rs, u32 ra, s32 d) override;
+ void STHU(u32 rs, u32 ra, s32 d) override;
+ void LMW(u32 rd, u32 ra, s32 d) override;
+ void STMW(u32 rs, u32 ra, s32 d) override;
+ void LFS(u32 frd, u32 ra, s32 d) override;
+ void LFSU(u32 frd, u32 ra, s32 d) override;
+ void LFD(u32 frd, u32 ra, s32 d) override;
+ void LFDU(u32 frd, u32 ra, s32 d) override;
+ void STFS(u32 frs, u32 ra, s32 d) override;
+ void STFSU(u32 frs, u32 ra, s32 d) override;
+ void STFD(u32 frs, u32 ra, s32 d) override;
+ void STFDU(u32 frs, u32 ra, s32 d) override;
+ void LD(u32 rd, u32 ra, s32 ds) override;
+ void LDU(u32 rd, u32 ra, s32 ds) override;
+ void LWA(u32 rd, u32 ra, s32 ds) override;
+ void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FADDS(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FSQRTS(u32 frd, u32 frb, bool rc) override;
+ void FRES(u32 frd, u32 frb, bool rc) override;
+ void FMULS(u32 frd, u32 fra, u32 frc, bool rc) override;
+ void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void STD(u32 rs, u32 ra, s32 ds) override;
+ void STDU(u32 rs, u32 ra, s32 ds) override;
+ void MTFSB1(u32 bt, bool rc) override;
+ void MCRFS(u32 bf, u32 bfa) override;
+ void MTFSB0(u32 bt, bool rc) override;
+ void MTFSFI(u32 crfd, u32 i, bool rc) override;
+ void MFFS(u32 frd, bool rc) override;
+ void MTFSF(u32 flm, u32 frb, bool rc) override;
+
+ void FCMPU(u32 bf, u32 fra, u32 frb) override;
+ void FRSP(u32 frd, u32 frb, bool rc) override;
+ void FCTIW(u32 frd, u32 frb, bool rc) override;
+ void FCTIWZ(u32 frd, u32 frb, bool rc) override;
+ void FDIV(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FSUB(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FADD(u32 frd, u32 fra, u32 frb, bool rc) override;
+ void FSQRT(u32 frd, u32 frb, bool rc) override;
+ void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FMUL(u32 frd, u32 fra, u32 frc, bool rc) override;
+ void FRSQRTE(u32 frd, u32 frb, bool rc) override;
+ void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) override;
+ void FCMPO(u32 crfd, u32 fra, u32 frb) override;
+ void FNEG(u32 frd, u32 frb, bool rc) override;
+ void FMR(u32 frd, u32 frb, bool rc) override;
+ void FNABS(u32 frd, u32 frb, bool rc) override;
+ void FABS(u32 frd, u32 frb, bool rc) override;
+ void FCTID(u32 frd, u32 frb, bool rc) override;
+ void FCTIDZ(u32 frd, u32 frb, bool rc) override;
+ void FCFID(u32 frd, u32 frb, bool rc) override;
+
+ void UNK(const u32 code, const u32 opcode, const u32 gcode) override;
+
+private:
+ struct ExecutableInfo {
+ /// Pointer to the executable
+ Executable executable;
+
+ /// Size of the executable
+ size_t size;
+
+ /// Number of PPU instructions compiled into this executable
+ u32 num_instructions;
+
+ /// List of blocks that this executable refers to that have not been hit yet
+ std::list unhit_blocks_list;
+
+ /// LLVM function corresponding to the executable
+ llvm::Function * llvm_function;
+ };
+
+ /// Lock for accessing m_compiled_shared
+ // TODO: Use a RW lock
+ std::mutex m_compiled_shared_lock;
+
+ /// Sections that have been compiled. This data store is shared with the execution threads.
+ /// Keys are starting address of the section and ~revision. Data is pointer to the executable and its reference count.
+ std::map, std::pair> m_compiled_shared;
+
+ /// Lock for accessing m_uncompiled_shared
+ std::mutex m_uncompiled_shared_lock;
+
+ /// Current revision. This is incremented everytime a section is compiled.
+ std::atomic m_revision;
+
+ /// Sections that have not been compiled yet. This data store is shared with the execution threads.
+ std::list m_uncompiled_shared;
+
+ /// Set of all blocks that have been hit
+ std::set m_hit_blocks;
+
+ /// Sections that have been compiled. Keys are starting address of the section and ~revision.
+ std::map, ExecutableInfo> m_compiled;
+
+ /// LLVM context
+ llvm::LLVMContext * m_llvm_context;
+
+ /// LLVM IR builder
+ llvm::IRBuilder<> * m_ir_builder;
+
+ /// Module to which all generated code is output to
+ llvm::Module * m_module;
+
+ /// JIT execution engine
+ llvm::ExecutionEngine * m_execution_engine;
+
+ /// Function pass manager
+ llvm::FunctionPassManager * m_fpm;
+
+ /// A flag used to detect branch instructions.
+ /// This is set to false at the start of compilation of a block.
+ /// When a branch instruction is encountered, this is set to true by the decode function.
+ bool m_hit_branch_instruction;
+
+ /// The function being compiled
+ llvm::Function * m_current_function;
+
+ /// List of blocks to be compiled in the current function being compiled
+ std::list m_current_function_uncompiled_blocks_list;
+
+ /// List of blocks that the current function refers to but have not been hit yet
+ std::list m_current_function_unhit_blocks_list;
+
+ /// Address of the current instruction
+ u32 m_current_instruction_address;
+
+ /// Number of instructions in this section
+ u32 m_num_instructions;
+
+ /// Time spent building the LLVM IR
+ std::chrono::nanoseconds m_ir_build_time;
+
+ /// Time spent optimizing
+ std::chrono::nanoseconds m_optimizing_time;
+
+ /// Time spent translating LLVM IR to machine code
+ std::chrono::nanoseconds m_translation_time;
+
+ /// Time spent compiling
+ std::chrono::nanoseconds m_compilation_time;
+
+ /// Time spent idling
+ std::chrono::nanoseconds m_idling_time;
+
+ /// Total time
+ std::chrono::nanoseconds m_total_time;
+
+ /// Contains the number of times the interpreter fallback was used
+ std::map m_interpreter_fallback_stats;
+
+ /// Get the block in function for the instruction at the specified address.
+ llvm::BasicBlock * GetBlockInFunction(u32 address, llvm::Function * function, bool create_if_not_exist = false);
+
+ /// Compile the section startin at address
+ void Compile(u32 address);
+
+ /// Remove old versions of executables that are no longer used by any execution thread
+ void RemoveUnusedOldVersions();
+
+ /// Test whether the blocks needs to be compiled
+ bool NeedsCompiling(u32 address);
+
+ /// Get PPU state pointer
+ llvm::Value * GetPPUState();
+
+ /// Get interpreter pointer
+ llvm::Value * GetInterpreter();
+
+ /// Get a bit
+ llvm::Value * GetBit(llvm::Value * val, u32 n);
+
+ /// Clear a bit
+ llvm::Value * ClrBit(llvm::Value * val, u32 n);
+
+ /// Set a bit
+ llvm::Value * SetBit(llvm::Value * val, u32 n, llvm::Value * bit, bool doClear = true);
+
+ /// Get a nibble
+ llvm::Value * GetNibble(llvm::Value * val, u32 n);
+
+ /// Clear a nibble
+ llvm::Value * ClrNibble(llvm::Value * val, u32 n);
+
+ /// Set a nibble
+ llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * nibble, bool doClear = true);
+
+ /// Set a nibble
+ llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3, bool doClear = true);
+
+ /// Load PC
+ llvm::Value * GetPc();
+
+ /// Set PC
+ void SetPc(llvm::Value * val_ix);
+
+ /// Load GPR
+ llvm::Value * GetGpr(u32 r, u32 num_bits = 64);
+
+ /// Set GPR
+ void SetGpr(u32 r, llvm::Value * val_x64);
+
+ /// Load CR
+ llvm::Value * GetCr();
+
+ /// Load CR and get field CRn
+ llvm::Value * GetCrField(u32 n);
+
+ /// Set CR
+ void SetCr(llvm::Value * val_x32);
+
+ /// Set CR field
+ void SetCrField(u32 n, llvm::Value * field);
+
+ /// Set CR field
+ void SetCrField(u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3);
+
+ /// Set CR field based on signed comparison
+ void SetCrFieldSignedCmp(u32 n, llvm::Value * a, llvm::Value * b);
+
+ /// Set CR field based on unsigned comparison
+ void SetCrFieldUnsignedCmp(u32 n, llvm::Value * a, llvm::Value * b);
+
+ /// Set CR6 based on the result of the vector compare instruction
+ void SetCr6AfterVectorCompare(u32 vr);
+
+ /// Get LR
+ llvm::Value * GetLr();
+
+ /// Set LR
+ void SetLr(llvm::Value * val_x64);
+
+ /// Get CTR
+ llvm::Value * GetCtr();
+
+ /// Set CTR
+ void SetCtr(llvm::Value * val_x64);
+
+ /// Load XER and convert it to an i64
+ llvm::Value * GetXer();
+
+ /// Load XER and return the CA bit
+ llvm::Value * GetXerCa();
+
+ /// Load XER and return the SO bit
+ llvm::Value * GetXerSo();
+
+ /// Set XER
+ void SetXer(llvm::Value * val_x64);
+
+ /// Set the CA bit of XER
+ void SetXerCa(llvm::Value * ca);
+
+ /// Set the SO bit of XER
+ void SetXerSo(llvm::Value * so);
+
+ /// Get USPRG0
+ llvm::Value * GetUsprg0();
+
+ /// Set USPRG0
+ void SetUsprg0(llvm::Value * val_x64);
+
+ /// Get FPR
+ llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false);
+
+ /// Set FPR
+ void SetFpr(u32 r, llvm::Value * val);
+
+ /// Load VSCR
+ llvm::Value * GetVscr();
+
+ /// Set VSCR
+ void SetVscr(llvm::Value * val_x32);
+
+ /// Load VR
+ llvm::Value * GetVr(u32 vr);
+
+ /// Load VR and convert it to an integer vector
+ llvm::Value * GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits);
+
+ /// Load VR and convert it to a float vector with 4 elements
+ llvm::Value * GetVrAsFloatVec(u32 vr);
+
+ /// Load VR and convert it to a double vector with 2 elements
+ llvm::Value * GetVrAsDoubleVec(u32 vr);
+
+ /// Set VR to the specified value
+ void SetVr(u32 vr, llvm::Value * val_x128);
+
+ /// Check condition for branch instructions
+ llvm::Value * CheckBranchCondition(u32 bo, u32 bi);
+
+ /// Create IR for a branch instruction
+ void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i64, bool lk);
+
+ /// Read from memory
+ llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true);
+
+ /// Write to memory
+ void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true);
+
+ /// Call an interpreter function
+ template
+ llvm::Value * InterpreterCall(const char * name, Func function, Args... args);
+
+ /// Convert a C++ type to an LLVM type
+ template
+ llvm::Type * CppToLlvmType();
+
+ /// Call a function
+ template
+ llvm::Value * Call(const char * name, Func function, Args... args);
+
+ /// Test an instruction against the interpreter
+ template
+ void VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args);
+
+ /// Excute a test
+ void RunTest(const char * name, std::function test_case, std::function input, std::function check_result);
+
+ /// A mask used in rotate instructions
+ static u64 s_rotate_mask[64][64];
+
+ /// A flag indicating whether s_rotate_mask has been initialised or not
+ static bool s_rotate_mask_inited;
+
+ /// Initialse s_rotate_mask
+ static void InitRotateMask();
+};
+
+/// PPU emulator that uses LLVM to convert PPU instructions to host CPU instructions
+class PPULLVMEmulator : public CPUDecoder {
+public:
+ PPULLVMEmulator(PPUThread & ppu);
+ PPULLVMEmulator() = delete;
+
+ PPULLVMEmulator(const PPULLVMEmulator & other) = delete;
+ PPULLVMEmulator(PPULLVMEmulator && other) = delete;
+
+ virtual ~PPULLVMEmulator();
+
+ PPULLVMEmulator & operator = (const PPULLVMEmulator & other) = delete;
+ PPULLVMEmulator & operator = (PPULLVMEmulator && other) = delete;
+
+ u8 DecodeMemory(const u32 address) override;
+
+private:
+ struct ExecutableInfo {
+ /// Pointer to the executable
+ PPULLVMRecompiler::Executable executable;
+
+ /// The revision of the executable
+ u32 revision;
+
+ /// Number of times the executable was hit
+ u32 num_hits;
+ };
+
+ /// PPU processor context
+ PPUThread & m_ppu;
+
+ /// PPU Interpreter
+ PPUInterpreter * m_interpreter;
+
+ /// PPU instruction Decoder
+ PPUDecoder m_decoder;
+
+ /// Set to true if the last executed instruction was a branch
+ bool m_last_instr_was_branch;
+
+ /// The time at which the m_address_to_executable cache was last cleared
+ std::chrono::high_resolution_clock::time_point m_last_cache_clear_time;
+
+ /// The revision of the recompiler to which this thread is synced
+ u32 m_recompiler_revision;
+
+ /// Address to executable map. Key is address.
+ std::unordered_map m_address_to_executable;
+
+ /// Sections that have not been compiled yet. Key is starting address of the section.
+ std::unordered_map m_uncompiled;
+
+ /// Number of instances of this class
+ static u32 s_num_instances;
+
+ /// Mutex used prevent multiple instances of the recompiler from being created
+ static std::mutex s_recompiler_mutex;
+
+ /// PPU to LLVM recompiler
+ static PPULLVMRecompiler * s_recompiler;
+};
+
+#endif // PPU_LLVM_RECOMPILER_H
diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp
new file mode 100644
index 000000000..d601503e1
--- /dev/null
+++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp
@@ -0,0 +1,769 @@
+#include "stdafx.h"
+#include "Utilities/Log.h"
+#include "Emu/Cell/PPULLVMRecompiler.h"
+#include "llvm/Support/Host.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCDisassembler.h"
+
+//#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1
+
+using namespace llvm;
+
+#define VERIFY_INSTRUCTION_AGAINST_INTERPRETER(fn, tc, input, ...) \
+VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &PPULLVMRecompiler::fn, &PPUInterpreter::fn, input, __VA_ARGS__)
+
+#define VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(fn, s, n, ...) { \
+ PPUState input; \
+ for (int i = s; i < (n + s); i++) { \
+ input.SetRandom(0x10000); \
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(fn, i, input, __VA_ARGS__); \
+ } \
+}
+
+/// Register state of a PPU
+struct PPUState {
+ /// Floating point registers
+ PPCdouble FPR[32];
+
+ ///Floating point status and control register
+ FPSCRhdr FPSCR;
+
+ /// General purpose reggisters
+ u64 GPR[32];
+
+ /// Vector purpose registers
+ u128 VPR[32];
+
+ /// Condition register
+ CRhdr CR;
+
+ /// Fixed point exception register
+ XERhdr XER;
+
+ /// Vector status and control register
+ VSCRhdr VSCR;
+
+ /// Link register
+ u64 LR;
+
+ /// Count register
+ u64 CTR;
+
+ /// SPR general purpose registers
+ u64 SPRG[8];
+
+ /// Time base register
+ u64 TB;
+
+ /// Reservations
+ u64 R_ADDR;
+ u64 R_VALUE;
+
+ /// Mmeory block
+ u32 address;
+ u64 mem_block[64];
+
+ void Load(PPUThread & ppu, u32 addr) {
+ for (int i = 0; i < 32; i++) {
+ FPR[i] = ppu.FPR[i];
+ GPR[i] = ppu.GPR[i];
+ VPR[i] = ppu.VPR[i];
+
+ if (i < 8) {
+ SPRG[i] = ppu.SPRG[i];
+ }
+ }
+
+ FPSCR = ppu.FPSCR;
+ CR = ppu.CR;
+ XER = ppu.XER;
+ VSCR = ppu.VSCR;
+ LR = ppu.LR;
+ CTR = ppu.CTR;
+ TB = ppu.TB;
+
+ R_ADDR = ppu.R_ADDR;
+ R_VALUE = ppu.R_VALUE;
+
+ address = addr;
+ for (int i = 0; i < (sizeof(mem_block) / 8); i++) {
+ mem_block[i] = vm::read64(address + (i * 8));
+ }
+ }
+
+ void Store(PPUThread & ppu) {
+ for (int i = 0; i < 32; i++) {
+ ppu.FPR[i] = FPR[i];
+ ppu.GPR[i] = GPR[i];
+ ppu.VPR[i] = VPR[i];
+
+ if (i < 8) {
+ ppu.SPRG[i] = SPRG[i];
+ }
+ }
+
+ ppu.FPSCR = FPSCR;
+ ppu.CR = CR;
+ ppu.XER = XER;
+ ppu.VSCR = VSCR;
+ ppu.LR = LR;
+ ppu.CTR = CTR;
+ ppu.TB = TB;
+
+ ppu.R_ADDR = R_ADDR;
+ ppu.R_VALUE = R_VALUE;
+
+ for (int i = 0; i < (sizeof(mem_block) / 8); i++) {
+ vm::write64(address + (i * 8), mem_block[i]);
+ }
+ }
+
+ void SetRandom(u32 addr) {
+ std::mt19937_64 rng;
+
+ rng.seed((u32)std::chrono::high_resolution_clock::now().time_since_epoch().count());
+ for (int i = 0; i < 32; i++) {
+ FPR[i] = (double)rng();
+ GPR[i] = rng();
+ VPR[i]._f[0] = (float)rng();
+ VPR[i]._f[1] = (float)rng();
+ VPR[i]._f[2] = (float)rng();
+ VPR[i]._f[3] = (float)rng();
+
+ if (i < 8) {
+ SPRG[i] = rng();
+ }
+ }
+
+ FPSCR.FPSCR = (u32)rng();
+ CR.CR = (u32)rng();
+ XER.XER = 0;
+ XER.CA = (u32)rng();
+ XER.SO = (u32)rng();
+ XER.OV = (u32)rng();
+ VSCR.VSCR = (u32)rng();
+ VSCR.X = 0;
+ VSCR.Y = 0;
+ LR = rng();
+ CTR = rng();
+ TB = rng();
+ R_ADDR = rng();
+ R_VALUE = rng();
+
+ address = addr;
+ for (int i = 0; i < (sizeof(mem_block) / 8); i++) {
+ mem_block[i] = rng();
+ }
+ }
+
+ std::string ToString() const {
+ std::string ret;
+
+ for (int i = 0; i < 32; i++) {
+ ret += fmt::Format("GPR[%02d] = 0x%016llx FPR[%02d] = %16g VPR[%02d] = 0x%s [%s]\n", i, GPR[i], i, FPR[i]._double, i, VPR[i].to_hex().c_str(), VPR[i].to_xyzw().c_str());
+ }
+
+ for (int i = 0; i < 8; i++) {
+ ret += fmt::Format("SPRG[%d] = 0x%016llx\n", i, SPRG[i]);
+ }
+
+ ret += fmt::Format("CR = 0x%08x LR = 0x%016llx CTR = 0x%016llx TB=0x%016llx\n", CR.CR, LR, CTR, TB);
+ ret += fmt::Format("XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", XER.XER, fmt::by_value(XER.CA), fmt::by_value(XER.OV), fmt::by_value(XER.SO));
+ //ret += fmt::Format("FPSCR = 0x%08x " // TODO: Uncomment after implementing FPSCR
+ // "[RN=%d | NI=%d | XE=%d | ZE=%d | UE=%d | OE=%d | VE=%d | "
+ // "VXCVI=%d | VXSQRT=%d | VXSOFT=%d | FPRF=%d | "
+ // "FI=%d | FR=%d | VXVC=%d | VXIMZ=%d | "
+ // "VXZDZ=%d | VXIDI=%d | VXISI=%d | VXSNAN=%d | "
+ // "XX=%d | ZX=%d | UX=%d | OX=%d | VX=%d | FEX=%d | FX=%d]\n",
+ // FPSCR.FPSCR,
+ // fmt::by_value(FPSCR.RN),
+ // fmt::by_value(FPSCR.NI), fmt::by_value(FPSCR.XE), fmt::by_value(FPSCR.ZE), fmt::by_value(FPSCR.UE), fmt::by_value(FPSCR.OE), fmt::by_value(FPSCR.VE),
+ // fmt::by_value(FPSCR.VXCVI), fmt::by_value(FPSCR.VXSQRT), fmt::by_value(FPSCR.VXSOFT), fmt::by_value(FPSCR.FPRF),
+ // fmt::by_value(FPSCR.FI), fmt::by_value(FPSCR.FR), fmt::by_value(FPSCR.VXVC), fmt::by_value(FPSCR.VXIMZ),
+ // fmt::by_value(FPSCR.VXZDZ), fmt::by_value(FPSCR.VXIDI), fmt::by_value(FPSCR.VXISI), fmt::by_value(FPSCR.VXSNAN),
+ // fmt::by_value(FPSCR.XX), fmt::by_value(FPSCR.ZX), fmt::by_value(FPSCR.UX), fmt::by_value(FPSCR.OX), fmt::by_value(FPSCR.VX), fmt::by_value(FPSCR.FEX), fmt::by_value(FPSCR.FX));
+ //ret += fmt::Format("VSCR = 0x%08x [NJ=%d | SAT=%d]\n", VSCR.VSCR, fmt::by_value(VSCR.NJ), fmt::by_value(VSCR.SAT)); // TODO: Uncomment after implementing VSCR.SAT
+ ret += fmt::Format("R_ADDR = 0x%016llx R_VALUE = 0x%016llx\n", R_ADDR, R_VALUE);
+
+ for (int i = 0; i < (sizeof(mem_block) / 8); i += 2) {
+ ret += fmt::Format("mem_block[%d] = 0x%016llx mem_block[%d] = 0x%016llx\n", i, mem_block[i], i + 1, mem_block[i + 1]);
+ }
+
+ return ret;
+ }
+};
+
+#ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS
+static PPUThread * s_ppu_state = nullptr;
+static PPUInterpreter * s_interpreter = nullptr;
+#endif // PPU_LLVM_RECOMPILER_UNIT_TESTS
+
+template
+void PPULLVMRecompiler::VerifyInstructionAgainstInterpreter(const char * name, PPULLVMRecompilerFn recomp_fn, PPUInterpreterFn interp_fn, PPUState & input_state, Args... args) {
+#ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS
+ auto test_case = [&]() {
+ (this->*recomp_fn)(args...);
+ };
+ auto input = [&]() {
+ input_state.Store(*s_ppu_state);
+ };
+ auto check_result = [&](std::string & msg) {
+ PPUState recomp_output_state;
+ PPUState interp_output_state;
+
+ recomp_output_state.Load(*s_ppu_state, input_state.address);
+ input_state.Store(*s_ppu_state);
+ (s_interpreter->*interp_fn)(args...);
+ interp_output_state.Load(*s_ppu_state, input_state.address);
+
+ if (interp_output_state.ToString() != recomp_output_state.ToString()) {
+ msg = std::string("Input state:\n") + input_state.ToString() +
+ std::string("\nOutput state:\n") + recomp_output_state.ToString() +
+ std::string("\nInterpreter output state:\n") + interp_output_state.ToString();
+ return false;
+ }
+
+ return true;
+ };
+ RunTest(name, test_case, input, check_result);
+#endif // PPU_LLVM_RECOMPILER_UNIT_TESTS
+}
+
+void PPULLVMRecompiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) {
+#ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS
+ // Create the unit test function
+ m_current_function = (Function *)m_module->getOrInsertFunction(name, m_ir_builder->getVoidTy(),
+ m_ir_builder->getInt8PtrTy() /*ppu_state*/,
+ m_ir_builder->getInt64Ty() /*base_addres*/,
+ m_ir_builder->getInt8PtrTy() /*interpreter*/, nullptr);
+ m_current_function->setCallingConv(CallingConv::X86_64_Win64);
+ auto arg_i = m_current_function->arg_begin();
+ arg_i->setName("ppu_state");
+ (++arg_i)->setName("base_address");
+ (++arg_i)->setName("interpreter");
+
+ auto block = BasicBlock::Create(*m_llvm_context, "start", m_current_function);
+ m_ir_builder->SetInsertPoint(block);
+
+ test_case();
+
+ m_ir_builder->CreateRetVoid();
+
+ // Print the IR
+ std::string ir;
+ raw_string_ostream ir_ostream(ir);
+ m_current_function->print(ir_ostream);
+ LOG_NOTICE(PPU, "[UT %s] LLVM IR:%s", name, ir.c_str());
+
+ std::string verify;
+ raw_string_ostream verify_ostream(verify);
+ if (verifyFunction(*m_current_function, &verify_ostream)) {
+ LOG_ERROR(PPU, "[UT %s] Verification Failed:%s", name, verify.c_str());
+ return;
+ }
+
+ // Optimize
+ m_fpm->run(*m_current_function);
+
+ // Print the optimized IR
+ ir = "";
+ m_current_function->print(ir_ostream);
+ LOG_NOTICE(PPU, "[UT %s] Optimized LLVM IR:%s", name, ir.c_str());
+
+ // Generate the function
+ MachineCodeInfo mci;
+ m_execution_engine->runJITOnFunction(m_current_function, &mci);
+
+ // Disassemble the generated function
+ auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr);
+
+ LOG_NOTICE(PPU, "[UT %s] Disassembly:", name);
+ for (uint64_t pc = 0; pc < mci.size();) {
+ char str[1024];
+
+ auto size = LLVMDisasmInstruction(disassembler, (uint8_t *)mci.address() + pc, mci.size() - pc, (uint64_t)((uint8_t *)mci.address() + pc), str, sizeof(str));
+ LOG_NOTICE(PPU, "[UT %s] %p: %s.", name, (uint8_t *)mci.address() + pc, str);
+ pc += size;
+ }
+
+ LLVMDisasmDispose(disassembler);
+
+ // Run the test
+ input();
+ std::vector args;
+ args.push_back(GenericValue(s_ppu_state));
+ args.push_back(GenericValue(s_interpreter));
+ m_execution_engine->runFunction(m_current_function, args);
+
+ // Verify results
+ std::string msg;
+ bool pass = check_result(msg);
+ if (pass) {
+ LOG_NOTICE(PPU, "[UT %s] Test passed. %s", name, msg.c_str());
+ } else {
+ LOG_ERROR(PPU, "[UT %s] Test failed. %s", name, msg.c_str());
+ }
+
+ m_execution_engine->freeMachineCodeForFunction(m_current_function);
+#endif // PPU_LLVM_RECOMPILER_UNIT_TESTS
+}
+
+void PPULLVMRecompiler::RunAllTests(PPUThread * ppu_state, PPUInterpreter * interpreter) {
+#ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS
+ s_ppu_state = ppu_state;
+ s_interpreter = interpreter;
+
+ PPUState initial_state;
+ initial_state.Load(*ppu_state, 0x10000);
+
+ LOG_NOTICE(PPU, "Running Unit Tests");
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFVSCR, 0, 5, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTVSCR, 0, 5, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDCUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSBS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSHS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSWS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAND, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VANDC, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 0, 5, 0, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 5, 5, 0, 3, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 0, 5, 0, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 5, 5, 0, 2, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 5, 5, 0, 1, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMADDFP, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VREFP, 0, 5, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSEL, 0, 5, 0, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSL, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLDOI, 0, 5, 0, 1, 2, 6);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLO, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTB, 0, 5, 0, 3, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTH, 0, 5, 0, 3, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISB, 0, 5, 0, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISH, 0, 5, 0, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISW, 0, 5, 0, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTW, 0, 5, 0, 3, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSR, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRB, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRH, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRO, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRW, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBFP, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSBS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSHS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSWS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0, 1, 2);
+ // TODO: Rest of the vector instructions
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLI, 0, 5, 1, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFIC, 0, 5, 1, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 0, 5, 1, 0, 7, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 5, 5, 1, 1, 7, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 0, 5, 5, 0, 7, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 5, 5, 5, 1, 7, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC, 0, 5, 1, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC_, 0, 5, 1, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 0, 5, 1, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 5, 5, 0, 2, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 0, 5, 1, 2, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 5, 5, 0, 2, -12345);
+ // TODO: BC
+ // TODO: SC
+ // TODO: B
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 0, 5, 0, 7);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 5, 5, 6, 2);
+ // TODO: BCLR
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNOR, 0, 5, 0, 7, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRANDC, 0, 5, 5, 6, 7);
+ // TODO: ISYNC
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRXOR, 0, 5, 7, 7, 7);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNAND, 0, 5, 3, 4, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRAND, 0, 5, 1, 2, 3);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CREQV, 0, 5, 2, 1, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRORC, 0, 5, 3, 4, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CROR, 0, 5, 6, 7, 0);
+ // TODO: BCCTR
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 0, 5, 7, 8, 9, 12, 25, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 5, 5, 21, 22, 21, 18, 24, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 0, 5, 7, 8, 9, 12, 25, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 5, 5, 21, 22, 21, 18, 24, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 0, 5, 7, 8, 9, 12, 25, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 5, 5, 21, 22, 21, 18, 24, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORI, 0, 5, 25, 29, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORIS, 0, 5, 7, 31, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORI, 0, 5, 0, 19, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORIS, 0, 5, 3, 14, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDI_, 0, 5, 16, 7, 12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDIS_, 0, 5, 23, 21, -12345);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 0, 5, 7, 8, 9, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 5, 5, 21, 22, 43, 43, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 0, 5, 7, 8, 0, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 5, 5, 21, 22, 63, 43, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 0, 5, 7, 8, 9, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 5, 5, 21, 22, 23, 43, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 0, 5, 7, 8, 9, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 5, 5, 21, 22, 23, 43, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 0, 5, 7, 8, 9, 12, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 5, 5, 21, 22, 23, 43, 1, 1);
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 0, 5, 7, 8, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 5, 5, 21, 22, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 0, 5, 7, 8, 9, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 5, 5, 21, 22, 23, 0, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 0, 5, 7, 8, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 5, 5, 21, 22, 23, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 0, 5, 3, 0, 9, 31);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 5, 5, 6, 1, 23, 14);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 0, 5, 3, 0, 9, 31);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 5, 5, 6, 1, 23, 14);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 0, 5, 0, 1, 2, 0, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 5, 5, 0, 1, 2, 0, true);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 0, 5, 0, 1, 2, 0, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 5, 5, 0, 1, 2, 0, true);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 0, 5, 3, 5, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 5, 5, 3, 5, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 0, 5, 6, 9, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 5, 5, 6, 9, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 0, 5, 25, 29, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 5, 5, 25, 29, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 0, 5, 0x20, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 5, 5, 0x100, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 10, 5, 0x120, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 15, 5, 0x8, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 0, 5, 5, 0x20);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 5, 5, 5, 0x100);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 10, 5, 5, 0x120);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 15, 5, 5, 0x8);
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 0, 5, 5, 6, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 5, 5, 5, 6, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 10, 5, 5, 6, 22, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 15, 5, 5, 6, 31, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 0, 5, 5, 6, 0, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 5, 5, 5, 6, 12, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 10, 5, 5, 6, 48, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 15, 5, 5, 6, 63, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 0, 5, 5, 6, 7, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 5, 5, 5, 6, 7, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 0, 5, 5, 6, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 5, 5, 5, 6, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 0, 5, 5, 6, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 5, 5, 5, 6, 1);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EIEIO, 0, 5);
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0, 1, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRTS, 0, 5, 0, 1, false);
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0, 1, 2, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0, 1, 2, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0, 1, 2, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMUL, 0, 5, 0, 1, 2, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUB, 0, 5, 0, 1, 2, 3, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADD, 0, 5, 0, 1, 2, 3, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUB, 0, 5, 0, 1, 2, 3, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADD, 0, 5, 0, 1, 2, 3, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNEG, 0, 5, 0, 1, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMR, 0, 5, 0, 1, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNABS, 0, 5, 0, 1, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FABS, 0, 5, 0, 1, false);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0, 1, false);
+
+ PPUState input;
+ input.SetRandom(0x10000);
+ input.GPR[14] = 10;
+ input.GPR[21] = 15;
+ input.GPR[23] = 0x10000;
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 0, input, 5, 0, 0x100F0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 1, input, 5, 14, 0x100F0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAU, 0, input, 5, 14, 0x100F0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHBRX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 0, input, 5, 0, 0x100F0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 1, input, 5, 14, 0x100F0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWBRX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDBRX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 1, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDU, 0, input, 5, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDUX, 0, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 0, input, 5, 23, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 1, input, 5, 23, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 2, input, 5, 23, 7);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 3, input, 5, 23, 25);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 1, input, 16, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 2, input, 5, 21, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 2, input, 5, 21, 23);
+
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHBRX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWBRX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 0, input, 3, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 1, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDU, 0, input, 3, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 0, input, 3, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 1, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDUX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFIWX, 0, input, 3, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 0, input, 5, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 1, input, 5, 14, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 0, input, 5, 0, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 1, input, 16, 14, 0x10000);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 0, input, 5, 23, 0);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 1, input, 5, 23, 2);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 2, input, 5, 23, 7);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 3, input, 5, 23, 25);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 0, input, 0, 23);
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 1, input, 14, 23);
+
+ initial_state.Store(*ppu_state);
+#endif // PPU_LLVM_RECOMPILER_UNIT_TESTS
+}
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 6cb068612..f9d01a75a 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -9,6 +9,7 @@
#include "Emu/SysCalls/Static.h"
#include "Emu/Cell/PPUDecoder.h"
#include "Emu/Cell/PPUInterpreter.h"
+#include "Emu/Cell/PPULLVMRecompiler.h"
PPUThread& GetCurrentPPUThread()
{
@@ -103,13 +104,18 @@ void PPUThread::DoRun()
break;
case 1:
- case 2:
{
auto ppui = new PPUInterpreter(*this);
m_dec = new PPUDecoder(ppui);
}
break;
+ case 2:
+ if (!m_dec) {
+ m_dec = new PPULLVMEmulator(*this);
+ }
+ break;
+
default:
LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue());
Emu.Pause();
diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp
index a11a094c3..c1475e697 100644
--- a/rpcs3/Gui/MainFrame.cpp
+++ b/rpcs3/Gui/MainFrame.cpp
@@ -419,8 +419,8 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event))
wxCheckBox* chbox_dbg_ap_systemcall = new wxCheckBox(p_hle, wxID_ANY, "Auto Pause at System Call");
wxCheckBox* chbox_dbg_ap_functioncall = new wxCheckBox(p_hle, wxID_ANY, "Auto Pause at Function Call");
- cbox_cpu_decoder->Append("PPU Interpreter & DisAsm");
cbox_cpu_decoder->Append("PPU Interpreter");
+ cbox_cpu_decoder->Append("PPU JIT (LLVM)");
cbox_spu_decoder->Append("SPU Interpreter");
cbox_spu_decoder->Append("SPU JIT (asmjit)");
diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h
index 831564807..7c2e61172 100644
--- a/rpcs3/Ini.h
+++ b/rpcs3/Ini.h
@@ -241,7 +241,7 @@ public:
void Load()
{
// Core
- CPUDecoderMode.Load(2);
+ CPUDecoderMode.Load(1);
SPUDecoderMode.Load(1);
// Graphics
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index f8227c3c0..007b95661 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -61,6 +61,7 @@
+
@@ -216,6 +217,7 @@
+
Create
Create
@@ -427,6 +429,7 @@
+
@@ -510,15 +513,15 @@
.\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
- .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
+ .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include
$(Platform)\$(Configuration)\emucore\
- .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
+ .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include
$(Platform)\$(Configuration)\emucore\
- .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
+ .\;..\;..\asmjit\src\asmjit;..\wxWidgets\include\msvc;..\wxWidgets\include;.\OpenAL\include;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);..\llvm\include;..\llvm_build\include
$(Platform)\$(Configuration)\emucore\
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 072feed49..e164e1c53 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -626,6 +626,12 @@
Emu\SysCalls\Modules
+
+ Emu\Cell
+
+
+ Source Files
+
@@ -1234,5 +1240,8 @@
Emu\SysCalls\Modules
+
+ Emu\Cell
+
\ No newline at end of file
diff --git a/rpcs3/emucore.vcxproj.user b/rpcs3/emucore.vcxproj.user
index ef5ff2a1f..2a22e69ac 100644
--- a/rpcs3/emucore.vcxproj.user
+++ b/rpcs3/emucore.vcxproj.user
@@ -1,4 +1,4 @@
-
-
-
+
+
+
\ No newline at end of file
diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj
index 77d5435fb..d1c76abf3 100644
--- a/rpcs3/rpcs3.vcxproj
+++ b/rpcs3/rpcs3.vcxproj
@@ -87,10 +87,10 @@
true
- $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies)
+ $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;
;%(AdditionalDependencies)
%(IgnoreSpecificDefaultLibraries)
false
- ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64
+ ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64;..\llvm_build\Debug\lib
"$(SolutionDir)\Utilities\git-version-gen.cmd"
@@ -113,10 +113,10 @@
true
- $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies)
+ $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;
;%(AdditionalDependencies)
%(IgnoreSpecificDefaultLibraries)
false
- ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64
+ ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64;..\llvm_build\Debug\lib
"$(SolutionDir)\Utilities\git-version-gen.cmd"
@@ -148,12 +148,12 @@
true
true
true
- $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;%(AdditionalDependencies)
+ $(SolutionDir)$(Platform)\$(Configuration)\emucore.lib;wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;libOpenAL32.dll.a;asmjit.lib;LLVMJIT.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;
;%(AdditionalDependencies)
%(IgnoreSpecificDefaultLibraries)
false
- ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64
+ ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64;..\llvm_build\Release\lib
diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h
index e1c4825f8..7e67a2723 100644
--- a/rpcs3/stdafx.h
+++ b/rpcs3/stdafx.h
@@ -32,6 +32,7 @@
#include
#include
#include
+#include
#include
#include "Utilities/GNU.h"