From b2469039afce9cafb979dd6191bc87ec627d7888 Mon Sep 17 00:00:00 2001 From: Malcolm Date: Sun, 1 Feb 2026 03:00:09 +0000 Subject: [PATCH] ARM64: Detect some arm features and let LLVM know if they are or aren't present via attributes - On x86, LLVM has robust detection for the CPU name. If a CPU like skylake has AVX disabled, it will fall back to something without AVX (nehalem) - On ARM, detection is not as robust. For instance, on my snapdragon 8 gen 2, it assumes that we have SVE support, as the cortex-x3 supports SVE. - If an ARM cpu is paired with other cpus from another generation which don't support the same instructions as the cortex-x3, or if the cortex-x3 just has SVE disabled for no apparent reason (in the case of the snapdragon 8 gen 2) - We need to actually detect that ourselves. - Beyond SVE also detect support for some instructions that might be useful for SPU LLVM when optimized with intrinsics. --- Utilities/JITLLVM.cpp | 25 +++++++++ rpcs3/util/sysinfo.cpp | 114 ++++++++++++++++++++++++++++++++++++++++- rpcs3/util/sysinfo.hpp | 10 ++++ 3 files changed, 148 insertions(+), 1 deletion(-) diff --git a/Utilities/JITLLVM.cpp b/Utilities/JITLLVM.cpp index 166ee7cec2..34e1572185 100644 --- a/Utilities/JITLLVM.cpp +++ b/Utilities/JITLLVM.cpp @@ -688,6 +688,30 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co mem = std::make_unique(std::move(symbols_cement)); } + std::vector attributes; + +#if defined(ARCH_ARM64) + if (utils::has_sha3()) + attributes.push_back("+sha3"); + else + attributes.push_back("-sha3"); + + if (utils::has_dotprod()) + attributes.push_back("+dotprod"); + else + attributes.push_back("-dotprod"); + + if (utils::has_sve()) + attributes.push_back("+sve"); + else + attributes.push_back("-sve"); + + if (utils::has_sve2()) + attributes.push_back("+sve2"); + else + attributes.push_back("-sve2"); +#endif + { m_engine.reset(llvm::EngineBuilder(std::move(null_mod)) .setErrorStr(&result) @@ -699,6 +723,7 @@
jit_compiler::jit_compiler(const std::unordered_map& _link, co //.setCodeModel(llvm::CodeModel::Large) #endif .setRelocationModel(llvm::Reloc::Model::PIC_) + .setMAttrs(attributes) .setMCPU(m_cpu) .create()); } diff --git a/rpcs3/util/sysinfo.cpp b/rpcs3/util/sysinfo.cpp index 94563e8d10..e1e8ab8404 100755 --- a/rpcs3/util/sysinfo.cpp +++ b/rpcs3/util/sysinfo.cpp @@ -16,9 +16,15 @@ #else #include #include -#ifndef __APPLE__ +#ifdef __APPLE__ +#include +#else #include #include +#if defined(ARCH_ARM64) && defined(__linux__) +#include +#include +#endif #endif #endif @@ -444,6 +450,100 @@ u32 utils::get_rep_movsb_threshold() return g_value; } +#ifdef ARCH_ARM64 + +bool utils::has_neon() +{ + static const bool g_value = []() -> bool + { +#if defined(__linux__) + return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0; +#elif defined(__APPLE__) + int val = 0; + size_t len = sizeof(val); + sysctlbyname("hw.optional.AdvSIMD", &val, &len, nullptr, 0); + return val != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != 0; +#endif + }(); + return g_value; +} + +bool utils::has_sha3() +{ + static const bool g_value = []() -> bool + { +#if defined(__linux__) + return (getauxval(AT_HWCAP) & HWCAP_SHA3) != 0; +#elif defined(__APPLE__) + int val = 0; + size_t len = sizeof(val); + sysctlbyname("hw.optional.arm.FEAT_SHA3", &val, &len, nullptr, 0); + return val != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE) != 0; +#endif + }(); + return g_value; +} + +bool utils::has_dotprod() +{ + static const bool g_value = []() -> bool + { +#if defined(__linux__) + return (getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0; +#elif defined(__APPLE__) + int val = 0; + size_t len = sizeof(val); + sysctlbyname("hw.optional.arm.FEAT_DotProd", &val, &len, nullptr, 0); + return val != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; +#endif + }(); + return g_value; +} 
+ +bool utils::has_sve() +{ + static const bool g_value = []() -> bool + { +#if defined(__linux__) + return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0; +#elif defined(__APPLE__) + int val = 0; + size_t len = sizeof(val); + sysctlbyname("hw.optional.arm.FEAT_SVE", &val, &len, nullptr, 0); + return val != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != 0; +#endif + }(); + return g_value; +} + +bool utils::has_sve2() +{ + static const bool g_value = []() -> bool + { +#if defined(__linux__) + return (getauxval(AT_HWCAP2) & HWCAP2_SVE2) != 0; +#elif defined(__APPLE__) + int val = 0; + size_t len = sizeof(val); + sysctlbyname("hw.optional.arm.FEAT_SVE2", &val, &len, nullptr, 0); + return val != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != 0; +#endif + }(); + return g_value; +} + +#endif + std::string utils::get_cpu_brand() { #if defined(ARCH_X64) @@ -496,6 +596,17 @@ std::string utils::get_system_info() { fmt::append(result, " | TSC: Disabled"); } +#ifdef ARCH_ARM64 + + if (has_neon()) + { + result += " | Neon"; + } + else + { + fmt::throw_exception("Neon support not present"); + } +#else if (has_avx()) { @@ -562,6 +673,7 @@ std::string utils::get_system_info() { result += " | TSX disabled via microcode"; } +#endif return result; } diff --git a/rpcs3/util/sysinfo.hpp b/rpcs3/util/sysinfo.hpp index fd7e810f67..d9bd0c6660 100755 --- a/rpcs3/util/sysinfo.hpp +++ b/rpcs3/util/sysinfo.hpp @@ -54,7 +54,17 @@ namespace utils bool has_appropriate_um_wait(); bool has_um_wait(); +#ifdef ARCH_ARM64 + bool has_neon(); + bool has_sha3(); + + bool has_dotprod(); + + bool has_sve(); + + bool has_sve2(); +#endif std::string get_cpu_brand(); std::string get_system_info();