ARM64: Detect some arm features and let LLVM know if they are or aren't present via attributes
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (0, 51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (1, 8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang ${{ matrix.arch }} (aarch64, clang, clangarm64, ARM64, windows-11-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang ${{ matrix.arch }} (x86_64, clang, clang64, X64, windows-2025) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run

- On x86, LLVM has robust detection for the CPU name. If a CPU like skylake has AVX disabled, it will fall back to something without AVX (nehalem)
- On ARM, detection is not as robust. For instance, on my snapdragon 8 gen 2, it assumes that we have SVE support, as the cortex-x3 supports SVE.
- If an ARM CPU is paired with cores from another generation that don't support the same instructions as the cortex-x3, or if the cortex-x3 just has SVE disabled for no apparent reason (as is the case on the snapdragon 8 gen 2), we need to actually detect that ourselves.
- Beyond SVE, also detect support for some instructions that might be useful for SPU LLVM when optimized with intrinsics.
This commit is contained in:
Malcolm 2026-02-01 03:00:09 +00:00 committed by Elad
parent 8f066541a3
commit b2469039af
3 changed files with 148 additions and 1 deletions

View file

@ -688,6 +688,30 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
mem = std::make_unique<MemoryManager1>(std::move(symbols_cement));
}
// Target feature attributes handed to the LLVM EngineBuilder via setMAttrs() below.
// Stays empty on non-ARM64 builds.
std::vector<std::string> attributes;
#if defined(ARCH_ARM64)
// Every probed feature is stated explicitly: "+name" when the runtime check reports it,
// "-name" otherwise, so the attribute list carries the detected state either way.
// NOTE(review): presumably this overrides whatever the CPU name given to setMCPU()
// would imply on its own - confirm against LLVM's attribute handling.
if (utils::has_sha3())
attributes.push_back("+sha3");
else
attributes.push_back("-sha3");
if (utils::has_dotprod())
attributes.push_back("+dotprod");
else
attributes.push_back("-dotprod");
if (utils::has_sve())
attributes.push_back("+sve");
else
attributes.push_back("-sve");
if (utils::has_sve2())
attributes.push_back("+sve2");
else
attributes.push_back("-sve2");
#endif
{
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
.setErrorStr(&result)
@ -699,6 +723,7 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
//.setCodeModel(llvm::CodeModel::Large)
#endif
.setRelocationModel(llvm::Reloc::Model::PIC_)
.setMAttrs(attributes)
.setMCPU(m_cpu)
.create());
}

View file

@ -16,9 +16,15 @@
#else
#include <unistd.h>
#include <sys/resource.h>
#ifndef __APPLE__
#ifdef __APPLE__
#include <sys/sysctl.h>
#else
#include <sys/utsname.h>
#include <errno.h>
#if defined(ARCH_ARM64) && defined(__linux__)
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
#endif
#endif
@ -444,6 +450,100 @@ u32 utils::get_rep_movsb_threshold()
return g_value;
}
#ifdef ARCH_ARM64
// Reports whether Advanced SIMD (NEON) is available on this machine.
// The platform query runs once, on the first call; the result is kept in a
// function-local static and returned on every later call.
bool utils::has_neon()
{
	static const bool g_value = []() -> bool
	{
#if defined(__linux__)
		// Advanced SIMD is advertised as a bit in the AT_HWCAP auxiliary vector entry
		return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
		// val is pre-set to 0, so a query that does not fill it in reads as "not present"
		int val = 0;
		size_t len = sizeof(val);
		sysctlbyname("hw.optional.AdvSIMD", &val, &len, nullptr, 0);
		return val != 0;
#elif defined(_WIN32)
		// PF_ARM_NEON_INSTRUCTIONS_AVAILABLE is the flag documented for the NEON instruction set;
		// PF_ARM_VFP_32_REGISTERS_AVAILABLE only describes the 32 x 64-bit register bank
		return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif defined(__ARM_NEON)
		// No runtime probe for this OS: the compiler was already allowed to emit NEON
		// instructions for this binary, so treat the feature as present
		return true;
#else
		// No runtime probe and no compile-time guarantee: report the feature as absent
		// instead of running off the end of the lambda without a return value
		return false;
#endif
	}();
	return g_value;
}
// Reports whether the SHA3 instructions are available on this machine.
// The platform query runs once, on the first call; the result is kept in a
// function-local static and returned on every later call.
bool utils::has_sha3()
{
	static const bool g_value = []() -> bool
	{
#if defined(__linux__)
		// Advertised as a bit in the AT_HWCAP auxiliary vector entry
		return (getauxval(AT_HWCAP) & HWCAP_SHA3) != 0;
#elif defined(__APPLE__)
		// val is pre-set to 0, so a query that does not fill it in reads as "not present"
		int val = 0;
		size_t len = sizeof(val);
		sysctlbyname("hw.optional.arm.FEAT_SHA3", &val, &len, nullptr, 0);
		return val != 0;
#elif defined(_WIN32)
		return IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE) != 0;
#else
		// No runtime probe for this OS: report the optional feature as absent
		// instead of running off the end of the lambda without a return value
		return false;
#endif
	}();
	return g_value;
}
// Reports whether the Advanced SIMD dot product instructions are available on this machine.
// The platform query runs once, on the first call; the result is kept in a
// function-local static and returned on every later call.
bool utils::has_dotprod()
{
	static const bool g_value = []() -> bool
	{
#if defined(__linux__)
		// Advertised as a bit in the AT_HWCAP auxiliary vector entry
		return (getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0;
#elif defined(__APPLE__)
		// val is pre-set to 0, so a query that does not fill it in reads as "not present"
		int val = 0;
		size_t len = sizeof(val);
		sysctlbyname("hw.optional.arm.FEAT_DotProd", &val, &len, nullptr, 0);
		return val != 0;
#elif defined(_WIN32)
		return IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0;
#else
		// No runtime probe for this OS: report the optional feature as absent
		// instead of running off the end of the lambda without a return value
		return false;
#endif
	}();
	return g_value;
}
// Reports whether SVE is available on this machine.
// The platform query runs once, on the first call; the result is kept in a
// function-local static and returned on every later call.
bool utils::has_sve()
{
	static const bool g_value = []() -> bool
	{
#if defined(__linux__)
		// Advertised as a bit in the AT_HWCAP auxiliary vector entry
		return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0;
#elif defined(__APPLE__)
		// val is pre-set to 0, so a query that does not fill it in reads as "not present"
		int val = 0;
		size_t len = sizeof(val);
		sysctlbyname("hw.optional.arm.FEAT_SVE", &val, &len, nullptr, 0);
		return val != 0;
#elif defined(_WIN32)
		return IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != 0;
#else
		// No runtime probe for this OS: report the optional feature as absent
		// instead of running off the end of the lambda without a return value
		return false;
#endif
	}();
	return g_value;
}
// Reports whether SVE2 is available on this machine.
// The platform query runs once, on the first call; the result is kept in a
// function-local static and returned on every later call.
bool utils::has_sve2()
{
	static const bool g_value = []() -> bool
	{
#if defined(__linux__)
		// Unlike the other feature checks here, this bit is read from AT_HWCAP2
		return (getauxval(AT_HWCAP2) & HWCAP2_SVE2) != 0;
#elif defined(__APPLE__)
		// val is pre-set to 0, so a query that does not fill it in reads as "not present"
		int val = 0;
		size_t len = sizeof(val);
		sysctlbyname("hw.optional.arm.FEAT_SVE2", &val, &len, nullptr, 0);
		return val != 0;
#elif defined(_WIN32)
		return IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != 0;
#else
		// No runtime probe for this OS: report the optional feature as absent
		// instead of running off the end of the lambda without a return value
		return false;
#endif
	}();
	return g_value;
}
#endif
std::string utils::get_cpu_brand()
{
#if defined(ARCH_X64)
@ -496,6 +596,17 @@ std::string utils::get_system_info()
{
fmt::append(result, " | TSC: Disabled");
}
#ifdef ARCH_ARM64
if (has_neon())
{
result += " | Neon";
}
else
{
fmt::throw_exception("Neon support not present");
}
#else
if (has_avx())
{
@ -562,6 +673,7 @@ std::string utils::get_system_info()
{
result += " | TSX disabled via microcode";
}
#endif
return result;
}

View file

@ -54,7 +54,17 @@ namespace utils
bool has_appropriate_um_wait();
bool has_um_wait();
#ifdef ARCH_ARM64
// Runtime CPU feature checks, only declared for ARM64 builds.
// Each one asks the operating system (getauxval on Linux, sysctlbyname on Apple
// platforms, IsProcessorFeaturePresent on Windows) and caches the answer after the first call.

// True if Advanced SIMD (NEON) is reported as available
bool has_neon();
// True if the SHA3 instructions are reported as available
bool has_sha3();
// True if the dot product instructions are reported as available
bool has_dotprod();
// True if SVE is reported as available
bool has_sve();
// True if SVE2 is reported as available
bool has_sve2();
#endif
std::string get_cpu_brand();
std::string get_system_info();