From 99dfff25ac9ddd191cdffeafcb4fa06a6c8393fe Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 24 Oct 2023 21:52:07 +0100 Subject: [PATCH] oaknut: feature_detection: Read ID registers --- CMakeLists.txt | 1 + .../feature_detection/feature_detection.hpp | 11 +- .../feature_detection_apple.hpp | 7 + .../feature_detection_freebsd.hpp | 12 + .../feature_detection_generic.hpp | 8 + .../feature_detection_hwcaps.hpp | 2 +- .../feature_detection_idregs.hpp | 167 +++++++++ .../feature_detection_linux.hpp | 13 + .../feature_detection_w32.hpp | 52 +++ .../oaknut/feature_detection/id_registers.hpp | 317 ++++++++++++++++++ .../read_id_registers_directly.hpp | 52 +++ tests/_feature_detect.cpp | 48 ++- 12 files changed, 685 insertions(+), 5 deletions(-) create mode 100644 include/oaknut/feature_detection/feature_detection_idregs.hpp create mode 100644 include/oaknut/feature_detection/id_registers.hpp create mode 100644 include/oaknut/feature_detection/read_id_registers_directly.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f07284..a79d708 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ set(header_files ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/id_registers.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp diff --git a/include/oaknut/feature_detection/feature_detection.hpp b/include/oaknut/feature_detection/feature_detection.hpp index b779939..9561bcd 100644 --- a/include/oaknut/feature_detection/feature_detection.hpp +++ b/include/oaknut/feature_detection/feature_detection.hpp @@ -5,18 +5,23 @@ #if defined(__APPLE__) # define OAKNUT_CPU_FEATURE_DETECTION 1 +# 
define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 # include "oaknut/feature_detection/feature_detection_apple.hpp" -#elif defined(__linux__) -# define OAKNUT_CPU_FEATURE_DETECTION 1 -# include "oaknut/feature_detection/feature_detection_linux.hpp" #elif defined(__FreeBSD__) # define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 # include "oaknut/feature_detection/feature_detection_freebsd.hpp" +#elif defined(__linux__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_linux.hpp" #elif defined(_WIN32) # define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2 # include "oaknut/feature_detection/feature_detection_w32.hpp" #else # define OAKNUT_CPU_FEATURE_DETECTION 0 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 # warning "Unsupported operating system for CPU feature detection" # include "oaknut/feature_detection/feature_detection_generic.hpp" #endif diff --git a/include/oaknut/feature_detection/feature_detection_apple.hpp b/include/oaknut/feature_detection/feature_detection_apple.hpp index 145ceff..4c17825 100644 --- a/include/oaknut/feature_detection/feature_detection_apple.hpp +++ b/include/oaknut/feature_detection/feature_detection_apple.hpp @@ -4,10 +4,12 @@ #pragma once #include +#include #include #include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" namespace oaknut { @@ -102,4 +104,9 @@ inline CpuFeatures detect_features() return detect_features_via_sysctlbyname(); } +inline std::optional read_id_registers() +{ + return std::nullopt; +} + } // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_freebsd.hpp b/include/oaknut/feature_detection/feature_detection_freebsd.hpp index 91a15d0..0a07188 100644 --- a/include/oaknut/feature_detection/feature_detection_freebsd.hpp +++ 
b/include/oaknut/feature_detection/feature_detection_freebsd.hpp @@ -3,10 +3,15 @@ #pragma once +#include +#include + #include #include "oaknut/feature_detection/cpu_feature.hpp" #include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" #ifndef AT_HWCAP # define AT_HWCAP 16 @@ -37,6 +42,13 @@ inline CpuFeatures detect_features_via_hwcap() return detect_features_via_hwcap(hwcap, hwcap2); } +inline std::optional read_id_registers() +{ + // HWCAP_CPUID is falsely not set on many FreeBSD kernel versions, + // so we don't bother checking it. + return id::read_id_registers_directly(); +} + inline CpuFeatures detect_features() { return detect_features_via_hwcap(); diff --git a/include/oaknut/feature_detection/feature_detection_generic.hpp b/include/oaknut/feature_detection/feature_detection_generic.hpp index dc69ab7..405a9b6 100644 --- a/include/oaknut/feature_detection/feature_detection_generic.hpp +++ b/include/oaknut/feature_detection/feature_detection_generic.hpp @@ -3,7 +3,10 @@ #pragma once +#include + #include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" namespace oaknut { @@ -12,4 +15,9 @@ inline CpuFeatures detect_features() return CpuFeatures{CpuFeature::FP, CpuFeature::ASIMD}; } +inline std::optional read_id_registers() +{ + return std::nullopt; +} + } // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp index a886b56..0985525 100644 --- a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp +++ b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp @@ -96,7 +96,7 @@ inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32 OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32 
OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64 - OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFxT + OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFXT OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16 OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16 OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC diff --git a/include/oaknut/feature_detection/feature_detection_idregs.hpp b/include/oaknut/feature_detection/feature_detection_idregs.hpp new file mode 100644 index 0000000..c26e7a9 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_idregs.hpp @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +inline CpuFeatures detect_features_via_id_registers(id::IdRegisters regs) /* inline: defined in a header, so it must be safe to include from several translation units */ +{ + CpuFeatures result; + + if (regs.pfr0.FP() >= 0) + result |= CpuFeatures{CpuFeature::FP}; + if (regs.pfr0.AdvSIMD() >= 0) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (regs.isar0.AES() >= 1) + result |= CpuFeatures{CpuFeature::AES}; + if (regs.isar0.AES() >= 2) + result |= CpuFeatures{CpuFeature::PMULL}; + if (regs.isar0.SHA1() >= 1) + result |= CpuFeatures{CpuFeature::SHA1}; + if (regs.isar0.SHA2() >= 1) + result |= CpuFeatures{CpuFeature::SHA256}; + if (regs.isar0.CRC32() >= 1) + result |= CpuFeatures{CpuFeature::CRC32}; + if (regs.isar0.Atomic() >= 2) + result |= CpuFeatures{CpuFeature::LSE}; + if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1) + result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16}; + if (regs.isar0.RDM() >= 1) + result |= CpuFeatures{CpuFeature::RDM}; + if (regs.isar1.JSCVT() >= 1) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (regs.isar1.FCMA() >= 1) + result |= CpuFeatures{CpuFeature::FCMA}; + if (regs.isar1.LRCPC() >= 1) + result |= CpuFeatures{CpuFeature::LRCPC}; + if (regs.isar1.DPB() >= 1) + result |= CpuFeatures{CpuFeature::DPB}; + if (regs.isar0.SHA3() 
>= 1) + result |= CpuFeatures{CpuFeature::SHA3}; + if (regs.isar0.SM3() >= 1) + result |= CpuFeatures{CpuFeature::SM3}; + if (regs.isar0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SM4}; + if (regs.isar0.DP() >= 1) + result |= CpuFeatures{CpuFeature::DotProd}; + if (regs.isar0.SHA2() >= 2) + result |= CpuFeatures{CpuFeature::SHA512}; + if (regs.pfr0.SVE() >= 1) + result |= CpuFeatures{CpuFeature::SVE}; + if (regs.isar0.FHM() >= 1) + result |= CpuFeatures{CpuFeature::FHM}; + if (regs.pfr0.DIT() >= 1) + result |= CpuFeatures{CpuFeature::DIT}; + if (regs.mmfr2.AT() >= 1) + result |= CpuFeatures{CpuFeature::LSE2}; + if (regs.isar1.LRCPC() >= 2) + result |= CpuFeatures{CpuFeature::LRCPC2}; + if (regs.isar0.TS() >= 1) + result |= CpuFeatures{CpuFeature::FlagM}; + if (regs.pfr1.SSBS() >= 2) + result |= CpuFeatures{CpuFeature::SSBS}; + if (regs.isar1.SB() >= 1) + result |= CpuFeatures{CpuFeature::SB}; + if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1) + result |= CpuFeatures{CpuFeature::PACA}; + if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1) + result |= CpuFeatures{CpuFeature::PACG}; + if (regs.isar1.DPB() >= 2) + result |= CpuFeatures{CpuFeature::DPB2}; + if (regs.zfr0.SVEver() >= 1) + result |= CpuFeatures{CpuFeature::SVE2}; + if (regs.zfr0.AES() >= 1) + result |= CpuFeatures{CpuFeature::SVE_AES}; + if (regs.zfr0.AES() >= 2) + result |= CpuFeatures{CpuFeature::SVE_PMULL128}; + if (regs.zfr0.BitPerm() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BITPERM}; + if (regs.zfr0.SHA3() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SHA3}; + if (regs.zfr0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SM4}; + if (regs.isar0.TS() >= 2) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (regs.isar1.FRINTTS() >= 1) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (regs.zfr0.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_I8MM}; + if (regs.zfr0.F32MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_F32MM}; + if (regs.zfr0.F64MM() >= 1) + result |= 
CpuFeatures{CpuFeature::SVE_F64MM}; + if (regs.zfr0.BF16() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BF16}; + if (regs.isar1.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::I8MM}; + if (regs.isar1.BF16() >= 1) + result |= CpuFeatures{CpuFeature::BF16}; + if (regs.isar1.DGH() >= 1) + result |= CpuFeatures{CpuFeature::DGH}; + if (regs.isar0.RNDR() >= 1) + result |= CpuFeatures{CpuFeature::RNG}; + if (regs.pfr1.BT() >= 1) + result |= CpuFeatures{CpuFeature::BTI}; + if (regs.pfr1.MTE() >= 2) + result |= CpuFeatures{CpuFeature::MTE}; + if (regs.mmfr0.ECV() >= 1) + result |= CpuFeatures{CpuFeature::ECV}; + if (regs.mmfr1.AFP() >= 1) + result |= CpuFeatures{CpuFeature::AFP}; + if (regs.isar2.RPRES() >= 1) + result |= CpuFeatures{CpuFeature::RPRES}; + if (regs.pfr1.MTE() >= 3) + result |= CpuFeatures{CpuFeature::MTE3}; + if (regs.pfr1.SME() >= 1) + result |= CpuFeatures{CpuFeature::SME}; + if (regs.smfr0.I16I64() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I16I64}; + if (regs.smfr0.F64F64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F64F64}; + if (regs.smfr0.I8I32() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I8I32}; + if (regs.smfr0.F16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F32}; + if (regs.smfr0.B16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16F32}; + if (regs.smfr0.F32F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F32F32}; + if (regs.smfr0.FA64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_FA64}; + if (regs.isar2.WFxT() >= 2) + result |= CpuFeatures{CpuFeature::WFxT}; + if (regs.isar1.BF16() >= 2) + result |= CpuFeatures{CpuFeature::EBF16}; + if (regs.zfr0.BF16() >= 2) + result |= CpuFeatures{CpuFeature::SVE_EBF16}; + if (regs.isar2.CSSC() >= 1) + result |= CpuFeatures{CpuFeature::CSSC}; + if (regs.isar2.RPRFM() >= 1) + result |= CpuFeatures{CpuFeature::RPRFM}; + if (regs.zfr0.SVEver() >= 2) + result |= CpuFeatures{CpuFeature::SVE2p1}; + if (regs.smfr0.SMEver() >= 1) + result |= 
CpuFeatures{CpuFeature::SME2}; + if (regs.smfr0.SMEver() >= 2) + result |= CpuFeatures{CpuFeature::SME2p1}; + if (regs.smfr0.I16I32() == 0b0101) + result |= CpuFeatures{CpuFeature::SME_I16I32}; + if (regs.smfr0.BI32I32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_BI32I32}; + if (regs.smfr0.B16B16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16B16}; + if (regs.smfr0.F16F16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F16}; + if (regs.isar2.MOPS() >= 1) + result |= CpuFeatures{CpuFeature::MOPS}; + if (regs.isar2.BC() >= 1) + result |= CpuFeatures{CpuFeature::HBC}; + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_linux.hpp b/include/oaknut/feature_detection/feature_detection_linux.hpp index d674102..6310eac 100644 --- a/include/oaknut/feature_detection/feature_detection_linux.hpp +++ b/include/oaknut/feature_detection/feature_detection_linux.hpp @@ -3,10 +3,14 @@ #pragma once +#include + #include #include "oaknut/feature_detection/cpu_feature.hpp" #include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" #ifndef AT_HWCAP # define AT_HWCAP 16 @@ -29,4 +33,13 @@ inline CpuFeatures detect_features() return detect_features_via_hwcap(); } +inline std::optional read_id_registers() +{ + constexpr unsigned long hwcap_cpuid = (1 << 11); + if (::getauxval(AT_HWCAP) & hwcap_cpuid) { + return id::read_id_registers_directly(); + } + return std::nullopt; +} + } // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_w32.hpp b/include/oaknut/feature_detection/feature_detection_w32.hpp index 46fd1d1..366a260 100644 --- a/include/oaknut/feature_detection/feature_detection_w32.hpp +++ b/include/oaknut/feature_detection/feature_detection_w32.hpp @@ -9,12 +9,36 @@ #include +#include +#include +#include + #include #include 
"oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" namespace oaknut { +namespace detail { + +inline std::optional read_registry_hklm(const std::string& subkey, const std::string& name) +{ + std::uint64_t value; + DWORD value_len = sizeof(value); + if (::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &value, &value_len) == ERROR_SUCCESS) { + return value; + } + return std::nullopt; +} + +inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name) +{ + return read_registry_hklm("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index), "CP " + name).value_or(0); +} + +} // namespace detail + // Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent inline CpuFeatures detect_features_via_IsProcessorFeaturePresent() @@ -44,4 +68,32 @@ inline CpuFeatures detect_features() return result; } +inline std::size_t get_core_count() +{ + ::SYSTEM_INFO sys_info; + ::GetSystemInfo(&sys_info); + return sys_info.dwNumberOfProcessors; +} + +inline std::optional read_id_registers(std::size_t core_index) +{ + return id::IdRegisters{ + detail::read_id_register(core_index, "4000"), + id::Pfr0Register{detail::read_id_register(core_index, "4020")}, + id::Pfr1Register{detail::read_id_register(core_index, "4021")}, + id::Pfr2Register{detail::read_id_register(core_index, "4022")}, + id::Zfr0Register{detail::read_id_register(core_index, "4024")}, + id::Smfr0Register{detail::read_id_register(core_index, "4025")}, + id::Isar0Register{detail::read_id_register(core_index, "4030")}, + id::Isar1Register{detail::read_id_register(core_index, "4031")}, + id::Isar2Register{detail::read_id_register(core_index, "4032")}, + id::Isar3Register{detail::read_id_register(core_index, "4033")}, + id::Mmfr0Register{detail::read_id_register(core_index, "4038")}, + 
id::Mmfr1Register{detail::read_id_register(core_index, "4039")}, + id::Mmfr2Register{detail::read_id_register(core_index, "403A")}, + id::Mmfr3Register{detail::read_id_register(core_index, "403B")}, + id::Mmfr4Register{detail::read_id_register(core_index, "403C")}, + }; +} + } // namespace oaknut diff --git a/include/oaknut/feature_detection/id_registers.hpp b/include/oaknut/feature_detection/id_registers.hpp new file mode 100644 index 0000000..985d78c --- /dev/null +++ b/include/oaknut/feature_detection/id_registers.hpp @@ -0,0 +1,317 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +namespace oaknut::id { + +namespace detail { + +template +constexpr unsigned extract_bit(std::uint64_t value) +{ + return (value >> lsb) & 1; +} + +template +constexpr unsigned extract_field(std::uint64_t value) +{ + return (value >> lsb) & 0xf; +} + +template +constexpr signed extract_signed_field(std::uint64_t value) +{ + return static_cast(static_cast(value << (60 - lsb)) >> 60); +} + +} // namespace detail + +struct Pfr0Register { + std::uint64_t value; + + constexpr signed FP() const { return detail::extract_signed_field<16>(value); } + constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); } + constexpr unsigned GIC() const { return detail::extract_field<24>(value); } + constexpr unsigned RAS() const { return detail::extract_field<28>(value); } + constexpr unsigned SVE() const { return detail::extract_field<32>(value); } + constexpr unsigned SEL2() const { return detail::extract_field<36>(value); } + constexpr unsigned MPAM() const { return detail::extract_field<40>(value); } + constexpr unsigned AMU() const { return detail::extract_field<44>(value); } + constexpr unsigned DIT() const { return detail::extract_field<48>(value); } + constexpr unsigned RME() const { return detail::extract_field<52>(value); } + constexpr unsigned CSV2() const { return 
detail::extract_field<56>(value); } + constexpr unsigned CSV3() const { return detail::extract_field<60>(value); } +}; + +struct Pfr1Register { + std::uint64_t value; + + constexpr unsigned BT() const { return detail::extract_field<0>(value); } + constexpr unsigned SSBS() const { return detail::extract_field<4>(value); } + constexpr unsigned MTE() const { return detail::extract_field<8>(value); } + constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); } + constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); } + // [20:23] - reserved + constexpr unsigned SME() const { return detail::extract_field<24>(value); } + constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); } + constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); } + constexpr unsigned NMI() const { return detail::extract_field<36>(value); } + constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); } + constexpr unsigned GCS() const { return detail::extract_field<44>(value); } + constexpr unsigned THE() const { return detail::extract_field<48>(value); } + constexpr unsigned MTEX() const { return detail::extract_field<52>(value); } + constexpr unsigned DF2() const { return detail::extract_field<56>(value); } + constexpr unsigned PFAR() const { return detail::extract_field<60>(value); } +}; + +struct Pfr2Register { + std::uint64_t value; + + constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); } + constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); } + constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); } + // [12:31] reserved + constexpr unsigned FPMR() const { return detail::extract_field<32>(value); } + // [36:63] reserved +}; + +struct Zfr0Register { + std::uint64_t value; + + constexpr unsigned SVEver() const { return detail::extract_field<0>(value); } + constexpr unsigned AES() const { return 
detail::extract_field<4>(value); } + // [8:15] reserved + constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); } + constexpr unsigned BF16() const { return detail::extract_field<20>(value); } + constexpr unsigned B16B16() const { return detail::extract_field<24>(value); } + // [28:31] reserved + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + // [36:39] reserved + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned F32MM() const { return detail::extract_field<52>(value); } + constexpr unsigned F64MM() const { return detail::extract_field<56>(value); } + // [60:63] reserved +}; + +struct Smfr0Register { + std::uint64_t value; + + // [0:27] reserved + constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); } + constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); } + constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); } + // [31] reserved + constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); } + constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); } + constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); } + constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); } + constexpr unsigned I8I32() const { return detail::extract_field<36>(value); } + constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); } + constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); } + constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); } + constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); } + constexpr unsigned I16I32() const { return detail::extract_field<44>(value); } + constexpr unsigned F64F64() const { return detail::extract_bit<48>(value); } + // [49:51] 
reserved + constexpr unsigned I16I64() const { return detail::extract_field<52>(value); } + constexpr unsigned SMEver() const { return detail::extract_field<56>(value); } + constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); } + // [61:62] reserved + constexpr unsigned FA64() const { return detail::extract_bit<63>(value); } +}; + +struct Isar0Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned AES() const { return detail::extract_field<4>(value); } + constexpr unsigned SHA1() const { return detail::extract_field<8>(value); } + constexpr unsigned SHA2() const { return detail::extract_field<12>(value); } + constexpr unsigned CRC32() const { return detail::extract_field<16>(value); } + constexpr unsigned Atomic() const { return detail::extract_field<20>(value); } + constexpr unsigned TME() const { return detail::extract_field<24>(value); } + constexpr unsigned RDM() const { return detail::extract_field<28>(value); } + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + constexpr unsigned SM3() const { return detail::extract_field<36>(value); } + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned DP() const { return detail::extract_field<44>(value); } + constexpr unsigned FHM() const { return detail::extract_field<48>(value); } + constexpr unsigned TS() const { return detail::extract_field<52>(value); } + constexpr unsigned TLB() const { return detail::extract_field<56>(value); } + constexpr unsigned RNDR() const { return detail::extract_field<60>(value); } +}; + +struct Isar1Register { + std::uint64_t value; + + constexpr unsigned DPB() const { return detail::extract_field<0>(value); } + constexpr unsigned APA() const { return detail::extract_field<4>(value); } + constexpr unsigned API() const { return detail::extract_field<8>(value); } + constexpr unsigned JSCVT() const { return detail::extract_field<12>(value); } + constexpr unsigned FCMA() const { 
return detail::extract_field<16>(value); } + constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); } + constexpr unsigned GPA() const { return detail::extract_field<24>(value); } + constexpr unsigned GPI() const { return detail::extract_field<28>(value); } + constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); } + constexpr unsigned SB() const { return detail::extract_field<36>(value); } + constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); } + constexpr unsigned BF16() const { return detail::extract_field<44>(value); } + constexpr unsigned DGH() const { return detail::extract_field<48>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<52>(value); } + constexpr unsigned XS() const { return detail::extract_field<56>(value); } + constexpr unsigned LS64() const { return detail::extract_field<60>(value); } +}; + +struct Isar2Register { + std::uint64_t value; + + constexpr unsigned WFxT() const { return detail::extract_field<0>(value); } + constexpr unsigned RPRES() const { return detail::extract_field<4>(value); } + constexpr unsigned GPA3() const { return detail::extract_field<8>(value); } + constexpr unsigned APA3() const { return detail::extract_field<12>(value); } + constexpr unsigned MOPS() const { return detail::extract_field<16>(value); } + constexpr unsigned BC() const { return detail::extract_field<20>(value); } + constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); } + constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); } + constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); } + constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); } + constexpr unsigned PRFMSLC() const { return detail::extract_field<40>(value); } + // [44:47] reserved + constexpr unsigned RPRFM() const { return detail::extract_field<48>(value); } + constexpr unsigned CSSC() const { return 
detail::extract_field<52>(value); } + constexpr unsigned LUT() const { return detail::extract_field<56>(value); } + constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); } +}; + +struct Isar3Register { + std::uint64_t value; + + constexpr unsigned CPA() const { return detail::extract_field<0>(value); } + constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); } + constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); } + // [12:63] reserved +}; + +struct Mmfr0Register { + std::uint64_t value; + + constexpr unsigned PARange() const { return detail::extract_field<0>(value); } + constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); } + constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); } + constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); } + constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); } + constexpr unsigned TGran16() const { return detail::extract_field<20>(value); } + constexpr unsigned TGran64() const { return detail::extract_field<24>(value); } + constexpr unsigned TGran4() const { return detail::extract_field<28>(value); } + constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); } + constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); } + constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); } + constexpr unsigned ExS() const { return detail::extract_field<44>(value); } + // [48:55] reserved + constexpr unsigned FGT() const { return detail::extract_field<56>(value); } + constexpr unsigned ECV() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr1Register { + std::uint64_t value; + + constexpr unsigned HAFDBS() const { return detail::extract_field<0>(value); } + constexpr unsigned VMIDBits() const { return detail::extract_field<4>(value); } + constexpr unsigned VH() const { return 
detail::extract_field<8>(value); } + constexpr unsigned HPDS() const { return detail::extract_field<12>(value); } + constexpr unsigned LO() const { return detail::extract_field<16>(value); } + constexpr unsigned PAN() const { return detail::extract_field<20>(value); } + constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); } + constexpr unsigned XNX() const { return detail::extract_field<28>(value); } + constexpr unsigned TWED() const { return detail::extract_field<32>(value); } + constexpr unsigned ETS() const { return detail::extract_field<36>(value); } + constexpr unsigned HCX() const { return detail::extract_field<40>(value); } + constexpr unsigned AFP() const { return detail::extract_field<44>(value); } + constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); } + constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); } + constexpr unsigned CMOW() const { return detail::extract_field<56>(value); } + constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr2Register { + std::uint64_t value; + + constexpr unsigned CnP() const { return detail::extract_field<0>(value); } + constexpr unsigned UAO() const { return detail::extract_field<4>(value); } + constexpr unsigned LSM() const { return detail::extract_field<8>(value); } + constexpr unsigned IESB() const { return detail::extract_field<12>(value); } + constexpr unsigned VARange() const { return detail::extract_field<16>(value); } + constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); } + constexpr unsigned NV() const { return detail::extract_field<24>(value); } + constexpr unsigned ST() const { return detail::extract_field<28>(value); } + constexpr unsigned AT() const { return detail::extract_field<32>(value); } + constexpr unsigned IDS() const { return detail::extract_field<36>(value); } + constexpr unsigned FWB() const { return detail::extract_field<40>(value); } + // [44:47] reserved 
+ constexpr unsigned TTL() const { return detail::extract_field<48>(value); } + constexpr unsigned BBM() const { return detail::extract_field<52>(value); } + constexpr unsigned EVT() const { return detail::extract_field<56>(value); } + constexpr unsigned E0PD() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr3Register { + std::uint64_t value; + + constexpr unsigned TCRX() const { return detail::extract_field<0>(value); } + constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); } + constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); } + constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); } + constexpr unsigned S1POE() const { return detail::extract_field<16>(value); } + constexpr unsigned S2POE() const { return detail::extract_field<20>(value); } + constexpr unsigned AIE() const { return detail::extract_field<24>(value); } + constexpr unsigned MEC() const { return detail::extract_field<28>(value); } + constexpr unsigned D128() const { return detail::extract_field<32>(value); } + constexpr unsigned D128_2() const { return detail::extract_field<36>(value); } + constexpr unsigned SNERR() const { return detail::extract_field<40>(value); } + constexpr unsigned ANERR() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned SDERR() const { return detail::extract_field<52>(value); } + constexpr unsigned ADERR() const { return detail::extract_field<56>(value); } + constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr4Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned EIESB() const { return detail::extract_field<4>(value); } + constexpr unsigned ASID2() const { return detail::extract_field<8>(value); } + constexpr unsigned HACDBS() const { return detail::extract_field<12>(value); } + constexpr unsigned FGWTE3() const { return detail::extract_field<16>(value); } + constexpr 
unsigned NV_frac() const { return detail::extract_field<20>(value); } + constexpr unsigned E2H0() const { return detail::extract_field<24>(value); } + // [28:35] reserved + constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); } + // [40:63] reserved +}; + +struct IdRegisters { + std::uint64_t midr; + Pfr0Register pfr0; + Pfr1Register pfr1; + Pfr2Register pfr2; + Zfr0Register zfr0; + Smfr0Register smfr0; + Isar0Register isar0; + Isar1Register isar1; + Isar2Register isar2; + Isar3Register isar3; + Mmfr0Register mmfr0; + Mmfr1Register mmfr1; + Mmfr2Register mmfr2; + Mmfr3Register mmfr3; + Mmfr4Register mmfr4; +}; + +} // namespace oaknut::id diff --git a/include/oaknut/feature_detection/read_id_registers_directly.hpp b/include/oaknut/feature_detection/read_id_registers_directly.hpp new file mode 100644 index 0000000..04db518 --- /dev/null +++ b/include/oaknut/feature_detection/read_id_registers_directly.hpp @@ -0,0 +1,52 @@ +#include + +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut::id { + +inline IdRegisters read_id_registers_directly() +{ + std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0; + +#define OAKNUT_READ_REGISTER(reg, var) \ + __asm__("mrs %0, " #reg \ + : "=r"(var)) + + OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr); + OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2); + OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1); + OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2); + OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2); + OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4); + +#undef 
OAKNUT_READ_REGISTER /* must name the OAKNUT_READ_REGISTER helper defined earlier in this function, otherwise the macro leaks to includers */ + + return IdRegisters{ + midr, + Pfr0Register{pfr0}, + Pfr1Register{pfr1}, + Pfr2Register{pfr2}, + Zfr0Register{zfr0}, + Smfr0Register{smfr0}, + Isar0Register{isar0}, + Isar1Register{isar1}, + Isar2Register{isar2}, + Isar3Register{isar3}, + Mmfr0Register{mmfr0}, + Mmfr1Register{mmfr1}, + Mmfr2Register{mmfr2}, + Mmfr3Register{mmfr3}, + Mmfr4Register{mmfr4}, + }; +} + +} // namespace oaknut::id diff --git a/tests/_feature_detect.cpp b/tests/_feature_detect.cpp index 84993f0..875abac 100644 --- a/tests/_feature_detect.cpp +++ b/tests/_feature_detect.cpp @@ -6,10 +6,11 @@ #include #include "oaknut/feature_detection/feature_detection.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" using namespace oaknut; -TEST_CASE("Print CPU features") +TEST_CASE("Print CPU features (Default)") { CpuFeatures features = detect_features(); @@ -23,3 +24,48 @@ TEST_CASE("Print CPU features") std::fputs("\n", stdout); } + +#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1 + +TEST_CASE("Print CPU features (Using CPUID)") +{ + std::optional id_regs = read_id_registers(); + REQUIRE(!!id_regs); + + CpuFeatures features = detect_features_via_id_registers(*id_regs); + + std::fputs("CPU Features (CPUID method): ", stdout); + +# define OAKNUT_CPU_FEATURE(name) \ + if (features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +# include "oaknut/impl/cpu_feature.inc.hpp" +# undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); +} + +#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2 + +TEST_CASE("Print CPU features (Using CPUID)") +{ + const std::size_t core_count = get_core_count(); + for (std::size_t core_index = 0; core_index < core_count; core_index++) { + std::optional id_regs = read_id_registers(core_index); + REQUIRE(!!id_regs); + + CpuFeatures features = detect_features_via_id_registers(*id_regs); + + std::printf("CPU Features (CPUID method - Core %zu): ", core_index); + +# define OAKNUT_CPU_FEATURE(name) \ + if 
(features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +# include "oaknut/impl/cpu_feature.inc.hpp" +# undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); + } +} + +#endif