diff --git a/CMakeLists.txt b/CMakeLists.txt index 35f8ef9..5f07284 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,11 @@ endif() # Source project files set(header_files + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp @@ -32,7 +36,6 @@ set(header_files ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/overloaded.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut_exception.hpp ) @@ -54,6 +57,7 @@ if (MASTER_PROJECT) endif() add_executable(oaknut-tests + tests/_feature_detect.cpp tests/basic.cpp tests/fpsimd.cpp tests/general.cpp diff --git a/include/oaknut/feature_detection/cpu_feature.hpp b/include/oaknut/feature_detection/cpu_feature.hpp new file mode 100644 index 0000000..9f70c5b --- /dev/null +++ b/include/oaknut/feature_detection/cpu_feature.hpp @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L +# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr +#else +# define OAKNUT_CPU_FEATURES_CONSTEXPR +#endif + +namespace oaknut { + +// NOTE: This file contains code that can be compiled on non-arm64 systems. 
+// For run-time CPU feature detection, include feature_detection.hpp + +enum class CpuFeature { +#define OAKNUT_CPU_FEATURE(name) name, +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE +}; + +constexpr std::size_t cpu_feature_count = 0 +#define OAKNUT_CPU_FEATURE(name) +1 +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE + ; + +class CpuFeatures final { +public: + constexpr CpuFeatures() = default; + + OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features) + { + for (CpuFeature f : features) { + m_bitset.set(static_cast<std::size_t>(f)); + } + } + + constexpr bool has(CpuFeature feature) const + { + if (static_cast<std::size_t>(feature) >= cpu_feature_count) + return false; + return m_bitset[static_cast<std::size_t>(feature)]; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept + { + m_bitset &= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept + { + m_bitset |= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept + { + m_bitset ^= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept + { + CpuFeatures result; + result.m_bitset = ~m_bitset; + return result; + } + +private: + using bitset = std::bitset<cpu_feature_count>; + + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept; + + bitset m_bitset; +}; + +inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset & b.m_bitset; + return result; +} +
+inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset | b.m_bitset; + return result; +} + +inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset ^ b.m_bitset; + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection.hpp b/include/oaknut/feature_detection/feature_detection.hpp new file mode 100644 index 0000000..b779939 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection.hpp @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#if defined(__APPLE__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# include "oaknut/feature_detection/feature_detection_apple.hpp" +#elif defined(__linux__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# include "oaknut/feature_detection/feature_detection_linux.hpp" +#elif defined(__FreeBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# include "oaknut/feature_detection/feature_detection_freebsd.hpp" +#elif defined(_WIN32) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# include "oaknut/feature_detection/feature_detection_w32.hpp" +#else +# define OAKNUT_CPU_FEATURE_DETECTION 0 +# warning "Unsupported operating system for CPU feature detection" +# include "oaknut/feature_detection/feature_detection_generic.hpp" +#endif diff --git a/include/oaknut/feature_detection/feature_detection_apple.hpp b/include/oaknut/feature_detection/feature_detection_apple.hpp new file mode 100644 index 0000000..145ceff --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_apple.hpp @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> + +#include <sys/sysctl.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { +
+// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + +namespace detail { + +inline bool detect_feature(const char* const sysctl_name) +{ + int result = 0; + std::size_t result_size = sizeof(result); + if (::sysctlbyname(sysctl_name, &result, &result_size, nullptr, 0) == 0) { + return result != 0; + } + return false; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_sysctlbyname() +{ + CpuFeatures result; + + if (detail::detect_feature("hw.optional.AdvSIMD") || detail::detect_feature("hw.optional.neon")) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (detail::detect_feature("hw.optional.floatingpoint")) + result |= CpuFeatures{CpuFeature::FP}; + if (detail::detect_feature("hw.optional.AdvSIMD_HPFPCvt") || detail::detect_feature("hw.optional.neon_hpfp")) + result |= CpuFeatures{CpuFeature::FP16Conv}; + if (detail::detect_feature("hw.optional.arm.FEAT_BF16")) + result |= CpuFeatures{CpuFeature::BF16}; + if (detail::detect_feature("hw.optional.arm.FEAT_DotProd")) + result |= CpuFeatures{CpuFeature::DotProd}; + if (detail::detect_feature("hw.optional.arm.FEAT_FCMA") || detail::detect_feature("hw.optional.armv8_3_compnum")) + result |= CpuFeatures{CpuFeature::FCMA}; + if (detail::detect_feature("hw.optional.arm.FEAT_FHM") || detail::detect_feature("hw.optional.armv8_2_fhm")) + result |= CpuFeatures{CpuFeature::FHM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FP16") || detail::detect_feature("hw.optional.neon_fp16")) + result |= CpuFeatures{CpuFeature::FP16}; + if (detail::detect_feature("hw.optional.arm.FEAT_FRINTTS")) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (detail::detect_feature("hw.optional.arm.FEAT_I8MM")) + result |= CpuFeatures{CpuFeature::I8MM}; + if (detail::detect_feature("hw.optional.arm.FEAT_JSCVT")) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (detail::detect_feature("hw.optional.arm.FEAT_RDM")) + result |= CpuFeatures{CpuFeature::RDM}; + if 
(detail::detect_feature("hw.optional.arm.FEAT_FlagM")) + result |= CpuFeatures{CpuFeature::FlagM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FlagM2")) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (detail::detect_feature("hw.optional.armv8_crc32")) + result |= CpuFeatures{CpuFeature::CRC32}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC")) + result |= CpuFeatures{CpuFeature::LRCPC}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC2")) + result |= CpuFeatures{CpuFeature::LRCPC2}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE") || detail::detect_feature("hw.optional.armv8_1_atomics")) + result |= CpuFeatures{CpuFeature::LSE}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE2")) + result |= CpuFeatures{CpuFeature::LSE2}; + if (detail::detect_feature("hw.optional.arm.FEAT_AES")) + result |= CpuFeatures{CpuFeature::AES}; + if (detail::detect_feature("hw.optional.arm.FEAT_PMULL")) + result |= CpuFeatures{CpuFeature::PMULL}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA1")) + result |= CpuFeatures{CpuFeature::SHA1}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA256")) + result |= CpuFeatures{CpuFeature::SHA256}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA512") || detail::detect_feature("hw.optional.armv8_2_sha512")) + result |= CpuFeatures{CpuFeature::SHA512}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA3") || detail::detect_feature("hw.optional.armv8_2_sha3")) + result |= CpuFeatures{CpuFeature::SHA3}; + if (detail::detect_feature("hw.optional.arm.FEAT_BTI")) + result |= CpuFeatures{CpuFeature::BTI}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB")) + result |= CpuFeatures{CpuFeature::DPB}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB2")) + result |= CpuFeatures{CpuFeature::DPB2}; + if (detail::detect_feature("hw.optional.arm.FEAT_ECV")) + result |= CpuFeatures{CpuFeature::ECV}; + if (detail::detect_feature("hw.optional.arm.FEAT_SB")) + result |= 
CpuFeatures{CpuFeature::SB}; + if (detail::detect_feature("hw.optional.arm.FEAT_SSBS")) + result |= CpuFeatures{CpuFeature::SSBS}; + + return result; +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_sysctlbyname(); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_freebsd.hpp b/include/oaknut/feature_detection/feature_detection_freebsd.hpp new file mode 100644 index 0000000..91a15d0 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_freebsd.hpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include <sys/auxv.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 25 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +namespace oaknut { + +namespace detail { + +inline unsigned long getauxval(int aux) +{ + unsigned long result = 0; + if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) { + return result; + } + return 0; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = detail::getauxval(AT_HWCAP); + const unsigned long hwcap2 = detail::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_generic.hpp b/include/oaknut/feature_detection/feature_detection_generic.hpp new file mode 100644 index 0000000..dc69ab7 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_generic.hpp @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { + +inline CpuFeatures detect_features() +{ + return
CpuFeatures{CpuFeature::FP, CpuFeature::ASIMD}; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp new file mode 100644 index 0000000..a886b56 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { + +namespace detail { + +template<std::size_t... bits> +constexpr bool bit_test(unsigned long value) +{ + return (((static_cast<unsigned long long>(value) >> bits) & 1) && ...); +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2) +{ + CpuFeatures result; + +#define OAKNUT_DETECT_CAP(FEAT, ...) \ + if (detail::bit_test<__VA_ARGS__>(hwcap)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } +#define OAKNUT_DETECT_CAP2(FEAT, ...) \ + if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } + + OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP + OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD + // HWCAP_EVTSTRM (2) + OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES + OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL + OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1 + OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2 + OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32 + OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS + OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + // HWCAP_CPUID (11) + OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM + OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT + OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA + OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC + OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP + OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3 + OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3 + OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4 +
OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP + OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512 + OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE + OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM + OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT + OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT + OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC + OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM + OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS + OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB + OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA + OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG + + OAKNUT_DETECT_CAP2(DPB2, 0) // HWCAP2_DCPODP + OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2 + OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES + OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL + OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM + OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3 + OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4 + OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2 + OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT + OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM + OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM + OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM + OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16 + OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM + OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16 + OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH + OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG + OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI + OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE + OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV + OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP + OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES + OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3 + OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME + OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64 + OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64 + OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32 + OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32 + OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32 + 
OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32 + OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64 + OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFxT + OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16 + OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16 + OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC + OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM + OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1 + OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2 + OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1 + OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32 + OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32 + OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16 + OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16 + OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS + OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC + +#undef OAKNUT_DETECT_CAP +#undef OAKNUT_DETECT_CAP2 + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_linux.hpp b/include/oaknut/feature_detection/feature_detection_linux.hpp new file mode 100644 index 0000000..d674102 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_linux.hpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +namespace oaknut { + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = ::getauxval(AT_HWCAP); + const unsigned long hwcap2 = ::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_w32.hpp 
b/include/oaknut/feature_detection/feature_detection_w32.hpp new file mode 100644 index 0000000..46fd1d1 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_w32.hpp @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include + +#include + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { + +// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + +inline CpuFeatures detect_features_via_IsProcessorFeaturePresent() +{ + CpuFeatures result; + + if (::IsProcessorFeaturePresent(30)) // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256}; + if (::IsProcessorFeaturePresent(31)) // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::CRC32}; + if (::IsProcessorFeaturePresent(34)) // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LSE}; + if (::IsProcessorFeaturePresent(43)) // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::DotProd}; + if (::IsProcessorFeaturePresent(44)) // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::JSCVT}; + if (::IsProcessorFeaturePresent(45)) // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LRCPC}; + + return result; +} + +inline CpuFeatures detect_features() +{ + CpuFeatures result{CpuFeature::FP, CpuFeature::ASIMD}; + result |= detect_features_via_IsProcessorFeaturePresent(); + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/impl/cpu_feature.inc.hpp b/include/oaknut/impl/cpu_feature.inc.hpp new file mode 100644 index 0000000..1f7cd87 --- /dev/null +++ b/include/oaknut/impl/cpu_feature.inc.hpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright (c) 
2023 merryhime +// SPDX-License-Identifier: MIT + +OAKNUT_CPU_FEATURE(FP) +OAKNUT_CPU_FEATURE(ASIMD) +OAKNUT_CPU_FEATURE(AES) +OAKNUT_CPU_FEATURE(PMULL) +OAKNUT_CPU_FEATURE(SHA1) +OAKNUT_CPU_FEATURE(SHA256) +OAKNUT_CPU_FEATURE(CRC32) +OAKNUT_CPU_FEATURE(LSE) +OAKNUT_CPU_FEATURE(FP16Conv) +OAKNUT_CPU_FEATURE(FP16) +OAKNUT_CPU_FEATURE(RDM) +OAKNUT_CPU_FEATURE(JSCVT) +OAKNUT_CPU_FEATURE(FCMA) +OAKNUT_CPU_FEATURE(LRCPC) +OAKNUT_CPU_FEATURE(DPB) +OAKNUT_CPU_FEATURE(SHA3) +OAKNUT_CPU_FEATURE(SM3) +OAKNUT_CPU_FEATURE(SM4) +OAKNUT_CPU_FEATURE(DotProd) +OAKNUT_CPU_FEATURE(SHA512) +OAKNUT_CPU_FEATURE(SVE) +OAKNUT_CPU_FEATURE(FHM) +OAKNUT_CPU_FEATURE(DIT) +OAKNUT_CPU_FEATURE(LSE2) +OAKNUT_CPU_FEATURE(LRCPC2) +OAKNUT_CPU_FEATURE(FlagM) +OAKNUT_CPU_FEATURE(SSBS) +OAKNUT_CPU_FEATURE(SB) +OAKNUT_CPU_FEATURE(PACA) +OAKNUT_CPU_FEATURE(PACG) +OAKNUT_CPU_FEATURE(DPB2) +OAKNUT_CPU_FEATURE(SVE2) +OAKNUT_CPU_FEATURE(SVE_AES) +OAKNUT_CPU_FEATURE(SVE_PMULL128) +OAKNUT_CPU_FEATURE(SVE_BITPERM) +OAKNUT_CPU_FEATURE(SVE_SHA3) +OAKNUT_CPU_FEATURE(SVE_SM4) +OAKNUT_CPU_FEATURE(FlagM2) +OAKNUT_CPU_FEATURE(FRINTTS) +OAKNUT_CPU_FEATURE(SVE_I8MM) +OAKNUT_CPU_FEATURE(SVE_F32MM) +OAKNUT_CPU_FEATURE(SVE_F64MM) +OAKNUT_CPU_FEATURE(SVE_BF16) +OAKNUT_CPU_FEATURE(I8MM) +OAKNUT_CPU_FEATURE(BF16) +OAKNUT_CPU_FEATURE(DGH) +OAKNUT_CPU_FEATURE(RNG) +OAKNUT_CPU_FEATURE(BTI) +OAKNUT_CPU_FEATURE(MTE) +OAKNUT_CPU_FEATURE(ECV) +OAKNUT_CPU_FEATURE(AFP) +OAKNUT_CPU_FEATURE(RPRES) +OAKNUT_CPU_FEATURE(MTE3) +OAKNUT_CPU_FEATURE(SME) +OAKNUT_CPU_FEATURE(SME_I16I64) +OAKNUT_CPU_FEATURE(SME_F64F64) +OAKNUT_CPU_FEATURE(SME_I8I32) +OAKNUT_CPU_FEATURE(SME_F16F32) +OAKNUT_CPU_FEATURE(SME_B16F32) +OAKNUT_CPU_FEATURE(SME_F32F32) +OAKNUT_CPU_FEATURE(SME_FA64) +OAKNUT_CPU_FEATURE(WFxT) +OAKNUT_CPU_FEATURE(EBF16) +OAKNUT_CPU_FEATURE(SVE_EBF16) +OAKNUT_CPU_FEATURE(CSSC) +OAKNUT_CPU_FEATURE(RPRFM) +OAKNUT_CPU_FEATURE(SVE2p1) +OAKNUT_CPU_FEATURE(SME2) +OAKNUT_CPU_FEATURE(SME2p1) +OAKNUT_CPU_FEATURE(SME_I16I32) 
+OAKNUT_CPU_FEATURE(SME_BI32I32) +OAKNUT_CPU_FEATURE(SME_B16B16) +OAKNUT_CPU_FEATURE(SME_F16F16) +OAKNUT_CPU_FEATURE(MOPS) +OAKNUT_CPU_FEATURE(HBC) diff --git a/tests/_feature_detect.cpp b/tests/_feature_detect.cpp new file mode 100644 index 0000000..84993f0 --- /dev/null +++ b/tests/_feature_detect.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime +// SPDX-License-Identifier: MIT + +#include + +#include + +#include "oaknut/feature_detection/feature_detection.hpp" + +using namespace oaknut; + +TEST_CASE("Print CPU features") +{ + CpuFeatures features = detect_features(); + + std::fputs("CPU Features: ", stdout); + +#define OAKNUT_CPU_FEATURE(name) \ + if (features.has(CpuFeature::name)) \ + std::fputs(#name " ", stdout); +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE + + std::fputs("\n", stdout); +}