oaknut: Add basic CPU feature detection

This commit is contained in:
Merry 2023-10-22 11:44:09 +01:00 committed by merry
parent 23e9ddb4c4
commit 319b3d2c9f
11 changed files with 601 additions and 1 deletions

View file

@ -17,7 +17,11 @@ endif()
# Source project files
set(header_files
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
@ -32,7 +36,6 @@ set(header_files
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/overloaded.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut_exception.hpp
)
@ -54,6 +57,7 @@ if (MASTER_PROJECT)
endif()
add_executable(oaknut-tests
tests/_feature_detect.cpp
tests/basic.cpp
tests/fpsimd.cpp
tests/general.cpp

View file

@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bitset>
#include <cstddef>
#include <initializer_list>
// OAKNUT_CPU_FEATURES_CONSTEXPR expands to `constexpr` when the standard library
// reports constexpr std::bitset support via the __cpp_lib_constexpr_bitset
// feature-test macro (value >= 202207L), and to nothing otherwise.
// It is applied to the CpuFeatures operations that mutate or combine bitsets.
#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L
# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr
#else
# define OAKNUT_CPU_FEATURES_CONSTEXPR
#endif
namespace oaknut {
// NOTE: This file contains code that can be compiled on non-arm64 systems.
// For run-time CPU feature detection, include feature_detection.hpp
// Identifies a single CPU feature.
// The enumerators are generated from oaknut/impl/cpu_feature.inc.hpp: each
// OAKNUT_CPU_FEATURE(name) line in that file becomes one enumerator, in file order.
enum class CpuFeature {
#define OAKNUT_CPU_FEATURE(name) name,
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
};
// Number of CpuFeature enumerators.
// Computed by expanding every OAKNUT_CPU_FEATURE(name) entry of
// oaknut/impl/cpu_feature.inc.hpp to `+1` and summing onto the initial 0.
constexpr std::size_t cpu_feature_count = 0
#define OAKNUT_CPU_FEATURE(name) +1
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
;
// A set of CpuFeature values, stored as one bit per enumerator.
// Supports membership queries and the usual bitwise set operations.
class CpuFeatures final {
public:
    // Creates an empty set.
    constexpr CpuFeatures() = default;

    // Creates a set containing exactly the listed features.
    OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features)
    {
        for (const CpuFeature feature : features) {
            const auto index = static_cast<std::size_t>(feature);
            m_bitset.set(index);
        }
    }

    // Returns true when `feature` is a member of this set.
    // Values whose index is not below cpu_feature_count are reported as absent.
    constexpr bool has(CpuFeature feature) const
    {
        const auto index = static_cast<std::size_t>(feature);
        return index < cpu_feature_count && m_bitset[index];
    }

    // In-place intersection with `other`.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept
    {
        m_bitset = m_bitset & other.m_bitset;
        return *this;
    }

    // In-place union with `other`.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept
    {
        m_bitset = m_bitset | other.m_bitset;
        return *this;
    }

    // In-place symmetric difference with `other`.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept
    {
        m_bitset = m_bitset ^ other.m_bitset;
        return *this;
    }

    // Returns the complement: every feature bit flipped.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept
    {
        CpuFeatures inverted(*this);
        inverted.m_bitset.flip();
        return inverted;
    }

private:
    using bitset = std::bitset<cpu_feature_count>;

    // The binary set operators are defined at namespace scope and need access to m_bitset.
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept;
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept;
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept;

    bitset m_bitset;
};
// Returns the intersection of `a` and `b`: the features present in both sets.
//
// Explicitly `inline`: OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing when the
// standard library lacks constexpr std::bitset, and a non-inline function defined
// in a header would then be defined once per including translation unit,
// producing duplicate-symbol link errors.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset & b.m_bitset;
    return result;
}
// Returns the union of `a` and `b`: the features present in either set.
//
// Explicitly `inline`: OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing when the
// standard library lacks constexpr std::bitset, and a non-inline function defined
// in a header would then be defined once per including translation unit,
// producing duplicate-symbol link errors.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset | b.m_bitset;
    return result;
}
// Returns the symmetric difference of `a` and `b`: the features present in exactly one set.
//
// Explicitly `inline`: OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing when the
// standard library lacks constexpr std::bitset, and a non-inline function defined
// in a header would then be defined once per including translation unit,
// producing duplicate-symbol link errors.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset ^ b.m_bitset;
    return result;
}
} // namespace oaknut

View file

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
// Selects the run-time feature detection backend for the target operating system.
// OAKNUT_CPU_FEATURE_DETECTION is defined to 1 when an OS-specific backend header
// is included, and to 0 when only the generic fallback header is used
// (in which case a compile-time warning is also emitted).
#if defined(__APPLE__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# include "oaknut/feature_detection/feature_detection_apple.hpp"
#elif defined(__linux__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# include "oaknut/feature_detection/feature_detection_linux.hpp"
#elif defined(__FreeBSD__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# include "oaknut/feature_detection/feature_detection_freebsd.hpp"
#elif defined(_WIN32)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# include "oaknut/feature_detection/feature_detection_w32.hpp"
#else
// No OS-specific backend: fall back to the generic header.
# define OAKNUT_CPU_FEATURE_DETECTION 0
# warning "Unsupported operating system for CPU feature detection"
# include "oaknut/feature_detection/feature_detection_generic.hpp"
#endif

View file

@ -0,0 +1,105 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <sys/sysctl.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
namespace oaknut {
// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
namespace detail {
// Queries the integer sysctl named `sysctl_name` and reports whether it is non-zero.
// A failed query is treated the same as a zero value: the function returns false.
inline bool detect_feature(const char* const sysctl_name)
{
    int value = 0;
    std::size_t value_size = sizeof(value);
    const int status = ::sysctlbyname(sysctl_name, &value, &value_size, nullptr, 0);
    return status == 0 && value != 0;
}
} // namespace detail
// Builds the set of CPU features reported by the `hw.optional.*` sysctl entries.
// Each feature is probed through its primary sysctl name and, if that does not
// report the feature, through an optional alternative name.
inline CpuFeatures detect_features_via_sysctlbyname()
{
    struct Probe {
        CpuFeature feature;
        const char* primary;
        const char* fallback;  // nullptr when only one sysctl name is queried
    };

    static constexpr Probe probes[] = {
        {CpuFeature::ASIMD, "hw.optional.AdvSIMD", "hw.optional.neon"},
        {CpuFeature::FP, "hw.optional.floatingpoint", nullptr},
        {CpuFeature::FP16Conv, "hw.optional.AdvSIMD_HPFPCvt", "hw.optional.neon_hpfp"},
        {CpuFeature::BF16, "hw.optional.arm.FEAT_BF16", nullptr},
        {CpuFeature::DotProd, "hw.optional.arm.FEAT_DotProd", nullptr},
        {CpuFeature::FCMA, "hw.optional.arm.FEAT_FCMA", "hw.optional.armv8_3_compnum"},
        {CpuFeature::FHM, "hw.optional.arm.FEAT_FHM", "hw.optional.armv8_2_fhm"},
        {CpuFeature::FP16, "hw.optional.arm.FEAT_FP16", "hw.optional.neon_fp16"},
        {CpuFeature::FRINTTS, "hw.optional.arm.FEAT_FRINTTS", nullptr},
        {CpuFeature::I8MM, "hw.optional.arm.FEAT_I8MM", nullptr},
        {CpuFeature::JSCVT, "hw.optional.arm.FEAT_JSCVT", nullptr},
        {CpuFeature::RDM, "hw.optional.arm.FEAT_RDM", nullptr},
        {CpuFeature::FlagM, "hw.optional.arm.FEAT_FlagM", nullptr},
        {CpuFeature::FlagM2, "hw.optional.arm.FEAT_FlagM2", nullptr},
        {CpuFeature::CRC32, "hw.optional.armv8_crc32", nullptr},
        {CpuFeature::LRCPC, "hw.optional.arm.FEAT_LRCPC", nullptr},
        {CpuFeature::LRCPC2, "hw.optional.arm.FEAT_LRCPC2", nullptr},
        {CpuFeature::LSE, "hw.optional.arm.FEAT_LSE", "hw.optional.armv8_1_atomics"},
        {CpuFeature::LSE2, "hw.optional.arm.FEAT_LSE2", nullptr},
        {CpuFeature::AES, "hw.optional.arm.FEAT_AES", nullptr},
        {CpuFeature::PMULL, "hw.optional.arm.FEAT_PMULL", nullptr},
        {CpuFeature::SHA1, "hw.optional.arm.FEAT_SHA1", nullptr},
        {CpuFeature::SHA256, "hw.optional.arm.FEAT_SHA256", nullptr},
        {CpuFeature::SHA512, "hw.optional.arm.FEAT_SHA512", "hw.optional.armv8_2_sha512"},
        {CpuFeature::SHA3, "hw.optional.arm.FEAT_SHA3", "hw.optional.armv8_2_sha3"},
        {CpuFeature::BTI, "hw.optional.arm.FEAT_BTI", nullptr},
        {CpuFeature::DPB, "hw.optional.arm.FEAT_DPB", nullptr},
        {CpuFeature::DPB2, "hw.optional.arm.FEAT_DPB2", nullptr},
        {CpuFeature::ECV, "hw.optional.arm.FEAT_ECV", nullptr},
        {CpuFeature::SB, "hw.optional.arm.FEAT_SB", nullptr},
        {CpuFeature::SSBS, "hw.optional.arm.FEAT_SSBS", nullptr},
    };

    CpuFeatures detected;
    for (const Probe& probe : probes) {
        // The alternative name is only queried when the primary one did not report the feature.
        const bool present = detail::detect_feature(probe.primary)
                          || (probe.fallback != nullptr && detail::detect_feature(probe.fallback));
        if (present)
            detected |= CpuFeatures{probe.feature};
    }
    return detected;
}
// Detects the CPU features of the running machine.
// On this platform detection is done entirely through sysctlbyname.
inline CpuFeatures detect_features()
{
    CpuFeatures detected = detect_features_via_sysctlbyname();
    return detected;
}
} // namespace oaknut

View file

@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <sys/auxv.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
// Fallback definitions, used only when the system headers did not provide them.
// FreeBSD numbers these auxiliary-vector tags 25 and 26 (see sys/elf_common.h).
// The Linux value for AT_HWCAP (16) must not be used here: on FreeBSD tag 16 is
// AT_CANARY, so elf_aux_info would not return the hardware capability bits.
#ifndef AT_HWCAP
# define AT_HWCAP 25
#endif
#ifndef AT_HWCAP2
# define AT_HWCAP2 26
#endif
namespace oaknut {
namespace detail {
// Reads the auxiliary-vector entry `aux` through elf_aux_info.
// Returns the entry's value, or 0 when elf_aux_info reports an error.
//
// Marked `inline` because this function is defined in a header: without it,
// every translation unit including the header would emit its own definition
// and linking two of them would fail with a duplicate symbol.
inline unsigned long getauxval(int aux)
{
    unsigned long result = 0;
    if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) {
        return result;
    }
    return 0;
}
} // namespace detail
// Reads the AT_HWCAP and AT_HWCAP2 auxiliary-vector entries and decodes them
// into a CpuFeatures set using the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const auto caps = detail::getauxval(AT_HWCAP);
    const auto caps2 = detail::getauxval(AT_HWCAP2);
    const CpuFeatures decoded = detect_features_via_hwcap(caps, caps2);
    return decoded;
}
// Detects the CPU features of the running machine.
// On this platform detection is done entirely through the hwcap auxiliary-vector entries.
inline CpuFeatures detect_features()
{
    CpuFeatures detected = detect_features_via_hwcap();
    return detected;
}
} // namespace oaknut

View file

@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include "oaknut/feature_detection/cpu_feature.hpp"
namespace oaknut {
// Generic fallback: performs no run-time detection and always reports
// exactly FP and ASIMD.
inline CpuFeatures detect_features()
{
    CpuFeatures baseline;
    baseline |= CpuFeatures{CpuFeature::FP};
    baseline |= CpuFeatures{CpuFeature::ASIMD};
    return baseline;
}
} // namespace oaknut

View file

@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include "oaknut/feature_detection/cpu_feature.hpp"
namespace oaknut {
namespace detail {
// Returns true when every bit position listed in `bits` is set in `value`.
// With an empty list of positions the result is true.
template<std::size_t... bits>
constexpr bool bit_test(unsigned long value)
{
    // Combine all requested positions into one mask and require the whole mask to be set.
    const unsigned long required = (0UL | ... | (1UL << bits));
    return (value & required) == required;
}
} // namespace detail
// Decodes the raw `hwcap` and `hwcap2` capability words into a CpuFeatures set.
//
// Each table line below maps one CpuFeature to one or more bit positions; the
// feature is added to the result only when all of its listed bits are set
// (see detail::bit_test). The trailing comment on each line names the
// corresponding HWCAP_* / HWCAP2_* constant.
inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2)
{
CpuFeatures result;
// OAKNUT_DETECT_CAP tests bit positions in `hwcap`.
#define OAKNUT_DETECT_CAP(FEAT, ...) \
if (detail::bit_test<__VA_ARGS__>(hwcap)) { \
result |= CpuFeatures{CpuFeature::FEAT}; \
}
// OAKNUT_DETECT_CAP2 tests bit positions in `hwcap2`.
#define OAKNUT_DETECT_CAP2(FEAT, ...) \
if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \
result |= CpuFeatures{CpuFeature::FEAT}; \
}
OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP
OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD
// HWCAP_EVTSTRM (2)
OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES
OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL
OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1
OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2
OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32
OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS
// FP16Conv and FP16 are both reported only when bits 9 and 10 are set together.
OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
// HWCAP_CPUID (11)
OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM
OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT
OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA
OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC
OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP
OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3
OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3
OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4
OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP
OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512
OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE
OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM
OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT
OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT
OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC
OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM
OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS
OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB
OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA
OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG
OAKNUT_DETECT_CAP2(DPB2, 0) // HWCAP2_DCPODP
OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2
OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES
OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL
OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM
OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3
OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4
OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2
OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT
OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM
OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM
OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM
OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16
OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM
OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16
OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH
OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG
OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI
OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE
OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV
OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP
OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES
OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3
OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME
OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64
OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64
OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32
OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32
OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32
OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32
OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64
OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFxT
OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16
OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16
OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC
OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM
OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1
OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2
OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1
OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32
OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32
OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16
OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16
OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS
OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC
// The helper macros are local to this function; remove them so they do not leak to includers.
#undef OAKNUT_DETECT_CAP
#undef OAKNUT_DETECT_CAP2
return result;
}
} // namespace oaknut

View file

@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <sys/auxv.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
// Fallback definitions of the auxiliary-vector tags, used only when the
// headers included above did not already define them.
#ifndef AT_HWCAP
# define AT_HWCAP 16
#endif
#ifndef AT_HWCAP2
# define AT_HWCAP2 26
#endif
namespace oaknut {
// Reads the AT_HWCAP and AT_HWCAP2 auxiliary-vector entries with getauxval and
// decodes them into a CpuFeatures set using the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const auto caps = ::getauxval(AT_HWCAP);
    const auto caps2 = ::getauxval(AT_HWCAP2);
    const CpuFeatures decoded = detect_features_via_hwcap(caps, caps2);
    return decoded;
}
// Detects the CPU features of the running machine.
// On this platform detection is done entirely through the hwcap auxiliary-vector entries.
inline CpuFeatures detect_features()
{
    CpuFeatures detected = detect_features_via_hwcap();
    return detected;
}
} // namespace oaknut

View file

@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <processthreadsapi.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
namespace oaknut {
// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
// Builds the set of CPU features that IsProcessorFeaturePresent reports.
// The numeric ids are the PF_* constants named in the trailing comments.
inline CpuFeatures detect_features_via_IsProcessorFeaturePresent()
{
    CpuFeatures detected;

    // Adds `features` to the result when the OS reports the processor feature with the given id.
    const auto add_if_present = [&detected](DWORD processor_feature, const CpuFeatures& features) {
        if (::IsProcessorFeaturePresent(processor_feature))
            detected |= features;
    };

    // A single crypto flag stands for all four of these features.
    add_if_present(30, CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256});  // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
    add_if_present(31, CpuFeatures{CpuFeature::CRC32});    // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
    add_if_present(34, CpuFeatures{CpuFeature::LSE});      // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
    add_if_present(43, CpuFeatures{CpuFeature::DotProd});  // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
    add_if_present(44, CpuFeatures{CpuFeature::JSCVT});    // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
    add_if_present(45, CpuFeatures{CpuFeature::LRCPC});    // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE

    return detected;
}
// Detects the CPU features of the running machine.
// FP and ASIMD are always reported; everything else comes from IsProcessorFeaturePresent.
inline CpuFeatures detect_features()
{
    const CpuFeatures baseline{CpuFeature::FP, CpuFeature::ASIMD};
    return baseline | detect_features_via_IsProcessorFeaturePresent();
}
} // namespace oaknut

View file

@ -0,0 +1,78 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
OAKNUT_CPU_FEATURE(FP)
OAKNUT_CPU_FEATURE(ASIMD)
OAKNUT_CPU_FEATURE(AES)
OAKNUT_CPU_FEATURE(PMULL)
OAKNUT_CPU_FEATURE(SHA1)
OAKNUT_CPU_FEATURE(SHA256)
OAKNUT_CPU_FEATURE(CRC32)
OAKNUT_CPU_FEATURE(LSE)
OAKNUT_CPU_FEATURE(FP16Conv)
OAKNUT_CPU_FEATURE(FP16)
OAKNUT_CPU_FEATURE(RDM)
OAKNUT_CPU_FEATURE(JSCVT)
OAKNUT_CPU_FEATURE(FCMA)
OAKNUT_CPU_FEATURE(LRCPC)
OAKNUT_CPU_FEATURE(DPB)
OAKNUT_CPU_FEATURE(SHA3)
OAKNUT_CPU_FEATURE(SM3)
OAKNUT_CPU_FEATURE(SM4)
OAKNUT_CPU_FEATURE(DotProd)
OAKNUT_CPU_FEATURE(SHA512)
OAKNUT_CPU_FEATURE(SVE)
OAKNUT_CPU_FEATURE(FHM)
OAKNUT_CPU_FEATURE(DIT)
OAKNUT_CPU_FEATURE(LSE2)
OAKNUT_CPU_FEATURE(LRCPC2)
OAKNUT_CPU_FEATURE(FlagM)
OAKNUT_CPU_FEATURE(SSBS)
OAKNUT_CPU_FEATURE(SB)
OAKNUT_CPU_FEATURE(PACA)
OAKNUT_CPU_FEATURE(PACG)
OAKNUT_CPU_FEATURE(DPB2)
OAKNUT_CPU_FEATURE(SVE2)
OAKNUT_CPU_FEATURE(SVE_AES)
OAKNUT_CPU_FEATURE(SVE_PMULL128)
OAKNUT_CPU_FEATURE(SVE_BITPERM)
OAKNUT_CPU_FEATURE(SVE_SHA3)
OAKNUT_CPU_FEATURE(SVE_SM4)
OAKNUT_CPU_FEATURE(FlagM2)
OAKNUT_CPU_FEATURE(FRINTTS)
OAKNUT_CPU_FEATURE(SVE_I8MM)
OAKNUT_CPU_FEATURE(SVE_F32MM)
OAKNUT_CPU_FEATURE(SVE_F64MM)
OAKNUT_CPU_FEATURE(SVE_BF16)
OAKNUT_CPU_FEATURE(I8MM)
OAKNUT_CPU_FEATURE(BF16)
OAKNUT_CPU_FEATURE(DGH)
OAKNUT_CPU_FEATURE(RNG)
OAKNUT_CPU_FEATURE(BTI)
OAKNUT_CPU_FEATURE(MTE)
OAKNUT_CPU_FEATURE(ECV)
OAKNUT_CPU_FEATURE(AFP)
OAKNUT_CPU_FEATURE(RPRES)
OAKNUT_CPU_FEATURE(MTE3)
OAKNUT_CPU_FEATURE(SME)
OAKNUT_CPU_FEATURE(SME_I16I64)
OAKNUT_CPU_FEATURE(SME_F64F64)
OAKNUT_CPU_FEATURE(SME_I8I32)
OAKNUT_CPU_FEATURE(SME_F16F32)
OAKNUT_CPU_FEATURE(SME_B16F32)
OAKNUT_CPU_FEATURE(SME_F32F32)
OAKNUT_CPU_FEATURE(SME_FA64)
OAKNUT_CPU_FEATURE(WFxT)
OAKNUT_CPU_FEATURE(EBF16)
OAKNUT_CPU_FEATURE(SVE_EBF16)
OAKNUT_CPU_FEATURE(CSSC)
OAKNUT_CPU_FEATURE(RPRFM)
OAKNUT_CPU_FEATURE(SVE2p1)
OAKNUT_CPU_FEATURE(SME2)
OAKNUT_CPU_FEATURE(SME2p1)
OAKNUT_CPU_FEATURE(SME_I16I32)
OAKNUT_CPU_FEATURE(SME_BI32I32)
OAKNUT_CPU_FEATURE(SME_B16B16)
OAKNUT_CPU_FEATURE(SME_F16F16)
OAKNUT_CPU_FEATURE(MOPS)
OAKNUT_CPU_FEATURE(HBC)

25
tests/_feature_detect.cpp Normal file
View file

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <cstdio>
#include <catch2/catch_test_macros.hpp>
#include "oaknut/feature_detection/feature_detection.hpp"
using namespace oaknut;
// Prints the names of all detected CPU features on one line of stdout.
// This test makes no assertions; it only exercises detect_features() and
// CpuFeatures::has() for every feature listed in cpu_feature.inc.hpp.
TEST_CASE("Print CPU features")
{
CpuFeatures features = detect_features();
std::fputs("CPU Features: ", stdout);
// Expand each OAKNUT_CPU_FEATURE(name) entry of the .inc file into a check
// that prints the feature's name followed by a space when it is present.
#define OAKNUT_CPU_FEATURE(name) \
if (features.has(CpuFeature::name)) \
std::fputs(#name " ", stdout);
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
std::fputs("\n", stdout);
}