mirror of
https://github.com/yuzu-mirror/oaknut.git
synced 2026-02-05 21:24:13 +01:00
oaknut: Add basic CPU feature detection
This commit is contained in:
parent
23e9ddb4c4
commit
319b3d2c9f
|
|
@ -17,7 +17,11 @@ endif()
|
|||
|
||||
# Source project files
|
||||
set(header_files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
|
||||
|
|
@ -32,7 +36,6 @@ set(header_files
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/overloaded.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut_exception.hpp
|
||||
)
|
||||
|
|
@ -54,6 +57,7 @@ if (MASTER_PROJECT)
|
|||
endif()
|
||||
|
||||
add_executable(oaknut-tests
|
||||
tests/_feature_detect.cpp
|
||||
tests/basic.cpp
|
||||
tests/fpsimd.cpp
|
||||
tests/general.cpp
|
||||
|
|
|
|||
107
include/oaknut/feature_detection/cpu_feature.hpp
Normal file
107
include/oaknut/feature_detection/cpu_feature.hpp
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
#include <initializer_list>
|
||||
|
||||
// OAKNUT_CPU_FEATURES_CONSTEXPR expands to `constexpr` only when the standard
// library reports __cpp_lib_constexpr_bitset >= 202207L; otherwise it expands
// to nothing, so the functions it decorates are ordinary run-time functions.
#if !defined(__cpp_lib_constexpr_bitset) || __cpp_lib_constexpr_bitset < 202207L
#    define OAKNUT_CPU_FEATURES_CONSTEXPR
#else
#    define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// NOTE: This file contains code that can be compiled on non-arm64 systems.
|
||||
// For run-time CPU feature detection, include feature_detection.hpp
|
||||
|
||||
// Every CPU feature known to oaknut.
// The enumerators are produced by expanding OAKNUT_CPU_FEATURE once for each
// entry of cpu_feature.inc.hpp, so they take consecutive values starting at 0
// in the order that file lists them.
enum class CpuFeature {
#define OAKNUT_CPU_FEATURE(feature_name) feature_name,
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
};
|
||||
|
||||
// Number of entries in cpu_feature.inc.hpp: each entry expands to a "+1" term
// that is added to the initial 0.
constexpr std::size_t cpu_feature_count = 0
#define OAKNUT_CPU_FEATURE(feature_name) +1
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
    ;
|
||||
|
||||
class CpuFeatures final {
|
||||
public:
|
||||
constexpr CpuFeatures() = default;
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features)
|
||||
{
|
||||
for (CpuFeature f : features) {
|
||||
m_bitset.set(static_cast<std::size_t>(f));
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool has(CpuFeature feature) const
|
||||
{
|
||||
if (static_cast<std::size_t>(feature) >= cpu_feature_count)
|
||||
return false;
|
||||
return m_bitset[static_cast<std::size_t>(feature)];
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset &= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset |= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset ^= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept
|
||||
{
|
||||
CpuFeatures result;
|
||||
result.m_bitset = ~m_bitset;
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
using bitset = std::bitset<cpu_feature_count>;
|
||||
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
|
||||
bitset m_bitset;
|
||||
};
|
||||
|
||||
// Returns the intersection of two feature sets.
//
// Explicitly `inline`: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing
// this would otherwise be a non-inline function defined in a header, which
// breaks the one-definition rule (duplicate symbols at link time) as soon as
// two translation units include it.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset & b.m_bitset;
    return result;
}
|
||||
|
||||
// Returns the union of two feature sets.
//
// Explicitly `inline`: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing
// this would otherwise be a non-inline function defined in a header, which
// breaks the one-definition rule (duplicate symbols at link time) as soon as
// two translation units include it.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset | b.m_bitset;
    return result;
}
|
||||
|
||||
// Returns the symmetric difference of two feature sets (features present in
// exactly one of them).
//
// Explicitly `inline`: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to nothing
// this would otherwise be a non-inline function defined in a header, which
// breaks the one-definition rule (duplicate symbols at link time) as soon as
// two translation units include it.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset ^ b.m_bitset;
    return result;
}
|
||||
|
||||
} // namespace oaknut
|
||||
22
include/oaknut/feature_detection/feature_detection.hpp
Normal file
22
include/oaknut/feature_detection/feature_detection.hpp
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
// Selects the feature-detection backend for the host operating system.
// OAKNUT_CPU_FEATURE_DETECTION is defined to 1 when an OS-specific backend is
// included, and to 0 when only the generic fallback header is used.
#if defined(__APPLE__)
#    define OAKNUT_CPU_FEATURE_DETECTION 1
#    include "oaknut/feature_detection/feature_detection_apple.hpp"
#elif defined(__linux__)
#    define OAKNUT_CPU_FEATURE_DETECTION 1
#    include "oaknut/feature_detection/feature_detection_linux.hpp"
#elif defined(__FreeBSD__)
#    define OAKNUT_CPU_FEATURE_DETECTION 1
#    include "oaknut/feature_detection/feature_detection_freebsd.hpp"
#elif defined(_WIN32)
#    define OAKNUT_CPU_FEATURE_DETECTION 1
#    include "oaknut/feature_detection/feature_detection_w32.hpp"
#else
// No dedicated backend: tell the user at compile time and fall back.
#    define OAKNUT_CPU_FEATURE_DETECTION 0
#    warning "Unsupported operating system for CPU feature detection"
#    include "oaknut/feature_detection/feature_detection_generic.hpp"
#endif
|
||||
105
include/oaknut/feature_detection/feature_detection_apple.hpp
Normal file
105
include/oaknut/feature_detection/feature_detection_apple.hpp
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
|
||||
|
||||
namespace detail {
|
||||
|
||||
inline bool detect_feature(const char* const sysctl_name)
|
||||
{
|
||||
int result = 0;
|
||||
std::size_t result_size = sizeof(result);
|
||||
if (::sysctlbyname(sysctl_name, &result, &result_size, nullptr, 0) == 0) {
|
||||
return result != 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Builds a CpuFeatures set by querying the hw.optional.* sysctls.
// Where two sysctl names are given for a feature, the second is consulted only
// if the first does not report the feature.
inline CpuFeatures detect_features_via_sysctlbyname()
{
    CpuFeatures detected;

    // True when the named sysctl reports a non-zero value.
    const auto present = [](const char* name) { return detail::detect_feature(name); };
    // Adds a single feature to the result.
    const auto mark = [&detected](CpuFeature feature) { detected |= CpuFeatures{feature}; };

    if (present("hw.optional.AdvSIMD") || present("hw.optional.neon"))
        mark(CpuFeature::ASIMD);
    if (present("hw.optional.floatingpoint"))
        mark(CpuFeature::FP);
    if (present("hw.optional.AdvSIMD_HPFPCvt") || present("hw.optional.neon_hpfp"))
        mark(CpuFeature::FP16Conv);
    if (present("hw.optional.arm.FEAT_BF16"))
        mark(CpuFeature::BF16);
    if (present("hw.optional.arm.FEAT_DotProd"))
        mark(CpuFeature::DotProd);
    if (present("hw.optional.arm.FEAT_FCMA") || present("hw.optional.armv8_3_compnum"))
        mark(CpuFeature::FCMA);
    if (present("hw.optional.arm.FEAT_FHM") || present("hw.optional.armv8_2_fhm"))
        mark(CpuFeature::FHM);
    if (present("hw.optional.arm.FEAT_FP16") || present("hw.optional.neon_fp16"))
        mark(CpuFeature::FP16);
    if (present("hw.optional.arm.FEAT_FRINTTS"))
        mark(CpuFeature::FRINTTS);
    if (present("hw.optional.arm.FEAT_I8MM"))
        mark(CpuFeature::I8MM);
    if (present("hw.optional.arm.FEAT_JSCVT"))
        mark(CpuFeature::JSCVT);
    if (present("hw.optional.arm.FEAT_RDM"))
        mark(CpuFeature::RDM);
    if (present("hw.optional.arm.FEAT_FlagM"))
        mark(CpuFeature::FlagM);
    if (present("hw.optional.arm.FEAT_FlagM2"))
        mark(CpuFeature::FlagM2);
    if (present("hw.optional.armv8_crc32"))
        mark(CpuFeature::CRC32);
    if (present("hw.optional.arm.FEAT_LRCPC"))
        mark(CpuFeature::LRCPC);
    if (present("hw.optional.arm.FEAT_LRCPC2"))
        mark(CpuFeature::LRCPC2);
    if (present("hw.optional.arm.FEAT_LSE") || present("hw.optional.armv8_1_atomics"))
        mark(CpuFeature::LSE);
    if (present("hw.optional.arm.FEAT_LSE2"))
        mark(CpuFeature::LSE2);
    if (present("hw.optional.arm.FEAT_AES"))
        mark(CpuFeature::AES);
    if (present("hw.optional.arm.FEAT_PMULL"))
        mark(CpuFeature::PMULL);
    if (present("hw.optional.arm.FEAT_SHA1"))
        mark(CpuFeature::SHA1);
    if (present("hw.optional.arm.FEAT_SHA256"))
        mark(CpuFeature::SHA256);
    if (present("hw.optional.arm.FEAT_SHA512") || present("hw.optional.armv8_2_sha512"))
        mark(CpuFeature::SHA512);
    if (present("hw.optional.arm.FEAT_SHA3") || present("hw.optional.armv8_2_sha3"))
        mark(CpuFeature::SHA3);
    if (present("hw.optional.arm.FEAT_BTI"))
        mark(CpuFeature::BTI);
    if (present("hw.optional.arm.FEAT_DPB"))
        mark(CpuFeature::DPB);
    if (present("hw.optional.arm.FEAT_DPB2"))
        mark(CpuFeature::DPB2);
    if (present("hw.optional.arm.FEAT_ECV"))
        mark(CpuFeature::ECV);
    if (present("hw.optional.arm.FEAT_SB"))
        mark(CpuFeature::SB);
    if (present("hw.optional.arm.FEAT_SSBS"))
        mark(CpuFeature::SSBS);

    return detected;
}
|
||||
|
||||
// Platform entry point for CPU feature detection; on this platform it simply
// forwards to the sysctlbyname-based detector.
inline CpuFeatures detect_features()
{
    return detect_features_via_sysctlbyname();
}
|
||||
|
||||
} // namespace oaknut
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
|
||||
// Fallback values for the auxiliary-vector tags, used only when the headers
// included above did not already define them.
#if !defined(AT_HWCAP)
#    define AT_HWCAP 16
#endif
#if !defined(AT_HWCAP2)
#    define AT_HWCAP2 26
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
unsigned long getauxval(int aux)
|
||||
{
|
||||
unsigned long result = 0;
|
||||
if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) {
|
||||
return result;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Fetches the AT_HWCAP and AT_HWCAP2 words via detail::getauxval() and decodes
// them with the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const unsigned long first_word = detail::getauxval(AT_HWCAP);
    const unsigned long second_word = detail::getauxval(AT_HWCAP2);

    return detect_features_via_hwcap(first_word, second_word);
}
|
||||
|
||||
// Platform entry point for CPU feature detection; forwards to the
// hwcap-based detector.
inline CpuFeatures detect_features()
{
    return detect_features_via_hwcap();
}
|
||||
|
||||
} // namespace oaknut
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Fallback used when no OS-specific detector is available: performs no
// run-time query and always reports exactly FP and ASIMD.
inline CpuFeatures detect_features()
{
    CpuFeatures baseline{CpuFeature::FP, CpuFeature::ASIMD};
    return baseline;
}
|
||||
|
||||
} // namespace oaknut
|
||||
120
include/oaknut/feature_detection/feature_detection_hwcaps.hpp
Normal file
120
include/oaknut/feature_detection/feature_detection_hwcaps.hpp
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Returns true when every bit position listed in `bits` is set in `value`
// (and true for an empty list).
//
// The value is widened to unsigned long long before shifting: `unsigned long`
// is only 32 bits wide on some targets, and shifting by a count that is not
// smaller than the operand's width is undefined behaviour. Bit positions that
// `value` cannot hold therefore simply read as 0.
template<std::size_t... bits>
constexpr bool bit_test(unsigned long value)
{
    static_assert(((bits < 64) && ...), "bit index must be below 64");
    const unsigned long long wide = value;
    return (((wide >> bits) & 1ULL) && ...);
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Decodes the two hardware-capability words into a CpuFeatures set.
// Each table line below names a feature and the bit position(s) that must all
// be set in the corresponding word for the feature to be reported; the
// trailing comment gives the HWCAP name of those bits.
inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2)
{
    CpuFeatures features;

    // Adds a single feature to the result.
    const auto enable = [&features](CpuFeature feature) { features |= CpuFeatures{feature}; };

#define OAKNUT_DETECT_CAP(FEAT, ...)            \
    if (detail::bit_test<__VA_ARGS__>(hwcap))   \
        enable(CpuFeature::FEAT);
#define OAKNUT_DETECT_CAP2(FEAT, ...)           \
    if (detail::bit_test<__VA_ARGS__>(hwcap2))  \
        enable(CpuFeature::FEAT);

    OAKNUT_DETECT_CAP(FP, 0)             // HWCAP_FP
    OAKNUT_DETECT_CAP(ASIMD, 1)          // HWCAP_ASIMD
    // HWCAP_EVTSTRM (2)
    OAKNUT_DETECT_CAP(AES, 3)            // HWCAP_AES
    OAKNUT_DETECT_CAP(PMULL, 4)          // HWCAP_PMULL
    OAKNUT_DETECT_CAP(SHA1, 5)           // HWCAP_SHA1
    OAKNUT_DETECT_CAP(SHA256, 6)         // HWCAP_SHA2
    OAKNUT_DETECT_CAP(CRC32, 7)          // HWCAP_CRC32
    OAKNUT_DETECT_CAP(LSE, 8)            // HWCAP_ATOMICS
    OAKNUT_DETECT_CAP(FP16Conv, 9, 10)   // HWCAP_FPHP && HWCAP_ASIMDHP
    OAKNUT_DETECT_CAP(FP16, 9, 10)       // HWCAP_FPHP && HWCAP_ASIMDHP
    // HWCAP_CPUID (11)
    OAKNUT_DETECT_CAP(RDM, 12)           // HWCAP_ASIMDRDM
    OAKNUT_DETECT_CAP(JSCVT, 13)         // HWCAP_JSCVT
    OAKNUT_DETECT_CAP(FCMA, 14)          // HWCAP_FCMA
    OAKNUT_DETECT_CAP(LRCPC, 15)         // HWCAP_LRCPC
    OAKNUT_DETECT_CAP(DPB, 16)           // HWCAP_DCPOP
    OAKNUT_DETECT_CAP(SHA3, 17)          // HWCAP_SHA3
    OAKNUT_DETECT_CAP(SM3, 18)           // HWCAP_SM3
    OAKNUT_DETECT_CAP(SM4, 19)           // HWCAP_SM4
    OAKNUT_DETECT_CAP(DotProd, 20)       // HWCAP_ASIMDDP
    OAKNUT_DETECT_CAP(SHA512, 21)        // HWCAP_SHA512
    OAKNUT_DETECT_CAP(SVE, 22)           // HWCAP_SVE
    OAKNUT_DETECT_CAP(FHM, 23)           // HWCAP_ASIMDFHM
    OAKNUT_DETECT_CAP(DIT, 24)           // HWCAP_DIT
    OAKNUT_DETECT_CAP(LSE2, 25)          // HWCAP_USCAT
    OAKNUT_DETECT_CAP(LRCPC2, 26)        // HWCAP_ILRCPC
    OAKNUT_DETECT_CAP(FlagM, 27)         // HWCAP_FLAGM
    OAKNUT_DETECT_CAP(SSBS, 28)          // HWCAP_SSBS
    OAKNUT_DETECT_CAP(SB, 29)            // HWCAP_SB
    OAKNUT_DETECT_CAP(PACA, 30)          // HWCAP_PACA
    OAKNUT_DETECT_CAP(PACG, 31)          // HWCAP_PACG

    OAKNUT_DETECT_CAP2(DPB2, 0)          // HWCAP2_DCPODP
    OAKNUT_DETECT_CAP2(SVE2, 1)          // HWCAP2_SVE2
    OAKNUT_DETECT_CAP2(SVE_AES, 2)       // HWCAP2_SVEAES
    OAKNUT_DETECT_CAP2(SVE_PMULL128, 3)  // HWCAP2_SVEPMULL
    OAKNUT_DETECT_CAP2(SVE_BITPERM, 4)   // HWCAP2_SVEBITPERM
    OAKNUT_DETECT_CAP2(SVE_SHA3, 5)      // HWCAP2_SVESHA3
    OAKNUT_DETECT_CAP2(SVE_SM4, 6)       // HWCAP2_SVESM4
    OAKNUT_DETECT_CAP2(FlagM2, 7)        // HWCAP2_FLAGM2
    OAKNUT_DETECT_CAP2(FRINTTS, 8)       // HWCAP2_FRINT
    OAKNUT_DETECT_CAP2(SVE_I8MM, 9)      // HWCAP2_SVEI8MM
    OAKNUT_DETECT_CAP2(SVE_F32MM, 10)    // HWCAP2_SVEF32MM
    OAKNUT_DETECT_CAP2(SVE_F64MM, 11)    // HWCAP2_SVEF64MM
    OAKNUT_DETECT_CAP2(SVE_BF16, 12)     // HWCAP2_SVEBF16
    OAKNUT_DETECT_CAP2(I8MM, 13)         // HWCAP2_I8MM
    OAKNUT_DETECT_CAP2(BF16, 14)         // HWCAP2_BF16
    OAKNUT_DETECT_CAP2(DGH, 15)          // HWCAP2_DGH
    OAKNUT_DETECT_CAP2(RNG, 16)          // HWCAP2_RNG
    OAKNUT_DETECT_CAP2(BTI, 17)          // HWCAP2_BTI
    OAKNUT_DETECT_CAP2(MTE, 18)          // HWCAP2_MTE
    OAKNUT_DETECT_CAP2(ECV, 19)          // HWCAP2_ECV
    OAKNUT_DETECT_CAP2(AFP, 20)          // HWCAP2_AFP
    OAKNUT_DETECT_CAP2(RPRES, 21)        // HWCAP2_RPRES
    OAKNUT_DETECT_CAP2(MTE3, 22)         // HWCAP2_MTE3
    OAKNUT_DETECT_CAP2(SME, 23)          // HWCAP2_SME
    OAKNUT_DETECT_CAP2(SME_I16I64, 24)   // HWCAP2_SME_I16I64
    OAKNUT_DETECT_CAP2(SME_F64F64, 25)   // HWCAP2_SME_F64F64
    OAKNUT_DETECT_CAP2(SME_I8I32, 26)    // HWCAP2_SME_I8I32
    OAKNUT_DETECT_CAP2(SME_F16F32, 27)   // HWCAP2_SME_F16F32
    OAKNUT_DETECT_CAP2(SME_B16F32, 28)   // HWCAP2_SME_B16F32
    OAKNUT_DETECT_CAP2(SME_F32F32, 29)   // HWCAP2_SME_F32F32
    OAKNUT_DETECT_CAP2(SME_FA64, 30)     // HWCAP2_SME_FA64
    OAKNUT_DETECT_CAP2(WFxT, 31)         // HWCAP2_WFxT
    OAKNUT_DETECT_CAP2(EBF16, 32)        // HWCAP2_EBF16
    OAKNUT_DETECT_CAP2(SVE_EBF16, 33)    // HWCAP2_SVE_EBF16
    OAKNUT_DETECT_CAP2(CSSC, 34)         // HWCAP2_CSSC
    OAKNUT_DETECT_CAP2(RPRFM, 35)        // HWCAP2_RPRFM
    OAKNUT_DETECT_CAP2(SVE2p1, 36)       // HWCAP2_SVE2P1
    OAKNUT_DETECT_CAP2(SME2, 37)         // HWCAP2_SME2
    OAKNUT_DETECT_CAP2(SME2p1, 38)       // HWCAP2_SME2P1
    OAKNUT_DETECT_CAP2(SME_I16I32, 39)   // HWCAP2_SME_I16I32
    OAKNUT_DETECT_CAP2(SME_BI32I32, 40)  // HWCAP2_SME_BI32I32
    OAKNUT_DETECT_CAP2(SME_B16B16, 41)   // HWCAP2_SME_B16B16
    OAKNUT_DETECT_CAP2(SME_F16F16, 42)   // HWCAP2_SME_F16F16
    OAKNUT_DETECT_CAP2(MOPS, 43)         // HWCAP2_MOPS
    OAKNUT_DETECT_CAP2(HBC, 44)          // HWCAP2_HBC

#undef OAKNUT_DETECT_CAP
#undef OAKNUT_DETECT_CAP2

    return features;
}
|
||||
|
||||
} // namespace oaknut
|
||||
32
include/oaknut/feature_detection/feature_detection_linux.hpp
Normal file
32
include/oaknut/feature_detection/feature_detection_linux.hpp
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
|
||||
// Fallback values for the auxiliary-vector tags, used only when the headers
// included above did not already define them.
#if !defined(AT_HWCAP)
#    define AT_HWCAP 16
#endif
#if !defined(AT_HWCAP2)
#    define AT_HWCAP2 26
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Fetches the AT_HWCAP and AT_HWCAP2 words with ::getauxval() and decodes them
// with the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const unsigned long first_word = ::getauxval(AT_HWCAP);
    const unsigned long second_word = ::getauxval(AT_HWCAP2);

    return detect_features_via_hwcap(first_word, second_word);
}
|
||||
|
||||
// Platform entry point for CPU feature detection; forwards to the
// hwcap-based detector.
inline CpuFeatures detect_features()
{
    return detect_features_via_hwcap();
}
|
||||
|
||||
} // namespace oaknut
|
||||
47
include/oaknut/feature_detection/feature_detection_w32.hpp
Normal file
47
include/oaknut/feature_detection/feature_detection_w32.hpp
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#include <processthreadsapi.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
|
||||
|
||||
// Builds a CpuFeatures set from IsProcessorFeaturePresent() queries.
// The feature identifiers are given as literal numbers; the comment beside
// each one names the corresponding PF_* constant.
inline CpuFeatures detect_features_via_IsProcessorFeaturePresent()
{
    constexpr DWORD pf_arm_v8_crypto = 30;    // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v8_crc32 = 31;     // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v81_atomic = 34;   // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v82_dp = 43;       // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v83_jscvt = 44;    // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v83_lrcpc = 45;    // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE

    CpuFeatures detected;

    // A single flag covers the four crypto-related features.
    if (::IsProcessorFeaturePresent(pf_arm_v8_crypto)) {
        detected |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256};
    }
    if (::IsProcessorFeaturePresent(pf_arm_v8_crc32)) {
        detected |= CpuFeatures{CpuFeature::CRC32};
    }
    if (::IsProcessorFeaturePresent(pf_arm_v81_atomic)) {
        detected |= CpuFeatures{CpuFeature::LSE};
    }
    if (::IsProcessorFeaturePresent(pf_arm_v82_dp)) {
        detected |= CpuFeatures{CpuFeature::DotProd};
    }
    if (::IsProcessorFeaturePresent(pf_arm_v83_jscvt)) {
        detected |= CpuFeatures{CpuFeature::JSCVT};
    }
    if (::IsProcessorFeaturePresent(pf_arm_v83_lrcpc)) {
        detected |= CpuFeatures{CpuFeature::LRCPC};
    }

    return detected;
}
|
||||
|
||||
// Platform entry point for CPU feature detection.
// FP and ASIMD are always reported; everything else comes from
// detect_features_via_IsProcessorFeaturePresent().
inline CpuFeatures detect_features()
{
    const CpuFeatures always_reported{CpuFeature::FP, CpuFeature::ASIMD};
    return always_reported | detect_features_via_IsProcessorFeaturePresent();
}
|
||||
|
||||
} // namespace oaknut
|
||||
78
include/oaknut/impl/cpu_feature.inc.hpp
Normal file
78
include/oaknut/impl/cpu_feature.inc.hpp
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// X-macro list of CPU features. The including file defines
// OAKNUT_CPU_FEATURE(name) before including this file and undefines it after.
// The order of entries is significant: it determines the CpuFeature
// enumerator values and therefore the bit index used inside CpuFeatures.

// Features that feature_detection_hwcaps.hpp reads from the first hwcap word.
OAKNUT_CPU_FEATURE(FP)
OAKNUT_CPU_FEATURE(ASIMD)
OAKNUT_CPU_FEATURE(AES)
OAKNUT_CPU_FEATURE(PMULL)
OAKNUT_CPU_FEATURE(SHA1)
OAKNUT_CPU_FEATURE(SHA256)
OAKNUT_CPU_FEATURE(CRC32)
OAKNUT_CPU_FEATURE(LSE)
OAKNUT_CPU_FEATURE(FP16Conv)
OAKNUT_CPU_FEATURE(FP16)
OAKNUT_CPU_FEATURE(RDM)
OAKNUT_CPU_FEATURE(JSCVT)
OAKNUT_CPU_FEATURE(FCMA)
OAKNUT_CPU_FEATURE(LRCPC)
OAKNUT_CPU_FEATURE(DPB)
OAKNUT_CPU_FEATURE(SHA3)
OAKNUT_CPU_FEATURE(SM3)
OAKNUT_CPU_FEATURE(SM4)
OAKNUT_CPU_FEATURE(DotProd)
OAKNUT_CPU_FEATURE(SHA512)
OAKNUT_CPU_FEATURE(SVE)
OAKNUT_CPU_FEATURE(FHM)
OAKNUT_CPU_FEATURE(DIT)
OAKNUT_CPU_FEATURE(LSE2)
OAKNUT_CPU_FEATURE(LRCPC2)
OAKNUT_CPU_FEATURE(FlagM)
OAKNUT_CPU_FEATURE(SSBS)
OAKNUT_CPU_FEATURE(SB)
OAKNUT_CPU_FEATURE(PACA)
OAKNUT_CPU_FEATURE(PACG)

// Features that feature_detection_hwcaps.hpp reads from the second hwcap word.
OAKNUT_CPU_FEATURE(DPB2)
OAKNUT_CPU_FEATURE(SVE2)
OAKNUT_CPU_FEATURE(SVE_AES)
OAKNUT_CPU_FEATURE(SVE_PMULL128)
OAKNUT_CPU_FEATURE(SVE_BITPERM)
OAKNUT_CPU_FEATURE(SVE_SHA3)
OAKNUT_CPU_FEATURE(SVE_SM4)
OAKNUT_CPU_FEATURE(FlagM2)
OAKNUT_CPU_FEATURE(FRINTTS)
OAKNUT_CPU_FEATURE(SVE_I8MM)
OAKNUT_CPU_FEATURE(SVE_F32MM)
OAKNUT_CPU_FEATURE(SVE_F64MM)
OAKNUT_CPU_FEATURE(SVE_BF16)
OAKNUT_CPU_FEATURE(I8MM)
OAKNUT_CPU_FEATURE(BF16)
OAKNUT_CPU_FEATURE(DGH)
OAKNUT_CPU_FEATURE(RNG)
OAKNUT_CPU_FEATURE(BTI)
OAKNUT_CPU_FEATURE(MTE)
OAKNUT_CPU_FEATURE(ECV)
OAKNUT_CPU_FEATURE(AFP)
OAKNUT_CPU_FEATURE(RPRES)
OAKNUT_CPU_FEATURE(MTE3)
OAKNUT_CPU_FEATURE(SME)
OAKNUT_CPU_FEATURE(SME_I16I64)
OAKNUT_CPU_FEATURE(SME_F64F64)
OAKNUT_CPU_FEATURE(SME_I8I32)
OAKNUT_CPU_FEATURE(SME_F16F32)
OAKNUT_CPU_FEATURE(SME_B16F32)
OAKNUT_CPU_FEATURE(SME_F32F32)
OAKNUT_CPU_FEATURE(SME_FA64)
OAKNUT_CPU_FEATURE(WFxT)
OAKNUT_CPU_FEATURE(EBF16)
OAKNUT_CPU_FEATURE(SVE_EBF16)
OAKNUT_CPU_FEATURE(CSSC)
OAKNUT_CPU_FEATURE(RPRFM)
OAKNUT_CPU_FEATURE(SVE2p1)
OAKNUT_CPU_FEATURE(SME2)
OAKNUT_CPU_FEATURE(SME2p1)
OAKNUT_CPU_FEATURE(SME_I16I32)
OAKNUT_CPU_FEATURE(SME_BI32I32)
OAKNUT_CPU_FEATURE(SME_B16B16)
OAKNUT_CPU_FEATURE(SME_F16F16)
OAKNUT_CPU_FEATURE(MOPS)
OAKNUT_CPU_FEATURE(HBC)
|
||||
25
tests/_feature_detect.cpp
Normal file
25
tests/_feature_detect.cpp
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "oaknut/feature_detection/feature_detection.hpp"
|
||||
|
||||
using namespace oaknut;
|
||||
|
||||
// Prints, on a single line of stdout, the name of every feature that
// detect_features() reports. The test makes no assertions; it only exercises
// detection and lists the result.
TEST_CASE("Print CPU features")
{
    const CpuFeatures detected = detect_features();

    std::fputs("CPU Features: ", stdout);

// Expanded once per entry of cpu_feature.inc.hpp: print the name if present.
#define OAKNUT_CPU_FEATURE(name)            \
    if (detected.has(CpuFeature::name)) {   \
        std::fputs(#name " ", stdout);      \
    }
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE

    std::fputs("\n", stdout);
}
|
||||
Loading…
Reference in a new issue