2014-07-11 07:51:27 +02:00
|
|
|
/**
|
|
|
|
|
******************************************************************************
|
|
|
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
|
|
|
******************************************************************************
|
|
|
|
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
|
|
|
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
|
|
|
******************************************************************************
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifndef POLY_MATH_H_
|
|
|
|
|
#define POLY_MATH_H_
|
|
|
|
|
|
|
|
|
|
#include <xmmintrin.h>
|
|
|
|
|
|
|
|
|
|
#include <cstdint>
|
|
|
|
|
#include <cstring>
|
|
|
|
|
|
2014-07-12 03:03:35 +02:00
|
|
|
#include <poly/config.h>
|
|
|
|
|
#include <poly/platform.h>
|
|
|
|
|
|
2014-07-11 07:51:27 +02:00
|
|
|
namespace poly {
|
|
|
|
|
|
|
|
|
|
// lzcnt instruction, typed for integers of all sizes.
// Returns the number of leading zero bits in the value parameter. If value is
// zero, the return value is the size of the input operand in bits (8, 16, 32,
// or 64). If the most significant bit of value is one, the return value is
// zero.
|
|
|
|
|
#if XE_COMPILER_MSVC
|
2014-07-13 01:51:52 +02:00
|
|
|
inline uint8_t lzcnt(uint8_t v) {
|
|
|
|
|
return static_cast<uint8_t>(__lzcnt16(v) - 8);
|
|
|
|
|
}
|
2014-07-12 03:03:35 +02:00
|
|
|
// 16-bit leading-zero count, forwarded to the __lzcnt16 intrinsic.
inline uint8_t lzcnt(uint16_t v) {
  const auto leading_zeros = __lzcnt16(v);
  return static_cast<uint8_t>(leading_zeros);
}
|
|
|
|
|
// 32-bit leading-zero count, forwarded to the __lzcnt intrinsic.
inline uint8_t lzcnt(uint32_t v) {
  const auto leading_zeros = __lzcnt(v);
  return static_cast<uint8_t>(leading_zeros);
}
|
|
|
|
|
// 64-bit leading-zero count, forwarded to the __lzcnt64 intrinsic.
inline uint8_t lzcnt(uint64_t v) {
  const auto leading_zeros = __lzcnt64(v);
  return static_cast<uint8_t>(leading_zeros);
}
|
2014-07-11 07:51:27 +02:00
|
|
|
#else
|
2014-07-13 01:51:52 +02:00
|
|
|
// Counts the leading zero bits of an 8-bit value.
// Returns 8 when v is zero and 0 when the most significant bit of v is set.
inline uint8_t lzcnt(uint8_t v) {
  // __builtin_clz has an undefined result for a zero operand, so the zero
  // case is answered explicitly to honor the documented lzcnt contract.
  if (v == 0) {
    return 8;
  }
  // The builtin works on unsigned int; subtract the leading zeros that the
  // widening from 8 bits adds.
  const int widening_bits = static_cast<int>(sizeof(unsigned int) * 8) - 8;
  const int count = __builtin_clz(static_cast<unsigned int>(v));
  return static_cast<uint8_t>(count - widening_bits);
}
|
|
|
|
|
// Counts the leading zero bits of a 16-bit value.
// Returns 16 when v is zero and 0 when the most significant bit of v is set.
inline uint8_t lzcnt(uint16_t v) {
  // __builtin_clz has an undefined result for a zero operand, so the zero
  // case is answered explicitly to honor the documented lzcnt contract.
  if (v == 0) {
    return 16;
  }
  // The builtin works on unsigned int; subtract the leading zeros that the
  // widening from 16 bits adds.
  const int widening_bits = static_cast<int>(sizeof(unsigned int) * 8) - 16;
  const int count = __builtin_clz(static_cast<unsigned int>(v));
  return static_cast<uint8_t>(count - widening_bits);
}
|
|
|
|
|
// Counts the leading zero bits of a 32-bit value.
// Returns 32 when v is zero and 0 when the most significant bit of v is set.
inline uint8_t lzcnt(uint32_t v) {
  // __builtin_clz has an undefined result for a zero operand, so the zero
  // case is answered explicitly to honor the documented lzcnt contract.
  if (v == 0) {
    return 32;
  }
  return static_cast<uint8_t>(__builtin_clz(v));
}
|
|
|
|
|
// Counts the leading zero bits of a 64-bit value.
// Returns 64 when v is zero and 0 when the most significant bit of v is set.
inline uint8_t lzcnt(uint64_t v) {
  // __builtin_clzll has an undefined result for a zero operand, so the zero
  // case is answered explicitly to honor the documented lzcnt contract.
  if (v == 0) {
    return 64;
  }
  return static_cast<uint8_t>(__builtin_clzll(v));
}
|
2014-07-11 07:51:27 +02:00
|
|
|
#endif // XE_COMPILER_MSVC
|
2014-07-12 03:03:35 +02:00
|
|
|
// Signed 8-bit overload: forwards the value, cast to uint8_t, to the unsigned
// overload.
inline uint8_t lzcnt(int8_t v) {
  const uint8_t as_unsigned = static_cast<uint8_t>(v);
  return lzcnt(as_unsigned);
}
|
|
|
|
|
// Signed 16-bit overload: forwards the value, cast to uint16_t, to the
// unsigned overload.
inline uint8_t lzcnt(int16_t v) {
  const uint16_t as_unsigned = static_cast<uint16_t>(v);
  return lzcnt(as_unsigned);
}
|
|
|
|
|
// Signed 32-bit overload: forwards the value, cast to uint32_t, to the
// unsigned overload.
inline uint8_t lzcnt(int32_t v) {
  const uint32_t as_unsigned = static_cast<uint32_t>(v);
  return lzcnt(as_unsigned);
}
|
|
|
|
|
// Signed 64-bit overload: forwards the value, cast to uint64_t, to the
// unsigned overload.
inline uint8_t lzcnt(int64_t v) {
  const uint64_t as_unsigned = static_cast<uint64_t>(v);
  return lzcnt(as_unsigned);
}
|
2014-07-11 07:51:27 +02:00
|
|
|
|
|
|
|
|
// BitScanForward (bsf).
// Searches the value from the least significant bit (LSB) to the most
// significant bit (MSB) for a set bit (1).
// Returns false if no bits are set, in which case the output index is invalid.
|
|
|
|
|
#if XE_COMPILER_MSVC
|
2014-07-12 03:03:35 +02:00
|
|
|
inline bool bit_scan_forward(uint32_t v, uint32_t* out_first_set_index) {
|
|
|
|
|
return _BitScanForward(reinterpret_cast<DWORD*>(out_first_set_index), v) != 0;
|
2014-07-11 07:51:27 +02:00
|
|
|
}
|
2014-07-12 03:03:35 +02:00
|
|
|
inline bool bit_scan_forward(uint64_t v, uint32_t* out_first_set_index) {
|
2014-07-13 01:51:52 +02:00
|
|
|
return _BitScanForward64(reinterpret_cast<DWORD*>(out_first_set_index), v) !=
|
|
|
|
|
0;
|
2014-07-11 07:51:27 +02:00
|
|
|
}
|
|
|
|
|
#else
|
2014-07-12 03:03:35 +02:00
|
|
|
// Scans a 32-bit value from the LSB upward for the first set bit.
// On success stores the zero-based bit index (0 == LSB) in
// out_first_set_index and returns true. Returns false when v is zero; the
// stored index (0) is then not meaningful.
inline bool bit_scan_forward(uint32_t v, uint32_t* out_first_set_index) {
  if (v == 0) {
    // __builtin_ctz has an undefined result for zero; report "not found".
    *out_first_set_index = 0;
    return false;
  }
  // __builtin_ctz yields a zero-based index, unlike ffs() whose result is
  // one-based.
  *out_first_set_index = static_cast<uint32_t>(__builtin_ctz(v));
  return true;
}
|
2014-07-12 03:03:35 +02:00
|
|
|
// Scans a 64-bit value from the LSB upward for the first set bit.
// On success stores the zero-based bit index (0 == LSB) in
// out_first_set_index and returns true. Returns false when v is zero; the
// stored index (0) is then not meaningful.
inline bool bit_scan_forward(uint64_t v, uint32_t* out_first_set_index) {
  if (v == 0) {
    // __builtin_ctzll has an undefined result for zero; report "not found".
    *out_first_set_index = 0;
    return false;
  }
  // __builtin_ctzll yields a zero-based index, unlike ffsll() whose result is
  // one-based.
  *out_first_set_index = static_cast<uint32_t>(__builtin_ctzll(v));
  return true;
}
|
|
|
|
|
#endif // XE_COMPILER_MSVC
|
2014-07-12 03:03:35 +02:00
|
|
|
// Signed 32-bit overload: forwards the value, cast to uint32_t, to the
// unsigned overload.
inline bool bit_scan_forward(int32_t v, uint32_t* out_first_set_index) {
  const uint32_t as_unsigned = static_cast<uint32_t>(v);
  return bit_scan_forward(as_unsigned, out_first_set_index);
}
|
2014-07-12 03:03:35 +02:00
|
|
|
// Signed 64-bit overload: forwards the value, cast to uint64_t, to the
// unsigned overload.
inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
  const uint64_t as_unsigned = static_cast<uint64_t>(v);
  return bit_scan_forward(as_unsigned, out_first_set_index);
}
|
|
|
|
|
|
|
|
|
|
// Utilities for SSE values.
|
|
|
|
|
// Extracts 32-bit float lane N (0 = lowest) from an SSE register value.
template <int N>
float m128_f32(const __m128& v) {
  // Reject bad lane indices at compile time rather than building an
  // out-of-range shuffle immediate.
  static_assert(N >= 0 && N < 4, "m128_f32 lane index must be in [0, 3]");
  float ret;
  // Broadcast lane N to every lane, then store the low lane.
  _mm_store_ss(&ret, _mm_shuffle_ps(v, v, _MM_SHUFFLE(N, N, N, N)));
  return ret;
}
|
|
|
|
|
// Extracts lane N (0 = lowest) of an SSE register value and returns its raw
// 32-bit pattern as a signed integer (a bit reinterpretation, not a numeric
// float-to-int conversion).
template <int N>
int32_t m128_i32(const __m128& v) {
  // Reject bad lane indices at compile time rather than building an
  // out-of-range shuffle immediate.
  static_assert(N >= 0 && N < 4, "m128_i32 lane index must be in [0, 3]");
  static_assert(sizeof(float) == sizeof(int32_t),
                "float and int32_t must have the same size");
  float lane;
  // Broadcast lane N to every lane, then store the low lane.
  _mm_store_ss(&lane, _mm_shuffle_ps(v, v, _MM_SHUFFLE(N, N, N, N)));
  // Copy the bytes instead of reading an inactive union member, which is
  // undefined behavior in C++.
  int32_t bits;
  std::memcpy(&bits, &lane, sizeof(bits));
  return bits;
}
|
|
|
|
|
// Extracts 64-bit double lane N (0 = low, 1 = high) from an SSE2 register
// value.
template <int N>
double m128_f64(const __m128d& v) {
  // Only two lanes exist; any other N would produce a shuffle mask that picks
  // an unintended lane.
  static_assert(N == 0 || N == 1, "m128_f64 lane index must be 0 or 1");
  double ret;
  // Broadcast lane N to both lanes, then store the low lane.
  _mm_store_sd(&ret, _mm_shuffle_pd(v, v, _MM_SHUFFLE2(N, N)));
  return ret;
}
|
|
|
|
|
// Overload for a float-typed register: passes the same bits, cast to __m128d,
// to the __m128d overload to read double lane N.
template <int N>
double m128_f64(const __m128& v) {
  const __m128d as_doubles = _mm_castps_pd(v);
  return m128_f64<N>(as_doubles);
}
|
|
|
|
|
// Extracts lane N (0 = low, 1 = high) of an SSE2 register value and returns
// its raw 64-bit pattern as a signed integer (a bit reinterpretation, not a
// numeric double-to-int conversion).
template <int N>
int64_t m128_i64(const __m128d& v) {
  // Only two lanes exist; any other N would produce a shuffle mask that picks
  // an unintended lane.
  static_assert(N == 0 || N == 1, "m128_i64 lane index must be 0 or 1");
  static_assert(sizeof(double) == sizeof(int64_t),
                "double and int64_t must have the same size");
  double lane;
  // Broadcast lane N to both lanes, then store the low lane.
  _mm_store_sd(&lane, _mm_shuffle_pd(v, v, _MM_SHUFFLE2(N, N)));
  // Copy the bytes instead of reading an inactive union member, which is
  // undefined behavior in C++.
  int64_t bits;
  std::memcpy(&bits, &lane, sizeof(bits));
  return bits;
}
|
2014-07-12 03:03:35 +02:00
|
|
|
// Overload for a float-typed register: passes the same bits, cast to __m128d,
// to the __m128d overload to read 64-bit lane N.
template <int N>
int64_t m128_i64(const __m128& v) {
  const __m128d as_doubles = _mm_castps_pd(v);
  return m128_i64<N>(as_doubles);
}
|
2014-07-11 07:51:27 +02:00
|
|
|
|
|
|
|
|
} // namespace poly
|
|
|
|
|
|
|
|
|
|
#endif // POLY_MATH_H_
|