2020-11-29 13:03:50 +01:00
|
|
|
#pragma once // No BOM and only basic ASCII in this header, or a neko will die
|
2013-10-06 14:07:42 +02:00
|
|
|
|
2020-12-12 13:01:29 +01:00
|
|
|
#include "util/types.hpp"
|
2015-08-17 17:14:29 +02:00
|
|
|
|
2020-12-13 14:34:45 +01:00
|
|
|
// 128-bit vector type
|
2016-02-01 22:55:43 +01:00
|
|
|
union alignas(16) v128
|
2014-09-06 18:30:13 +02:00
|
|
|
{
|
2020-06-20 13:12:19 +02:00
|
|
|
uchar _bytes[16];
|
|
|
|
|
char _chars[16];
|
2016-02-01 22:55:43 +01:00
|
|
|
|
2020-12-18 08:39:54 +01:00
|
|
|
template <typename T, usz N, usz M>
|
2016-02-01 22:55:43 +01:00
|
|
|
struct masked_array_t // array type accessed as (index ^ M)
|
2014-10-07 15:35:44 +02:00
|
|
|
{
|
2020-12-21 15:12:05 +01:00
|
|
|
T m_data[N];
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2015-09-26 22:46:04 +02:00
|
|
|
public:
|
2020-12-18 08:39:54 +01:00
|
|
|
T& operator[](usz index)
|
2015-09-26 22:46:04 +02:00
|
|
|
{
|
2020-12-21 15:12:05 +01:00
|
|
|
return m_data[index ^ M];
|
2015-09-26 22:46:04 +02:00
|
|
|
}
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2020-12-18 08:39:54 +01:00
|
|
|
const T& operator[](usz index) const
|
2015-09-26 22:46:04 +02:00
|
|
|
{
|
2020-12-21 15:12:05 +01:00
|
|
|
return m_data[index ^ M];
|
2015-09-26 22:46:04 +02:00
|
|
|
}
|
|
|
|
|
};
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2020-12-18 08:39:54 +01:00
|
|
|
template <typename T, usz N = 16 / sizeof(T)>
|
2020-02-17 18:55:20 +01:00
|
|
|
using normal_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? 0 : N - 1>;
|
2020-12-18 08:39:54 +01:00
|
|
|
template <typename T, usz N = 16 / sizeof(T)>
|
2020-02-17 18:55:20 +01:00
|
|
|
using reversed_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? N - 1 : 0>;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2016-08-13 15:36:04 +02:00
|
|
|
normal_array_t<u64> _u64;
|
|
|
|
|
normal_array_t<s64> _s64;
|
2015-09-14 18:32:35 +02:00
|
|
|
reversed_array_t<u64> u64r;
|
|
|
|
|
reversed_array_t<s64> s64r;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2016-08-13 15:36:04 +02:00
|
|
|
normal_array_t<u32> _u32;
|
|
|
|
|
normal_array_t<s32> _s32;
|
2015-09-14 18:32:35 +02:00
|
|
|
reversed_array_t<u32> u32r;
|
|
|
|
|
reversed_array_t<s32> s32r;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2016-08-13 15:36:04 +02:00
|
|
|
normal_array_t<u16> _u16;
|
|
|
|
|
normal_array_t<s16> _s16;
|
2015-09-14 18:32:35 +02:00
|
|
|
reversed_array_t<u16> u16r;
|
|
|
|
|
reversed_array_t<s16> s16r;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2016-08-13 15:36:04 +02:00
|
|
|
normal_array_t<u8> _u8;
|
|
|
|
|
normal_array_t<s8> _s8;
|
|
|
|
|
reversed_array_t<u8> u8r;
|
|
|
|
|
reversed_array_t<s8> s8r;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2016-08-13 15:36:04 +02:00
|
|
|
normal_array_t<f32> _f;
|
|
|
|
|
normal_array_t<f64> _d;
|
2015-09-14 18:32:35 +02:00
|
|
|
reversed_array_t<f32> fr;
|
|
|
|
|
reversed_array_t<f64> dr;
|
2014-10-07 15:35:44 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
u128 _u;
|
|
|
|
|
//s128 _s;
|
|
|
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
|
|
|
|
template <typename T>
|
|
|
|
|
struct opaque_wrapper
|
|
|
|
|
{
|
|
|
|
|
u128 m_data;
|
|
|
|
|
|
|
|
|
|
opaque_wrapper() = default;
|
|
|
|
|
|
|
|
|
|
opaque_wrapper(const T& value)
|
|
|
|
|
: m_data(std::bit_cast<u128>(value))
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
opaque_wrapper& operator=(const T& value)
|
|
|
|
|
{
|
|
|
|
|
m_data = std::bit_cast<u128>(value);
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
operator T() const
|
|
|
|
|
{
|
|
|
|
|
return std::bit_cast<T>(m_data);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
opaque_wrapper<__m128> vf;
|
|
|
|
|
opaque_wrapper<__m128i> vi;
|
|
|
|
|
opaque_wrapper<__m128d> vd;
|
|
|
|
|
#else
|
2014-10-07 23:37:04 +02:00
|
|
|
__m128 vf;
|
|
|
|
|
__m128i vi;
|
2015-03-21 15:29:33 +01:00
|
|
|
__m128d vd;
|
2020-12-21 15:12:05 +01:00
|
|
|
#endif
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2016-02-01 22:55:43 +01:00
|
|
|
struct bit_array_128
|
2014-09-06 18:30:13 +02:00
|
|
|
{
|
2020-03-17 14:05:42 +01:00
|
|
|
char m_data[16];
|
2014-09-06 18:30:13 +02:00
|
|
|
|
|
|
|
|
public:
|
2020-12-21 15:12:05 +01:00
|
|
|
class bit_element;
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2015-02-04 16:29:34 +01:00
|
|
|
// Index 0 returns the MSB and index 127 returns the LSB
|
2020-12-21 15:12:05 +01:00
|
|
|
[[deprecated]] bit_element operator[](u32 index);
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2015-02-04 16:29:34 +01:00
|
|
|
// Index 0 returns the MSB and index 127 returns the LSB
|
2020-12-21 15:12:05 +01:00
|
|
|
[[deprecated]] bool operator[](u32 index) const;
|
2016-08-13 15:36:04 +02:00
|
|
|
} _bit;
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from64(u64 _0, u64 _1 = 0)
|
2014-09-06 18:30:13 +02:00
|
|
|
{
|
2015-08-06 15:31:13 +02:00
|
|
|
v128 ret;
|
2014-09-15 00:17:24 +02:00
|
|
|
ret._u64[0] = _0;
|
|
|
|
|
ret._u64[1] = _1;
|
2014-09-06 18:30:13 +02:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from64r(u64 _1, u64 _0 = 0)
|
2014-10-07 15:35:44 +02:00
|
|
|
{
|
|
|
|
|
return from64(_0, _1);
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
|
2014-09-06 18:30:13 +02:00
|
|
|
{
|
2015-08-06 15:31:13 +02:00
|
|
|
v128 ret;
|
2014-09-15 00:17:24 +02:00
|
|
|
ret._u32[0] = _0;
|
|
|
|
|
ret._u32[1] = _1;
|
|
|
|
|
ret._u32[2] = _2;
|
|
|
|
|
ret._u32[3] = _3;
|
2014-09-06 18:30:13 +02:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
|
2014-10-02 12:29:20 +02:00
|
|
|
{
|
2014-10-07 15:35:44 +02:00
|
|
|
return from32(_0, _1, _2, _3);
|
2014-10-02 12:29:20 +02:00
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from32p(u32 value)
|
2014-10-02 12:29:20 +02:00
|
|
|
{
|
2015-08-06 15:31:13 +02:00
|
|
|
v128 ret;
|
2020-12-21 15:12:05 +01:00
|
|
|
ret._u32[0] = value;
|
|
|
|
|
ret._u32[1] = value;
|
|
|
|
|
ret._u32[2] = value;
|
|
|
|
|
ret._u32[3] = value;
|
2015-03-29 13:00:10 +02:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from16p(u16 value)
|
2015-03-29 13:00:10 +02:00
|
|
|
{
|
2015-08-06 15:31:13 +02:00
|
|
|
v128 ret;
|
2020-12-21 15:12:05 +01:00
|
|
|
ret._u16[0] = value;
|
|
|
|
|
ret._u16[1] = value;
|
|
|
|
|
ret._u16[2] = value;
|
|
|
|
|
ret._u16[3] = value;
|
|
|
|
|
ret._u16[4] = value;
|
|
|
|
|
ret._u16[5] = value;
|
|
|
|
|
ret._u16[6] = value;
|
|
|
|
|
ret._u16[7] = value;
|
2014-10-07 23:37:04 +02:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-06 15:31:13 +02:00
|
|
|
static v128 from8p(u8 value)
|
2014-10-07 23:37:04 +02:00
|
|
|
{
|
2015-08-06 15:31:13 +02:00
|
|
|
v128 ret;
|
2020-12-21 15:12:05 +01:00
|
|
|
std::memset(&ret, value, sizeof(ret));
|
2014-10-02 12:29:20 +02:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 fromV(const __m128i& value);
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 fromF(const __m128& value);
|
2014-10-07 23:37:04 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 fromD(const __m128d& value);
|
2015-03-21 15:29:33 +01:00
|
|
|
|
2020-04-10 09:05:23 +02:00
|
|
|
// Unaligned load with optional index offset
|
2020-12-18 08:39:54 +01:00
|
|
|
static v128 loadu(const void* ptr, usz index = 0)
|
2020-04-10 09:05:23 +02:00
|
|
|
{
|
|
|
|
|
v128 ret;
|
|
|
|
|
std::memcpy(&ret, static_cast<const u8*>(ptr) + index * sizeof(v128), sizeof(v128));
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Unaligned store with optional index offset
|
2020-12-18 08:39:54 +01:00
|
|
|
static void storeu(v128 value, void* ptr, usz index = 0)
|
2020-04-10 09:05:23 +02:00
|
|
|
{
|
|
|
|
|
std::memcpy(static_cast<u8*>(ptr) + index * sizeof(v128), &value, sizeof(v128));
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 add8(const v128& left, const v128& right);
|
2014-10-07 23:37:04 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 add16(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 add32(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 addfs(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 addfd(const v128& left, const v128& right);
|
2015-03-21 15:29:33 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 sub8(const v128& left, const v128& right);
|
2014-10-07 23:37:04 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 sub16(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 sub32(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 subfs(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 subfd(const v128& left, const v128& right);
|
2015-03-21 15:29:33 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 maxu8(const v128& left, const v128& right);
|
2015-03-21 00:36:05 +01:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 minu8(const v128& left, const v128& right);
|
2014-10-07 23:37:04 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 eq8(const v128& left, const v128& right);
|
2020-05-14 14:21:15 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 eq16(const v128& left, const v128& right);
|
2020-05-14 14:21:15 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 eq32(const v128& left, const v128& right);
|
2020-06-05 16:51:34 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 eq32f(const v128& left, const v128& right);
|
2020-06-05 16:51:34 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 fma32f(v128 a, const v128& b, const v128& c);
|
2020-06-05 16:51:34 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
bool operator==(const v128& right) const;
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2020-12-21 15:12:05 +01:00
|
|
|
bool operator!=(const v128& right) const;
|
2014-09-06 18:30:13 +02:00
|
|
|
|
2014-10-07 23:37:04 +02:00
|
|
|
// result = (~left) & (right)
|
2020-12-21 15:12:05 +01:00
|
|
|
static inline v128 andnot(const v128& left, const v128& right);
|
2014-10-07 23:37:04 +02:00
|
|
|
|
2014-09-06 18:30:13 +02:00
|
|
|
void clear()
|
|
|
|
|
{
|
2020-06-15 16:24:04 +02:00
|
|
|
*this = {};
|
2014-09-06 18:30:13 +02:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2020-12-18 08:39:54 +01:00
|
|
|
template <typename T, usz N, usz M>
|
2017-04-19 13:31:56 +02:00
|
|
|
struct offset32_array<v128::masked_array_t<T, N, M>>
|
|
|
|
|
{
|
|
|
|
|
template <typename Arg>
|
|
|
|
|
static inline u32 index32(const Arg& arg)
|
|
|
|
|
{
|
2018-09-03 17:46:14 +02:00
|
|
|
return u32{sizeof(T)} * (static_cast<u32>(arg) ^ static_cast<u32>(M));
|
2017-04-19 13:31:56 +02:00
|
|
|
}
|
|
|
|
|
};
|