// LLVM-based CPU translator helpers; the contents are guarded by LLVM_AVAILABLE.
//
// This part of the header:
//  - includes the LLVM IR headers (LLVMContext, IRBuilder, Module); under MSVC
//    they are wrapped in #pragma warning(push, 0) / #pragma warning(pop);
//  - declares the empty tag enums i2 and i4, which the llvm_value_t
//    specializations map to 2-bit and 4-bit LLVM integers (getIntNTy);
//  - defines llvm_value_t, a combined traits/wrapper type. It stores an
//    llvm::Value* in `value` (eval() returns it unchanged) and describes the
//    matching LLVM type through static members:
//      esize             - element width in bits (0 in the void fallback)
//      is_int            - integer element (set by the bool/i2/i4/char specializations)
//      is_sint / is_uint - signed / unsigned integer flags
//      is_float          - floating-point element
//      is_vector         - declared as uint, 0 (false) here
//      is_pointer        - declared as uint, 0 (false) here
//      get_type(context) - the corresponding llvm::Type*
//    The primary template carries the "unknown type" static_assert and acts as
//    the void fallback (get_type returns the void type). The specializations
//    here cover bool (i1), i2, i4, char (i8), s8, u8, s16 (i16), u16,
//    s32 (i32), u32, s64 (i64) and u64.
// NOTE(review): s8/u8/u16/u32/u64 only override signedness flags, so their
// esize/get_type presumably come from the specialization they derive from
// (u16/u32/u64 reset is_sint to false, which suggests a signed base) - confirm
// against the inheritance chain.
#pragma once #ifdef LLVM_AVAILABLE #include "restore_new.h" #ifdef _MSC_VER #pragma warning(push, 0) #endif #include "llvm/IR/LLVMContext.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #ifdef _MSC_VER #pragma warning(pop) #endif #include "define_new_memleakdetect.h" #include "../Utilities/types.h" #include "../Utilities/StrFmt.h" #include "../Utilities/BEType.h" #include "../Utilities/BitField.h" #include #include #include #include #include #include enum class i2 : char { }; enum class i4 : char { }; template struct llvm_value_t { static_assert(std::is_same::value, "llvm_value_t<> error: unknown type"); using type = void; using base = llvm_value_t; static constexpr uint esize = 0; static constexpr bool is_int = false; static constexpr bool is_sint = false; static constexpr bool is_uint = false; static constexpr bool is_float = false; static constexpr uint is_vector = false; static constexpr uint is_pointer = false; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getVoidTy(context); } llvm::Value* eval(llvm::IRBuilder<>* ir) const { return value; } llvm::Value* value; // llvm_value_t() = default; // llvm_value_t(llvm::Value* value) // : value(value) // { // } }; template <> struct llvm_value_t : llvm_value_t { using type = bool; using base = llvm_value_t; using base::base; static constexpr uint esize = 1; static constexpr bool is_int = true; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getInt1Ty(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = i2; using base = llvm_value_t; using base::base; static constexpr uint esize = 2; static constexpr bool is_int = true; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getIntNTy(context, 2); } }; template <> struct llvm_value_t : llvm_value_t { using type = i4; using base = llvm_value_t; using base::base; static constexpr uint esize = 4; static constexpr bool is_int = true; static llvm::Type* 
get_type(llvm::LLVMContext& context) { return llvm::Type::getIntNTy(context, 4); } }; template <> struct llvm_value_t : llvm_value_t { using type = char; using base = llvm_value_t; using base::base; static constexpr uint esize = 8; static constexpr bool is_int = true; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getInt8Ty(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = s8; using base = llvm_value_t; using base::base; static constexpr bool is_sint = true; }; template <> struct llvm_value_t : llvm_value_t { using type = u8; using base = llvm_value_t; using base::base; static constexpr bool is_uint = true; }; template <> struct llvm_value_t : llvm_value_t { using type = s16; using base = llvm_value_t; using base::base; static constexpr uint esize = 16; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getInt16Ty(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = u16; using base = llvm_value_t; using base::base; static constexpr bool is_sint = false; static constexpr bool is_uint = true; }; template <> struct llvm_value_t : llvm_value_t { using type = s32; using base = llvm_value_t; using base::base; static constexpr uint esize = 32; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getInt32Ty(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = u32; using base = llvm_value_t; using base::base; static constexpr bool is_sint = false; static constexpr bool is_uint = true; }; template <> struct llvm_value_t : llvm_value_t { using type = s64; using base = llvm_value_t; using base::base; static constexpr uint esize = 64; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getInt64Ty(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = u64; using base = llvm_value_t; using base::base; static constexpr bool is_sint = false; static constexpr bool is_uint = true; }; 
// Remaining llvm_value_t specializations, the expression nodes/operators built
// on them, and the cpu_translator base class.
//
// llvm_value_t specializations in this part:
//  - s128/u128: 128-bit integer (getIntNTy(context, 128)); f32: float; f64: double.
//  - T*: is_pointer is the pointee's is_pointer + 1 and the type comes from
//    getPointerTo(); a static_assert rejects pointers to void.
//  - T[N]: is_vector = N and the type comes from llvm::VectorType::get();
//    static_asserts reject nested vectors and vectors of pointers.
//
// Expression nodes (llvm_add_t, llvm_sub_t, ...): each struct stores its
// operand(s) and only emits IR when eval(ir) is called; the overloaded
// operators merely build the node ({a1, a2}).
//  - add / sub / mul: CreateAdd/CreateSub/CreateMul when is_int,
//    CreateFAdd/CreateFSub/CreateFMul when is_float.
//  - add_const / sub_const / const_sub: the u64 constant is materialized with
//    llvm::ConstantInt::get (signed flag = is_sint); const_sub emits c - a1.
//  - div: CreateSDiv / CreateUDiv / CreateFDiv for is_sint / is_uint / is_float.
//  - neg: CreateNeg / CreateFNeg; the unary operator is enabled for esize > 1.
//  - llvm_int_t: wraps a u64 and returns it from eval(), so the u64 operator
//    overloads hand a plain integer right-hand side to the builder call.
//  - shl: CreateShl in both the signed and unsigned branch;
//    shr: CreateAShr for is_sint, CreateLShr for is_uint.
//  - and / or / xor / not: CreateAnd / CreateOr / CreateXor / CreateNot.
//  - icmp: result type is bool, or bool[N] for vector operands; the predicate
//    is given in unsigned form and mapped to the signed equivalent unless the
//    operand type is_uint (other predicates pass through unchanged).
// NOTE(review): the eval() bodies are `if` chains without a trailing return;
// if none of the tested flags is set, control reaches the end of a non-void
// function - verify that the static_asserts make this unreachable.
//
// cpu_translator:
//  - state: LLVM context reference, target module, endianness flag, SSSE3
//    flag (gates the PSHUFB intrinsic) and the IR builder pointer.
//  - value(): wraps an llvm::Value*, calling fmt::throw_exception when the
//    pointer is null or its type differs from get_type().
//  - eval / bitcast / trunc / sext / zext: evaluate an expression (optionally
//    through CreateBitCast/CreateTrunc/CreateSExt/CreateZExt) into a value.
//  - scarry / sborrow / merge / rol: pure expression helpers composed from the
//    operators above.
//  - avg: widens both operands to 2 * esize bits (SExt for signed, ZExt for
//    unsigned), computes a + b + 1, shifts right by 1 (CreateLShr) and
//    truncates back to the result type.
//  - select / insert / extract / splat / fsplat / min / max / zshuffle /
//    shuffle2 / build: thin wrappers over the corresponding IRBuilder or
//    constant-construction calls; min and max both select on (a > b).
//  - get_intrinsic + ctlz / ctpop / sqrt / fabs / fmuladd: declare and call
//    LLVM intrinsics (ctlz passes m_ir->getFalse() as its second argument).
//  - pshufb: see the note placed before that function's text below.
template <> struct llvm_value_t : llvm_value_t { using type = s128; using base = llvm_value_t; using base::base; static constexpr uint esize = 128; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getIntNTy(context, 128); } }; template <> struct llvm_value_t : llvm_value_t { using type = u128; using base = llvm_value_t; using base::base; static constexpr bool is_sint = false; static constexpr bool is_uint = true; }; template <> struct llvm_value_t : llvm_value_t { using type = f32; using base = llvm_value_t; using base::base; static constexpr uint esize = 32; static constexpr bool is_float = true; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getFloatTy(context); } }; template <> struct llvm_value_t : llvm_value_t { using type = f64; using base = llvm_value_t; using base::base; static constexpr uint esize = 64; static constexpr bool is_float = true; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::Type::getDoubleTy(context); } }; template struct llvm_value_t : llvm_value_t { static_assert(!std::is_void::value, "llvm_value_t<> error: invalid pointer to void type"); using type = T*; using base = llvm_value_t; using base::base; static constexpr uint is_pointer = llvm_value_t::is_pointer + 1; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm_value_t::get_type(context)->getPointerTo(); } }; template struct llvm_value_t : llvm_value_t { static_assert(!llvm_value_t::is_vector, "llvm_value_t<> error: invalid multidimensional vector"); static_assert(!llvm_value_t::is_pointer, "llvm_value_t<>: vector of pointers is not allowed"); using type = T[N]; using base = llvm_value_t; using base::base; static constexpr uint is_vector = N; static constexpr uint is_pointer = 0; static llvm::Type* get_type(llvm::LLVMContext& context) { return llvm::VectorType::get(llvm_value_t::get_type(context), N); } }; template struct llvm_add_t { using type = T; A1 a1; A2 a2; 
// --- Addition and subtraction nodes (including the u64-constant variants) ---
static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_add_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return ir->CreateAdd(v1, v2); } if (llvm_value_t::is_float) { return ir->CreateFAdd(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_add_t operator +(T1 a1, T2 a2) { return {a1, a2}; } template struct llvm_add_const_t { using type = T; A1 a1; u64 c; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_add_const_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { return ir->CreateAdd(a1.eval(ir), llvm::ConstantInt::get(llvm_value_t::get_type(ir->getContext()), c, llvm_value_t::is_sint)); } }; template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_add_const_t operator +(T1 a1, u64 c) { return {a1, c}; } template struct llvm_sub_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_sub_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return ir->CreateSub(v1, v2); } if (llvm_value_t::is_float) { return ir->CreateFSub(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_sub_t operator -(T1 a1, T2 a2) { return {a1, a2}; } template struct llvm_sub_const_t { using type = T; A1 a1; u64 c; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_sub_const_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { return ir->CreateSub(a1.eval(ir), llvm::ConstantInt::get(llvm_value_t::get_type(ir->getContext()), c, llvm_value_t::is_sint)); } }; template ().eval(0)), typename = std::enable_if_t::is_int>> inline 
// --- Constant-minus-value, multiplication, division and negation nodes ---
llvm_sub_const_t operator -(T1 a1, u64 c) { return {a1, c}; } template struct llvm_const_sub_t { using type = T; A1 a1; u64 c; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_const_sub_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { return ir->CreateSub(llvm::ConstantInt::get(llvm_value_t::get_type(ir->getContext()), c, llvm_value_t::is_sint), a1.eval(ir)); } }; template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_const_sub_t operator -(u64 c, T1 a1) { return {a1, c}; } template struct llvm_mul_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_mul_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return ir->CreateMul(v1, v2); } if (llvm_value_t::is_float) { return ir->CreateFMul(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_mul_t operator *(T1 a1, T2 a2) { return {a1, a2}; } template struct llvm_div_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_div_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_sint) { return ir->CreateSDiv(v1, v2); } if (llvm_value_t::is_uint) { return ir->CreateUDiv(v1, v2); } if (llvm_value_t::is_float) { return ir->CreateFDiv(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_div_t operator /(T1 a1, T2 a2) { return {a1, a2}; } template struct llvm_neg_t { using type = T; A1 a1; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || llvm_value_t::is_float, "llvm_neg_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = 
// --- Negation (continued), llvm_int_t constant helper, shift nodes, bitwise AND ---
a1.eval(ir); if (llvm_value_t::is_int) { return ir->CreateNeg(v1); } if (llvm_value_t::is_float) { return ir->CreateFNeg(v1); } } }; template ().eval(0)), typename = std::enable_if_t<(llvm_value_t::esize > 1)>> inline llvm_neg_t operator -(T1 a1) { return {a1}; } // Constant int helper struct llvm_int_t { u64 value; u64 eval(llvm::IRBuilder<>*) const { return value; } }; template struct llvm_shl_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_shl_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_sint) { return ir->CreateShl(v1, v2); } if (llvm_value_t::is_uint) { return ir->CreateShl(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_shl_t operator <<(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_shl_t operator <<(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template struct llvm_shr_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_shr_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_sint) { return ir->CreateAShr(v1, v2); } if (llvm_value_t::is_uint) { return ir->CreateLShr(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_shr_t operator >>(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_shr_t operator >>(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template struct llvm_and_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_int, "llvm_and_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if 
// --- Bitwise AND (continued), OR, XOR and NOT nodes; llvm_icmp_t begins ---
(llvm_value_t::is_int) { return ir->CreateAnd(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_and_t operator &(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_and_t operator &(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template struct llvm_or_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_int, "llvm_or_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return ir->CreateOr(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_or_t operator |(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_or_t operator |(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template struct llvm_xor_t { using type = T; A1 a1; A2 a2; static_assert(llvm_value_t::is_int, "llvm_xor_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return ir->CreateXor(v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_xor_t operator ^(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_xor_t operator ^(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template struct llvm_not_t { using type = T; A1 a1; static_assert(llvm_value_t::is_int, "llvm_not_t<>: invalid type"); llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); if (llvm_value_t::is_int) { return ir->CreateNot(v1); } } }; template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_not_t operator ~(T1 a1) { return {a1}; } template struct llvm_icmp_t { using type = 
// --- Integer comparison node (two icmp overloads: llvm::Value* or u64 right-hand side) and the first comparison operators ---
std::conditional_t::is_vector != 0, bool[llvm_value_t::is_vector], bool>; A1 a1; A2 a2; static_assert(llvm_value_t::is_int, "llvm_eq_t<>: invalid type"); // Convert unsigned comparison predicate to signed if necessary static constexpr llvm::CmpInst::Predicate pred = llvm_value_t::is_uint ? UPred : UPred == llvm::ICmpInst::ICMP_UGT ? llvm::ICmpInst::ICMP_SGT : UPred == llvm::ICmpInst::ICMP_UGE ? llvm::ICmpInst::ICMP_SGE : UPred == llvm::ICmpInst::ICMP_ULT ? llvm::ICmpInst::ICMP_SLT : UPred == llvm::ICmpInst::ICMP_ULE ? llvm::ICmpInst::ICMP_SLE : UPred; static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint || UPred == llvm::ICmpInst::ICMP_EQ || UPred == llvm::ICmpInst::ICMP_NE, "llvm_eq_t<>: invalid type(II)"); static inline llvm::Value* icmp(llvm::IRBuilder<>* ir, llvm::Value* lhs, llvm::Value* rhs) { return ir->CreateICmp(pred, lhs, rhs); } static inline llvm::Value* icmp(llvm::IRBuilder<>* ir, llvm::Value* lhs, u64 value) { return ir->CreateICmp(pred, lhs, llvm::ConstantInt::get(llvm_value_t::get_type(ir->getContext()), value, llvm_value_t::is_sint)); } llvm::Value* eval(llvm::IRBuilder<>* ir) const { const auto v1 = a1.eval(ir); const auto v2 = a2.eval(ir); if (llvm_value_t::is_int) { return icmp(ir, v1, v2); } } }; template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator ==(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator ==(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator !=(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator !=(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator >(T1 
// --- Remaining comparison operators; cpu_translator: constructor, state, type lookup and value wrappers ---
a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator >(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator >=(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator >=(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator <(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator <(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } template ().eval(0)), typename = decltype(std::declval().eval(0)), typename = std::enable_if_t::value>> inline llvm_icmp_t operator <=(T1 a1, T2 a2) { return {a1, a2}; } template ().eval(0)), typename = std::enable_if_t::is_int>> inline llvm_icmp_t operator <=(T1 a1, u64 a2) { return {a1, llvm_int_t{a2}}; } class cpu_translator { protected: cpu_translator(llvm::Module* module, bool is_be); // LLVM context std::reference_wrapper m_context; // Module to which all generated code is output to llvm::Module* m_module; // Endianness, affects vector element numbering (TODO) bool m_is_be; // Allow PSHUFB intrinsic bool m_use_ssse3; // IR builder llvm::IRBuilder<>* m_ir; public: // Convert a C++ type to an LLVM type (TODO: remove) template llvm::Type* GetType() { return llvm_value_t::get_type(m_context); } template llvm::Type* get_type() { return llvm_value_t::get_type(m_context); } template using value_t = llvm_value_t; template value_t value(llvm::Value* value) { if (!value || value->getType() != get_type()) { fmt::throw_exception("cpu_translator::value<>(): invalid value type"); } value_t result; result.value = value; return result; } template auto eval(T expr) { value_t result; result.value = 
// --- cpu_translator: cast helpers, scarry/sborrow/merge/rol expression helpers, avg ---
expr.eval(m_ir); return result; } template value_t bitcast(T2 expr) { value_t result; result.value = m_ir->CreateBitCast(expr.eval(m_ir), result.get_type(m_context)); return result; } template value_t trunc(T2 expr) { value_t result; result.value = m_ir->CreateTrunc(expr.eval(m_ir), result.get_type(m_context)); return result; } template value_t sext(T2 expr) { value_t result; result.value = m_ir->CreateSExt(expr.eval(m_ir), result.get_type(m_context)); return result; } template value_t zext(T2 expr) { value_t result; result.value = m_ir->CreateZExt(expr.eval(m_ir), result.get_type(m_context)); return result; } // Get signed addition overflow into the sign bit (s = a + b) template static inline auto scarry(T a, T b, T s) { return (b ^ s) & ~(a ^ b); } // Get signed subtraction overflow into the sign bit (d = a - b) template static inline auto sborrow(T a, T b, T d) { return (a ^ b) & (a ^ d); } // Bitwise select (c ? a : b) template static inline auto merge(T c, T a, T b) { return (a & c) | (b & ~c); } // Rotate left template static inline auto rol(T a, T b) { static constexpr u64 mask = value_t::esize - 1; return a << (b & mask) | a >> (-b & mask); } // Rotate left template static inline auto rol(T a, u64 b) { static constexpr u64 mask = value_t::esize - 1; return a << (b & mask) | a >> ((0 - b) & mask); } // Average: (a + b + 1) >> 1 template inline auto avg(T a, T b) { //return (a >> 1) + (b >> 1) + ((a | b) & 1); value_t result; llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast; if (result.is_sint) cast_op = llvm::Instruction::SExt; if (result.is_uint) cast_op = llvm::Instruction::ZExt; llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2); if (result.is_vector != 0) cast_t = llvm::VectorType::get(cast_t, result.is_vector); const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t); const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t); const auto cxt = llvm::ConstantInt::get(cast_t, 1, false); const auto abc = 
// --- cpu_translator: avg (continued), select, insert/extract, splat/fsplat, min/max, zshuffle ---
m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt); result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context)); return result; } // Select (c ? a : b) template auto select(T2 c, T a, T b) { static_assert(value_t::esize == 1, "select: expected bool type (first argument)"); static_assert(value_t::is_vector == value_t::is_vector, "select: incompatible arguments (vectors)"); T result; result.value = m_ir->CreateSelect(c.eval(m_ir), a.eval(m_ir), b.eval(m_ir)); return result; } template auto insert(T v, u64 i, E e) { value_t result; result.value = m_ir->CreateInsertElement(v.eval(m_ir), e.eval(m_ir), i); return result; } template auto extract(T v, u64 i) { typename value_t::base result; result.value = m_ir->CreateExtractElement(v.eval(m_ir), i); return result; } template auto splat(u64 c) { value_t result; result.value = llvm::ConstantInt::get(result.get_type(m_context), c, result.is_sint); return result; } template auto fsplat(f64 c) { value_t result; result.value = llvm::ConstantFP::get(result.get_type(m_context), c); return result; } // Min template auto min(T a, T b) { T result; result.value = m_ir->CreateSelect((a > b).eval(m_ir), b.eval(m_ir), a.eval(m_ir)); return result; } // Max template auto max(T a, T b) { T result; result.value = m_ir->CreateSelect((a > b).eval(m_ir), a.eval(m_ir), b.eval(m_ir)); return result; } // Shuffle single vector using all zeros second vector of the same size template auto zshuffle(T1 a, Args... args) { static_assert(sizeof(T) / sizeof(std::remove_extent_t) == sizeof...(Args), "zshuffle: unexpected result type"); const u32 values[]{static_cast(args)...}; value_t result; result.value = a.eval(m_ir); result.value = m_ir->CreateShuffleVector(result.value, llvm::ConstantInt::get(result.value->getType(), 0), values); return result; } template auto shuffle2(T1 a, T2 b, Args... 
// --- cpu_translator: shuffle2 (continued), build, get_intrinsic and intrinsic wrappers, fcmp, fmuladd ---
args) { static_assert(sizeof(T) / sizeof(std::remove_extent_t) == sizeof...(Args), "shuffle2: unexpected result type"); const u32 values[]{static_cast(args)...}; value_t result; result.value = a.eval(m_ir); result.value = m_ir->CreateShuffleVector(result.value, b.eval(m_ir), values); return result; } template auto build(Args... args) { using value_type = std::remove_extent_t; const value_type values[]{static_cast(args)...}; static_assert(sizeof(T) / sizeof(value_type) == sizeof...(Args), "build: unexpected number of arguments"); value_t result; result.value = llvm::ConstantDataVector::get(m_context, values); return result; } template llvm::Function* get_intrinsic(llvm::Intrinsic::ID id) { const auto module = m_ir->GetInsertBlock()->getParent()->getParent(); return llvm::Intrinsic::getDeclaration(module, id, {get_type()...}); } template auto ctlz(T a) { value_t result; result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::ctlz), {a.eval(m_ir), m_ir->getFalse()}); return result; } template auto ctpop(T a) { value_t result; result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::ctpop), {a.eval(m_ir)}); return result; } template auto sqrt(T a) { value_t result; result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::sqrt), {a.eval(m_ir)}); return result; } template auto fabs(T a) { value_t result; result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fabs), {a.eval(m_ir)}); return result; } template auto fcmp(T a, T b) { value_t::is_vector != 0, bool[llvm_value_t::is_vector], bool>> result; result.value = m_ir->CreateFCmp(FPred, a.eval(m_ir), b.eval(m_ir)); return result; } // Opportunistic hardware FMA, can be used if results are identical for all possible input values template auto fmuladd(T a, T b, T c) { value_t result; const auto av = a.eval(m_ir); const auto bv = b.eval(m_ir); const auto cv = c.eval(m_ir); result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fmuladd), {av, bv, cv}); return result; } template value_t 
// --- cpu_translator::pshufb and the constant-vector helper declarations ---
// pshufb(a, b) has three paths:
//  1. Constant index: each of the 16 index bytes becomes a shuffle-mask entry,
//     (b % 16) when b < 128 and 16 otherwise; the mask is zero-extended and
//     used in CreateShuffleVector(data0, zeros, mask), where index 16 refers
//     to the first lane of the all-zero second operand.
//  2. m_use_ssse3: calls the x86_ssse3_pshuf_b_128 intrinsic with {data0, index}.
//  3. Otherwise: emits a loop (two new basic blocks, PHI nodes for the counter
//     and the accumulated vector) that extracts data0[index & 0xf] per lane,
//     then selects zero for lanes where index is negative (CreateICmpSLT).
// NOTE(review): in path 3 the loop branch compares the PHI value `i` (not the
// incremented value) against 16, so the body would also execute with i == 16 -
// confirm whether the condition should test i + 1.
pshufb(T1 a, T2 b) { value_t result; const auto data0 = a.eval(m_ir); const auto index = b.eval(m_ir); const auto zeros = llvm::ConstantAggregateZero::get(get_type()); if (auto c = llvm::dyn_cast(index)) { // Convert PSHUFB index back to LLVM vector shuffle mask v128 mask{}; const auto cv = llvm::dyn_cast(c); if (cv) { for (u32 i = 0; i < 16; i++) { const u64 b = cv->getElementAsInteger(i); mask._u8[i] = b < 128 ? b % 16 : 16; } } if (cv || llvm::isa(c)) { result.value = llvm::ConstantDataVector::get(m_context, llvm::makeArrayRef((const u8*)mask._bytes, 16)); result.value = m_ir->CreateZExt(result.value, get_type()); result.value = m_ir->CreateShuffleVector(data0, zeros, result.value); return result; } } if (m_use_ssse3) { result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_ssse3_pshuf_b_128), {data0, index}); } else { // Emulate PSHUFB (TODO) const auto mask = m_ir->CreateAnd(index, 0xf); const auto loop = llvm::BasicBlock::Create(m_context, "", m_ir->GetInsertBlock()->getParent()); const auto next = llvm::BasicBlock::Create(m_context, "", m_ir->GetInsertBlock()->getParent()); const auto prev = m_ir->GetInsertBlock(); m_ir->CreateBr(loop); m_ir->SetInsertPoint(loop); const auto i = m_ir->CreatePHI(get_type(), 2); const auto v = m_ir->CreatePHI(get_type(), 2); i->addIncoming(m_ir->getInt32(0), prev); i->addIncoming(m_ir->CreateAdd(i, m_ir->getInt32(1)), loop); v->addIncoming(zeros, prev); result.value = m_ir->CreateInsertElement(v, m_ir->CreateExtractElement(data0, m_ir->CreateExtractElement(mask, i)), i); v->addIncoming(result.value, loop); m_ir->CreateCondBr(m_ir->CreateICmpULT(i, m_ir->getInt32(16)), loop, next); m_ir->SetInsertPoint(next); result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(index, zeros), zeros, result.value); } return result; } template R get_const_vector(llvm::Constant*, u32 a, u32 b); template llvm::Constant* make_const_vector(T, llvm::Type*); }; #endif