From e10756d1509635acb088b70d7a12fac2f2f084fd Mon Sep 17 00:00:00 2001 From: Max-Tepafray <134654211+Max-Tepafray@users.noreply.github.com> Date: Thu, 25 May 2023 17:27:22 -0500 Subject: [PATCH] [x64] Remove AVX512 optimizations for VECTOR_SUB --- src/xenia/cpu/backend/x64/x64_seq_vector.cc | 28 --------------------- 1 file changed, 28 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 75f162559..a0ac73235 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -679,19 +679,6 @@ struct VECTOR_SUB // src1/src2. e.vpsubd(e.xmm1, src1, src2); - if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { - // If the result is less or equal to the first operand then - // we did not underflow - Opmask not_underflow = e.k1; - // _mm_cmple_epu32_mask - e.vpcmpud(not_underflow, e.xmm1, src1, 0x2); - - // Copy over values that did not underflow, write zero - // everywhere else - e.vmovdqa32(dest | not_underflow | e.T_z, e.xmm1); - return; - } - // If result is greater than either of the inputs, we've // underflowed (only need to check one input) // if (res > src1) then underflowed @@ -703,21 +690,6 @@ struct VECTOR_SUB } else { e.vpsubd(e.xmm1, src1, src2); - if (e.IsFeatureEnabled(kX64EmitAVX512Ortho | - kX64EmitAVX512DQ)) { - e.vmovdqa32(e.xmm3, src1); - e.vpternlogd(e.xmm3, e.xmm1, src2, 0b00011000); - - const Opmask saturate = e.k1; - e.vpmovd2m(saturate, e.xmm3); - - e.vpsrad(e.xmm2, e.xmm1, 31); - e.vpxord(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSignMaskI32)); - - e.vpblendmd(dest | saturate, e.xmm1, e.xmm2); - return; - } - // We can only overflow if the signs of the operands are // opposite. If signs are opposite and result sign isn't the // same as src1's sign, we've overflowed. if ((s32b)((src1 ^