From b08ea843743e8d3404eeafc6ad52ece35dc5bbf1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 1 Mar 2021 17:20:18 -0500 Subject: [PATCH] target/arm: Implement fp16 for Neon VRSQRTS Convert the Neon VRSQRTS insn to using a gvec helper, and use this to implement the fp16 case. As with VRECPS, we adjust the phrasing of the new implementation slightly so that the fp32 version parallels the fp16 one. Backports 40fde72dda2da8d55b820fa6c5efd85814be2023 --- qemu/aarch64.h | 2 ++ qemu/aarch64eb.h | 2 ++ qemu/arm.h | 2 ++ qemu/armeb.h | 2 ++ qemu/header_gen.py | 2 ++ qemu/m68k.h | 2 ++ qemu/mips.h | 2 ++ qemu/mips64.h | 2 ++ qemu/mips64el.h | 2 ++ qemu/mipsel.h | 2 ++ qemu/powerpc.h | 2 ++ qemu/riscv32.h | 2 ++ qemu/riscv64.h | 2 ++ qemu/sparc.h | 2 ++ qemu/sparc64.h | 2 ++ qemu/target/arm/helper.h | 4 +++- qemu/target/arm/translate-neon.inc.c | 21 +------------------ qemu/target/arm/vec_helper.c | 30 ++++++++++++++++++++++++++++ qemu/target/arm/vfp_helper.c | 15 -------------- qemu/x86_64.h | 2 ++ 20 files changed, 66 insertions(+), 36 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 31ef43b7..1e09f386 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64 #define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64 #define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64 #define helper_gvec_sar8i helper_gvec_sar8i_aarch64 #define helper_gvec_sar8v helper_gvec_sar8v_aarch64 #define helper_gvec_sar16i helper_gvec_sar16i_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 0f7e6ece..374c6f1e 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64eb #define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64eb #define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64eb +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64eb +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64eb #define helper_gvec_sar8i helper_gvec_sar8i_aarch64eb #define helper_gvec_sar8v helper_gvec_sar8v_aarch64eb #define helper_gvec_sar16i helper_gvec_sar16i_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index bcc57e9b..6fae7a9f 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_arm #define helper_gvec_rotr32v helper_gvec_rotr32v_arm #define helper_gvec_rotr64v helper_gvec_rotr64v_arm +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_arm +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_arm #define helper_gvec_sar8i helper_gvec_sar8i_arm #define helper_gvec_sar8v helper_gvec_sar8v_arm #define helper_gvec_sar16i helper_gvec_sar16i_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index e4a5754d..08dd3c3b 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_armeb #define helper_gvec_rotr32v helper_gvec_rotr32v_armeb #define helper_gvec_rotr64v helper_gvec_rotr64v_armeb +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_armeb +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_armeb #define helper_gvec_sar8i helper_gvec_sar8i_armeb #define helper_gvec_sar8v helper_gvec_sar8v_armeb #define helper_gvec_sar16i helper_gvec_sar16i_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index d1110965..13ff98bd 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1399,6 +1399,8 @@ symbols = ( 'helper_gvec_rotr16v', 'helper_gvec_rotr32v', 'helper_gvec_rotr64v', + 'helper_gvec_rsqrts_nf_h', + 'helper_gvec_rsqrts_nf_s', 'helper_gvec_sar8i', 'helper_gvec_sar8v', 'helper_gvec_sar16i', diff --git a/qemu/m68k.h b/qemu/m68k.h index c44a708c..82dbf472 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_m68k #define helper_gvec_rotr32v helper_gvec_rotr32v_m68k #define helper_gvec_rotr64v helper_gvec_rotr64v_m68k +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_m68k +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_m68k #define helper_gvec_sar8i helper_gvec_sar8i_m68k #define helper_gvec_sar8v helper_gvec_sar8v_m68k #define helper_gvec_sar16i helper_gvec_sar16i_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 3926bf7d..5e87413a 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_mips #define helper_gvec_rotr32v helper_gvec_rotr32v_mips #define helper_gvec_rotr64v helper_gvec_rotr64v_mips +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips #define helper_gvec_sar8i helper_gvec_sar8i_mips #define helper_gvec_sar8v helper_gvec_sar8v_mips #define helper_gvec_sar16i helper_gvec_sar16i_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index bc281af2..06e76bfc 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_mips64 #define helper_gvec_rotr32v helper_gvec_rotr32v_mips64 #define helper_gvec_rotr64v helper_gvec_rotr64v_mips64 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64 #define helper_gvec_sar8i helper_gvec_sar8i_mips64 #define helper_gvec_sar8v helper_gvec_sar8v_mips64 #define helper_gvec_sar16i helper_gvec_sar16i_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 733d6012..f6846a37 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el #define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el #define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64el +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64el #define helper_gvec_sar8i helper_gvec_sar8i_mips64el #define helper_gvec_sar8v helper_gvec_sar8v_mips64el #define helper_gvec_sar16i helper_gvec_sar16i_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index cbdb0473..914691cb 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel #define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel #define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mipsel +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mipsel #define helper_gvec_sar8i helper_gvec_sar8i_mipsel #define helper_gvec_sar8v helper_gvec_sar8v_mipsel #define helper_gvec_sar16i helper_gvec_sar16i_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index e169008b..bde8cf29 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_powerpc #define helper_gvec_rotr32v helper_gvec_rotr32v_powerpc #define helper_gvec_rotr64v helper_gvec_rotr64v_powerpc +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_powerpc +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_powerpc #define helper_gvec_sar8i helper_gvec_sar8i_powerpc #define helper_gvec_sar8v helper_gvec_sar8v_powerpc #define helper_gvec_sar16i helper_gvec_sar16i_powerpc diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 20acba33..881e6b58 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32 #define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32 #define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv32 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv32 #define helper_gvec_sar8i helper_gvec_sar8i_riscv32 #define helper_gvec_sar8v helper_gvec_sar8v_riscv32 #define helper_gvec_sar16i helper_gvec_sar16i_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index e9bf5ffd..6ef43bb6 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64 #define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64 #define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv64 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv64 #define helper_gvec_sar8i helper_gvec_sar8i_riscv64 #define helper_gvec_sar8v helper_gvec_sar8v_riscv64 #define helper_gvec_sar16i helper_gvec_sar16i_riscv64 diff --git a/qemu/sparc.h b/qemu/sparc.h index 0d9ec264..002c203c 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_sparc #define helper_gvec_rotr32v helper_gvec_rotr32v_sparc #define helper_gvec_rotr64v helper_gvec_rotr64v_sparc +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc #define helper_gvec_sar8i helper_gvec_sar8i_sparc #define helper_gvec_sar8v helper_gvec_sar8v_sparc #define helper_gvec_sar16i helper_gvec_sar16i_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 609cf27d..6f0f62e2 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64 #define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64 #define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc64 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc64 #define helper_gvec_sar8i helper_gvec_sar8i_sparc64 #define helper_gvec_sar8v helper_gvec_sar8v_sparc64 #define helper_gvec_sar16i helper_gvec_sar16i_sparc64 diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 88032cbe..6b099a00 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -223,7 +223,6 @@ DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr) -DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) @@ -674,6 +673,9 @@ DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i3 DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c index 22e3289e..02cd322d 100644 --- a/qemu/target/arm/translate-neon.inc.c +++ b/qemu/target/arm/translate-neon.inc.c @@ -1093,6 +1093,7 @@ DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) +DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) @@ -1131,26 +1132,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) return do_3same(s, a, gen_VMINNM_fp32_3s); } -WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32) - -static void gen_VRSQRTS_fp_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs, - uint32_t rn_ofs, uint32_t rm_ofs, - uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp }; - tcg_gen_gvec_3(s, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); -} - -static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a) -{ - if (a->size != 0) { - /* TODO fp16 support */ - return false; - } - - return do_3same(s, a, gen_VRSQRTS_fp_3s); -} - static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) { /* FP operations handled pairwise 32 bits at a time */ diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index def56e87..11643a76 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -826,6 +826,33 @@ static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat) return float32_sub(float32_two, float32_mul(op1, op2, stat), stat); } +/* Reciprocal square-root step. AArch32 non-fused semantics. */ +static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat) +{ + op1 = float16_squash_input_denormal(op1, stat); + op2 = float16_squash_input_denormal(op2, stat); + + if ((float16_is_infinity(op1) && float16_is_zero(op2)) || + (float16_is_infinity(op2) && float16_is_zero(op1))) { + return float16_one_point_five; + } + op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat); + return float16_div(op1, float16_two, stat); +} + +static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat) +{ + op1 = float32_squash_input_denormal(op1, stat); + op2 = float32_squash_input_denormal(op2, stat); + + if ((float32_is_infinity(op1) && float32_is_zero(op2)) || + (float32_is_infinity(op2) && float32_is_zero(op1))) { + return float32_one_point_five; + } + op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat); + return float32_div(op1, float32_two, stat); +} + #define DO_3OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -886,6 +913,9 @@ DO_3OP(gvec_fminnum_s, float32_minnum, float32) DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16) DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32) +DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16) +DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32) + #ifdef TARGET_AARCH64 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index e4f7326b..91c4a5e5 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -538,21 +538,6 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) return r; } -float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b) -{ - float_status *s = &env->vfp.standard_fp_status; - float32 product; - if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || - (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { - if (!(float32_is_zero(a) || float32_is_zero(b))) { - float_raise(float_flag_input_denormal, s); - } - return float32_one_point_five; - } - product = float32_mul(a, b, s); - return float32_div(float32_sub(float32_three, product, s), float32_two, s); -} - /* NEON helpers. */ /* diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 45aeaf91..7e5af7a6 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1393,6 +1393,8 @@ #define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64 #define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64 #define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64 +#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_x86_64 +#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_x86_64 #define helper_gvec_sar8i helper_gvec_sar8i_x86_64 #define helper_gvec_sar8v helper_gvec_sar8v_x86_64 #define helper_gvec_sar16i helper_gvec_sar16i_x86_64