diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index ea952201..c89e5cf9 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3284,6 +3284,9 @@
 #define arm_reset_cpu arm_reset_cpu_aarch64
 #define arm_set_cpu_off arm_set_cpu_off_aarch64
 #define arm_set_cpu_on arm_set_cpu_on_aarch64
+#define bif_op bif_op_aarch64
+#define bit_op bit_op_aarch64
+#define bsl_op bsl_op_aarch64
 #define cpu_reg cpu_reg_aarch64
 #define cpu_reg_sp cpu_reg_sp_aarch64
 #define disas_sve disas_sve_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index 929224e0..c36870a3 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3284,6 +3284,9 @@
 #define arm_reset_cpu arm_reset_cpu_aarch64eb
 #define arm_set_cpu_off arm_set_cpu_off_aarch64eb
 #define arm_set_cpu_on arm_set_cpu_on_aarch64eb
+#define bif_op bif_op_aarch64eb
+#define bit_op bit_op_aarch64eb
+#define bsl_op bsl_op_aarch64eb
 #define cpu_reg cpu_reg_aarch64eb
 #define cpu_reg_sp cpu_reg_sp_aarch64eb
 #define disas_sve disas_sve_aarch64eb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index 48be9fd1..b35cd212 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3309,6 +3309,9 @@ aarch64_symbols = (
     'arm_reset_cpu',
     'arm_set_cpu_off',
     'arm_set_cpu_on',
+    'bif_op',
+    'bit_op',
+    'bsl_op',
     'cpu_reg',
     'cpu_reg_sp',
     'disas_sve',
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index 0cbaff7f..e032bab7 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -10550,70 +10550,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
     }
 }
 
-static void gen_bsl_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(s, rn, rn, rm);
-    tcg_gen_and_i64(s, rn, rn, rd);
-    tcg_gen_xor_i64(s, rd, rm, rn);
-}
-
-static void gen_bit_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(s, rn, rn, rd);
-    tcg_gen_and_i64(s, rn, rn, rm);
-    tcg_gen_xor_i64(s, rd, rd, rn);
-}
-
-static void gen_bif_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(s, rn, rn, rd);
-    tcg_gen_andc_i64(s, rn, rn, rm);
-    tcg_gen_xor_i64(s, rd, rd, rn);
-}
-
-static void gen_bsl_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(s, vece, rn, rn, rm);
-    tcg_gen_and_vec(s, vece, rn, rn, rd);
-    tcg_gen_xor_vec(s, vece, rd, rm, rn);
-}
-
-static void gen_bit_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(s, vece, rn, rn, rd);
-    tcg_gen_and_vec(s, vece, rn, rn, rm);
-    tcg_gen_xor_vec(s, vece, rd, rd, rn);
-}
-
-static void gen_bif_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(s, vece, rn, rn, rd);
-    tcg_gen_andc_vec(s, vece, rn, rn, rm);
-    tcg_gen_xor_vec(s, vece, rd, rd, rn);
-}
-
 /* Logic op (opcode == 3) subgroup of C3.6.16. */
 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 bsl_op = {
-        .fni8 = gen_bsl_i64,
-        .fniv = gen_bsl_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bit_op = {
-        .fni8 = gen_bit_i64,
-        .fniv = gen_bit_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bif_op = {
-        .fni8 = gen_bif_i64,
-        .fniv = gen_bif_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-
     int rd = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     int rm = extract32(insn, 16, 5);
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index e818e704..601a13ed 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -5410,15 +5410,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     return 0;
 }
 
-/* Bitwise select. dest = c ? t : f. Clobbers T and F. */
-static void gen_neon_bsl(DisasContext *s, TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
-{
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    tcg_gen_and_i32(tcg_ctx, t, t, c);
-    tcg_gen_andc_i32(tcg_ctx, f, f, c);
-    tcg_gen_or_i32(tcg_ctx, dest, t, f);
-}
-
 static inline void gen_neon_narrow(DisasContext *s, int size, TCGv_i32 dest, TCGv_i64 src)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
@@ -5876,6 +5867,73 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
     return 1;
 }
 
+/*
+ * Expanders for VBitOps_VBIF, VBIT, VBSL.
+ */
+static void gen_bsl_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(s, rn, rn, rm);
+    tcg_gen_and_i64(s, rn, rn, rd);
+    tcg_gen_xor_i64(s, rd, rm, rn);
+}
+
+static void gen_bit_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(s, rn, rn, rd);
+    tcg_gen_and_i64(s, rn, rn, rm);
+    tcg_gen_xor_i64(s, rd, rd, rn);
+}
+
+static void gen_bif_i64(TCGContext *s, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(s, rn, rn, rd);
+    tcg_gen_andc_i64(s, rn, rn, rm);
+    tcg_gen_xor_i64(s, rd, rd, rn);
+}
+
+static void gen_bsl_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(s, vece, rn, rn, rm);
+    tcg_gen_and_vec(s, vece, rn, rn, rd);
+    tcg_gen_xor_vec(s, vece, rd, rm, rn);
+}
+
+static void gen_bit_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(s, vece, rn, rn, rd);
+    tcg_gen_and_vec(s, vece, rn, rn, rm);
+    tcg_gen_xor_vec(s, vece, rd, rd, rn);
+}
+
+static void gen_bif_vec(TCGContext *s, unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(s, vece, rn, rn, rd);
+    tcg_gen_andc_vec(s, vece, rn, rn, rm);
+    tcg_gen_xor_vec(s, vece, rd, rd, rn);
+}
+
+const GVecGen3 bsl_op = {
+    .fni8 = gen_bsl_i64,
+    .fniv = gen_bsl_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+const GVecGen3 bit_op = {
+    .fni8 = gen_bit_i64,
+    .fniv = gen_bit_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+const GVecGen3 bif_op = {
+    .fni8 = gen_bif_i64,
+    .fniv = gen_bif_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+
 /* Translate a NEON data processing instruction. Return nonzero if the
    instruction is invalid.
    We process data in a mixture of 32-bit and 64-bit chunks.
@@ -5886,13 +5944,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     int op;
     int q;
-    int rd, rn, rm;
+    int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
     int size;
     int shift;
     int pass;
     int count;
     int pairwise;
     int u;
+    int vec_size;
     uint32_t imm, mask;
     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
     TCGv_ptr ptr1, ptr2, ptr3;
@@ -5916,6 +5975,11 @@
     VFP_DREG_N(rn, insn);
     VFP_DREG_M(rm, insn);
     size = (insn >> 20) & 3;
+    vec_size = q ? 16 : 8;
+    rd_ofs = neon_reg_offset(rd, 0);
+    rn_ofs = neon_reg_offset(rn, 0);
+    rm_ofs = neon_reg_offset(rm, 0);
+
     if ((insn & (1 << 23)) == 0) {
         /* Three register same length. */
         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
@@ -6006,8 +6070,51 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                      q, rd, rn, rm);
             }
             return 1;
+
+        case NEON_3R_LOGIC: /* Logic ops. */
+            switch ((u << 2) | size) {
+            case 0: /* VAND */
+                tcg_gen_gvec_and(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 1: /* VBIC */
+                tcg_gen_gvec_andc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs,
+                                  vec_size, vec_size);
+                break;
+            case 2:
+                if (rn == rm) {
+                    /* VMOV */
+                    tcg_gen_gvec_mov(tcg_ctx, 0, rd_ofs, rn_ofs, vec_size, vec_size);
+                } else {
+                    /* VORR */
+                    tcg_gen_gvec_or(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs,
+                                    vec_size, vec_size);
+                }
+                break;
+            case 3: /* VORN */
+                tcg_gen_gvec_orc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 4: /* VEOR */
+                tcg_gen_gvec_xor(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 5: /* VBSL */
+                tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bsl_op);
+                break;
+            case 6: /* VBIT */
+                tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bit_op);
+                break;
+            case 7: /* VBIF */
+                tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bif_op);
+                break;
+            }
+            return 0;
         }
-        if (size == 3 && op != NEON_3R_LOGIC) {
+        if (size == 3) {
             /* 64-bit element instructions. */
             for (pass = 0; pass < (q ? 2 : 1); pass++) {
                 neon_load_reg64(s, s->V0, rn + pass);
@@ -6165,40 +6272,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_VRHADD:
             GEN_NEON_INTEGER_OP(rhadd);
             break;
-        case NEON_3R_LOGIC: /* Logic ops. */
-            switch ((u << 2) | size) {
-            case 0: /* VAND */
-                tcg_gen_and_i32(tcg_ctx, tmp, tmp, tmp2);
-                break;
-            case 1: /* BIC */
-                tcg_gen_andc_i32(tcg_ctx, tmp, tmp, tmp2);
-                break;
-            case 2: /* VORR */
-                tcg_gen_or_i32(tcg_ctx, tmp, tmp, tmp2);
-                break;
-            case 3: /* VORN */
-                tcg_gen_orc_i32(tcg_ctx, tmp, tmp, tmp2);
-                break;
-            case 4: /* VEOR */
-                tcg_gen_xor_i32(tcg_ctx, tmp, tmp, tmp2);
-                break;
-            case 5: /* VBSL */
-                tmp3 = neon_load_reg(s, rd, pass);
-                gen_neon_bsl(s, tmp, tmp, tmp2, tmp3);
-                tcg_temp_free_i32(tcg_ctx, tmp3);
-                break;
-            case 6: /* VBIT */
-                tmp3 = neon_load_reg(s, rd, pass);
-                gen_neon_bsl(s, tmp, tmp, tmp3, tmp2);
-                tcg_temp_free_i32(tcg_ctx, tmp3);
-                break;
-            case 7: /* VBIF */
-                tmp3 = neon_load_reg(s, rd, pass);
-                gen_neon_bsl(s, tmp, tmp3, tmp, tmp2);
-                tcg_temp_free_i32(tcg_ctx, tmp3);
-                break;
-            }
-            break;
         case NEON_3R_VHSUB:
             GEN_NEON_INTEGER_OP(hsub);
             break;
@@ -6891,7 +6964,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
             }
             tcg_gen_movi_i64(tcg_ctx, t64, val);
-            neon_store_reg64(t64, rd + pass);
+            neon_store_reg64(s, t64, rd + pass);
         }
         tcg_temp_free_i64(tcg_ctx, t64);
     } else {
diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h
index f0ba2b12..bfa380a9 100644
--- a/qemu/target/arm/translate.h
+++ b/qemu/target/arm/translate.h
@@ -191,6 +191,11 @@ static inline TCGv_i32 get_ahp_flag(DisasContext *s)
     return ret;
 }
 
+/* Vector operations shared between ARM and AArch64. */
+extern const GVecGen3 bsl_op;
+extern const GVecGen3 bit_op;
+extern const GVecGen3 bif_op;
+
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
  */
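For reference, gen_bsl_i64/gen_bit_i64/gen_bif_i64 and their _vec twins all compute the ordinary bitwise select dest = (t & sel) | (f & ~sel) in XOR form (two XORs plus one AND or ANDC), clobbering only rn; rd is both input and output, which is why each GVecGen3 sets .load_dest = true. VBSL selects with rd as the mask, while VBIT/VBIF insert rn bits into rd where rm is 1 or 0 respectively. Below is a quick standalone check of the three identities; it is an illustrative sketch, not part of the patch, and the helper names are made up for the test:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reference select: bit-for-bit, take t where sel is 1, else f. */
static uint64_t sel_ref(uint64_t sel, uint64_t t, uint64_t f)
{
    return (t & sel) | (f & ~sel);
}

/* Mirrors gen_bsl_i64: rn ^= rm; rn &= rd; rd = rm ^ rn; */
static uint64_t bsl(uint64_t rd, uint64_t rn, uint64_t rm)
{
    return rm ^ ((rn ^ rm) & rd);
}

/* Mirrors gen_bit_i64: rn ^= rd; rn &= rm; rd ^= rn; */
static uint64_t bit(uint64_t rd, uint64_t rn, uint64_t rm)
{
    return rd ^ ((rn ^ rd) & rm);
}

/* Mirrors gen_bif_i64: rn ^= rd; rn &= ~rm (andc); rd ^= rn; */
static uint64_t bif(uint64_t rd, uint64_t rn, uint64_t rm)
{
    return rd ^ ((rn ^ rd) & ~rm);
}

int main(void)
{
    /* Arbitrary test values. */
    const uint64_t rd = 0xff00ff00ff00ff00ull;
    const uint64_t rn = 0x0123456789abcdefull;
    const uint64_t rm = 0xfedcba9876543210ull;

    /* VBSL: rd is the select mask. */
    assert(bsl(rd, rn, rm) == sel_ref(rd, rn, rm));
    /* VBIT: insert rn bits into rd where rm is 1. */
    assert(bit(rd, rn, rm) == sel_ref(rm, rn, rd));
    /* VBIF: insert rn bits into rd where rm is 0. */
    assert(bif(rd, rn, rm) == sel_ref(~rm, rn, rd));

    puts("bsl/bit/bif xor-select identities hold");
    return 0;
}

The XOR form is preferred over the direct AND/ANDC/OR sequence (the old gen_neon_bsl) because it touches one temporary less, which matters once the same expander has to serve both the i64 and the host-vector (fniv) paths.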