diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 5d612e2d..a79f7759 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -7644,21 +7644,59 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) gen_smul_dual(s, t1, t2); if (sub) { - /* This subtraction cannot overflow. */ + /* + * This subtraction cannot overflow, so we can do a simple + * 32-bit subtraction and then a possible 32-bit saturating + * addition of Ra. + */ tcg_gen_sub_i32(tcg_ctx, t1, t1, t2); + tcg_temp_free_i32(tcg_ctx, t2); + + if (a->ra != 15) { + t2 = load_reg(s, a->ra); + gen_helper_add_setq(tcg_ctx, t1, tcg_ctx->cpu_env, t1, t2); + tcg_temp_free_i32(tcg_ctx, t2); + } + } else if (a->ra == 15) { + /* Single saturation-checking addition */ + gen_helper_add_setq(tcg_ctx, t1, tcg_ctx->cpu_env, t1, t2); + tcg_temp_free_i32(tcg_ctx, t2); } else { /* - * This addition cannot overflow 32 bits; however it may - * overflow considered as a signed operation, in which case - * we must set the Q flag. + * We need to add the products and Ra together and then + * determine whether the final result overflowed. Doing + * this as two separate add-and-check-overflow steps incorrectly + * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1. + * Do all the arithmetic at 64-bits and then check for overflow. */ - gen_helper_add_setq(tcg_ctx, t1, tcg_ctx->cpu_env, t1, t2); - } - tcg_temp_free_i32(tcg_ctx, t2); + TCGv_i64 p64, q64; + TCGv_i32 t3, qf, one; - if (a->ra != 15) { - t2 = load_reg(s, a->ra); - gen_helper_add_setq(tcg_ctx, t1, tcg_ctx->cpu_env, t1, t2); + p64 = tcg_temp_new_i64(tcg_ctx); + q64 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_ext_i32_i64(tcg_ctx, p64, t1); + tcg_gen_ext_i32_i64(tcg_ctx, q64, t2); + tcg_gen_add_i64(tcg_ctx, p64, p64, q64); + load_reg_var(s, t2, a->ra); + tcg_gen_ext_i32_i64(tcg_ctx, q64, t2); + tcg_gen_add_i64(tcg_ctx, p64, p64, q64); + tcg_temp_free_i64(tcg_ctx, q64); + + tcg_gen_extr_i64_i32(tcg_ctx, t1, t2, p64); + tcg_temp_free_i64(tcg_ctx, p64); + /* + * t1 is the low half of the result which goes into Rd. + * We have overflow and must set Q if the high half (t2) + * is different from the sign-extension of t1. + */ + t3 = tcg_temp_new_i32(tcg_ctx); + tcg_gen_sari_i32(tcg_ctx, t3, t1, 31); + qf = load_cpu_field(s, QF); + one = tcg_const_i32(tcg_ctx, 1); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_NE, qf, t2, t3, one, qf); + store_cpu_field(s, qf, QF); + tcg_temp_free_i32(tcg_ctx, one); + tcg_temp_free_i32(tcg_ctx, t3); tcg_temp_free_i32(tcg_ctx, t2); } store_reg(s, a->rd, t1);