From f4c65739f3591977bf6bf0380c093141ef5ea739 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 1 Mar 2018 09:09:10 -0500
Subject: [PATCH] target-arm/translate-a64: fix gen_load_exclusive

While testing rth's latest TCG patches with risu I found ldaxp was
broken. Investigating further I found it was broken by 1dd089d0 when
the cmpxchg atomic work was merged. As part of that change the code
attempted to be clever by doing a single 64 bit load and then shuffle
the data around to set the two 32 bit registers.

As I couldn't quite follow the endian magic I've simply partially
reverted the change to the original code gen_load_exclusive code. This
doesn't affect the cmpxchg functionality as that is all done on in
gen_store_exclusive part which is untouched.

I've also restored the comment that was removed (with a slight tweak
to mention cmpxchg).

Backports commit 5460da501a57cd72eda6fec736d76539122e2f99 from qemu
---
 qemu/target-arm/translate-a64.c | 41 +++++++++++++++------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/qemu/target-arm/translate-a64.c b/qemu/target-arm/translate-a64.c
index 183ed5a6..dfbd111e 100644
--- a/qemu/target-arm/translate-a64.c
+++ b/qemu/target-arm/translate-a64.c
@@ -1884,43 +1884,38 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
     }
 }
 
+/*
+ * Load/Store exclusive instructions are implemented by remembering
+ * the value/address loaded, and seeing if these are the same
+ * when the store is performed. This is not actually the architecturally
+ * mandated semantics, but it works for typical guest code sequences
+ * and avoids having to monitor regular stores.
+ *
+ * The store exclusive uses the atomic cmpxchg primitives to avoid
+ * races in multi-threaded linux-user and when MTTCG softmmu is
+ * enabled.
+ */
 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                                TCGv_i64 addr, int size, bool is_pair)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
-    TCGMemOp be = s->be_data;
+    TCGMemOp memop = s->be_data + size;
 
     g_assert(size <= 3);
+    tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), memop);
 
     if (is_pair) {
+        TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx);
         TCGv_i64 hitmp = tcg_temp_new_i64(tcg_ctx);
 
-        if (size == 3) {
-            TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx);
-
-            tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s),
-                                MO_64 | MO_ALIGN_16 | be);
-            tcg_gen_addi_i64(tcg_ctx, addr2, addr, 8);
-            tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s),
-                                MO_64 | MO_ALIGN | be);
-            tcg_temp_free_i64(tcg_ctx, addr2);
-        } else {
-            g_assert(size == 2);
-            tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s),
-                                MO_64 | MO_ALIGN | be);
-            if (be == MO_LE) {
-                tcg_gen_extr32_i64(tcg_ctx, tmp, hitmp, tmp);
-            } else {
-                tcg_gen_extr32_i64(tcg_ctx, hitmp, tmp, tmp);
-            }
-        }
-
+        g_assert(size >= 2);
+        tcg_gen_addi_i64(tcg_ctx, addr2, addr, 1 << size);
+        tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s), memop);
+        tcg_temp_free_i64(tcg_ctx, addr2);
         tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, hitmp);
         tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), hitmp);
         tcg_temp_free_i64(tcg_ctx, hitmp);
-    } else {
-        tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), size | MO_ALIGN | be);
     }
 
     tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, tmp);