From c5a2a50c0607e9677d20c6e02a302f89b4b0fdc8 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Sat, 10 Feb 2018 20:29:09 -0500
Subject: [PATCH] tcg: Mask TCGMemOp appropriately for indexing

The addition of MO_AMASK means that places that used inverted masks
(e.g. opc & ~MO_SIGN) need to be changed to use positive masks
(e.g. opc & (MO_BSWAP | MO_SIZE)), and places that indexed a table
with an unmasked TCGMemOp need to mask down to the intended bits.

Backports commit 2b7ec66f025263a5331f37d5ad78a625496fd7bd from qemu
---
 qemu/tcg/aarch64/tcg-target.c |  4 ++--
 qemu/tcg/arm/tcg-target.c     |  6 +++---
 qemu/tcg/i386/tcg-target.c    |  4 ++--
 qemu/tcg/mips/tcg-target.c    |  4 ++--
 qemu/tcg/ppc/tcg-target.c     |  8 ++++----
 qemu/tcg/s390/tcg-target.c    |  4 ++--
 qemu/tcg/sparc/tcg-target.c   | 30 +++++++++++++++---------------
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/qemu/tcg/aarch64/tcg-target.c b/qemu/tcg/aarch64/tcg-target.c
index 2053fdb5..d303a4cb 100644
--- a/qemu/tcg/aarch64/tcg-target.c
+++ b/qemu/tcg/aarch64/tcg-target.c
@@ -1004,7 +1004,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
     tcg_out_adr(s, TCG_REG_X3, lb->raddr);
-    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
     if (opc & MO_SIGN) {
         tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
     } else {
@@ -1027,7 +1027,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
     tcg_out_adr(s, TCG_REG_X4, lb->raddr);
-    tcg_out_call(s, qemu_st_helpers[opc]);
+    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
     tcg_out_goto(s, lb->raddr);
 }
 
diff --git a/qemu/tcg/arm/tcg-target.c b/qemu/tcg/arm/tcg-target.c
index ee2a4c4c..6ba54783 100644
--- a/qemu/tcg/arm/tcg-target.c
+++ b/qemu/tcg/arm/tcg-target.c
@@ -1260,9 +1260,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
        icache usage.  For pre-armv6, use the signed helpers since we do
        not have a single insn sign-extend.  */
     if (use_armv6_instructions) {
-        func = qemu_ld_helpers[opc & ~MO_SIGN];
+        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
     } else {
-        func = qemu_ld_helpers[opc];
+        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
         if (opc & MO_SIGN) {
             opc = MO_UL;
         }
@@ -1337,7 +1337,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
     /* Tail-call to the helper, which will return to the fast path.  */
-    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
+    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 }
 #endif /* SOFTMMU */
 
diff --git a/qemu/tcg/i386/tcg-target.c b/qemu/tcg/i386/tcg-target.c
index bb75ac0a..e2d12584 100644
--- a/qemu/tcg/i386/tcg-target.c
+++ b/qemu/tcg/i386/tcg-target.c
@@ -1412,7 +1412,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
                      (uintptr_t)l->raddr);
     }
 
-    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);   // qq
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
     data_reg = l->datalo_reg;
     switch (opc & MO_SSIZE) {
@@ -1518,7 +1518,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
     /* "Tail call" to the helper, with the return address back inline.  */
     tcg_out_push(s, retaddr);
-    tcg_out_jmp(s, qemu_st_helpers[opc]);
+    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 }
 #elif defined(__x86_64__) && defined(__linux__)
 # include <asm/prctl.h>
diff --git a/qemu/tcg/mips/tcg-target.c b/qemu/tcg/mips/tcg-target.c
index 0646371d..d953b594 100644
--- a/qemu/tcg/mips/tcg-target.c
+++ b/qemu/tcg/mips/tcg-target.c
@@ -1031,7 +1031,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     }
     i = tcg_out_call_iarg_imm(s, i, oi);
     i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
-    tcg_out_call_int(s, qemu_ld_helpers[opc], false);
+    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
     /* delay slot */
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
 
@@ -1094,7 +1094,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
        computation to take place in the return address register.  */
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
     i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
-    tcg_out_call_int(s, qemu_st_helpers[opc], true);
+    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
     /* delay slot */
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
 }
diff --git a/qemu/tcg/ppc/tcg-target.c b/qemu/tcg/ppc/tcg-target.c
index e5ef3779..1e730233 100644
--- a/qemu/tcg/ppc/tcg-target.c
+++ b/qemu/tcg/ppc/tcg-target.c
@@ -1495,7 +1495,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
     tcg_out32(s, MFSPR | RT(arg) | LR);
 
-    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
     lo = lb->datalo_reg;
     hi = lb->datahi_reg;
@@ -1565,7 +1565,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
     tcg_out32(s, MFSPR | RT(arg) | LR);
 
-    tcg_out_call(s, qemu_st_helpers[opc]);
+    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
     tcg_out_b(s, 0, lb->raddr);
 }
@@ -1624,7 +1624,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
             tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
         }
     } else {
-        uint32_t insn = qemu_ldx_opc[opc];
+        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
         if (!HAVE_ISA_2_06 && insn == LDBRX) {
             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
             tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
@@ -1696,7 +1696,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
             tcg_out32(s, STW | TAI(datalo, addrlo, 4));
         }
     } else {
-        uint32_t insn = qemu_stx_opc[opc];
+        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
         if (!HAVE_ISA_2_06 && insn == STDBRX) {
             tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
             tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
diff --git a/qemu/tcg/s390/tcg-target.c b/qemu/tcg/s390/tcg-target.c
index 815617f5..a6a39027 100644
--- a/qemu/tcg/s390/tcg-target.c
+++ b/qemu/tcg/s390/tcg-target.c
@@ -1573,7 +1573,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     }
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
-    tcg_out_call(s, qemu_ld_helpers[opc]);
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
     tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
 
     tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
@@ -1610,7 +1610,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     }
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
-    tcg_out_call(s, qemu_st_helpers[opc]);
+    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
     tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
 }
diff --git a/qemu/tcg/sparc/tcg-target.c b/qemu/tcg/sparc/tcg-target.c
index 51a80e9f..463e6b05 100644
--- a/qemu/tcg/sparc/tcg-target.c
+++ b/qemu/tcg/sparc/tcg-target.c
@@ -1075,12 +1075,11 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     TCGMemOp memop = get_memop(oi);
 #ifdef CONFIG_SOFTMMU
     unsigned memi = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     TCGReg addrz, param;
     tcg_insn_unit *func;
     tcg_insn_unit *label_ptr;
 
-    addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
+    addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
                              offsetof(CPUTLBEntry, addr_read));
 
     /* The fast path is exactly one insn.  Thus we can perform the
@@ -1092,7 +1091,8 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
-    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]);
+    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
 
     /* TLB Miss.  */
 
@@ -1105,10 +1105,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
 
     /* We use the helpers to extend SB and SW data, leaving the case
        of SL needing explicit extending below.  */
-    if ((memop & ~MO_BSWAP) == MO_SL) {
-        func = qemu_ld_trampoline[memop & ~MO_SIGN];
+    if ((memop & MO_SSIZE) == MO_SL) {
+        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
     } else {
-        func = qemu_ld_trampoline[memop];
+        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
     }
     assert(func != NULL);
     tcg_out_call_nodelay(s, func);
@@ -1119,13 +1119,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
        Which complicates things for sparcv8plus.  */
     if (SPARC64) {
         /* We let the helper sign-extend SB and SW, but leave SL for here.  */
-        if (is_64 && (memop & ~MO_BSWAP) == MO_SL) {
+        if (is_64 && (memop & MO_SSIZE) == MO_SL) {
             tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
         } else {
             tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
         }
     } else {
-        if (s_bits == MO_64) {
+        if ((memop & MO_SIZE) == MO_64) {
             tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
             tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
             tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
@@ -1147,7 +1147,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     }
     tcg_out_ldst_rr(s, data, addr,
                     (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
-                    qemu_ld_opc[memop]);
+                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
 #endif /* CONFIG_SOFTMMU */
 }
 
@@ -1157,12 +1157,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     TCGMemOp memop = get_memop(oi);
 #ifdef CONFIG_SOFTMMU
     unsigned memi = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     TCGReg addrz, param;
     tcg_insn_unit *func;
     tcg_insn_unit *label_ptr;
 
-    addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
+    addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
                              offsetof(CPUTLBEntry, addr_write));
 
     /* The fast path is exactly one insn.  Thus we can perform the entire
@@ -1172,7 +1171,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
-    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]);
+    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
 
     /* TLB Miss.  */
 
@@ -1182,13 +1182,13 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
         param++;
     }
     tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
-    if (!SPARC64 && s_bits == MO_64) {
+    if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
         /* Skip the high-part; we'll perform the extract in the trampoline.  */
         param++;
     }
     tcg_out_mov(s, TCG_TYPE_REG, param++, data);
 
-    func = qemu_st_trampoline[memop];
+    func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
     assert(func != NULL);
     tcg_out_call_nodelay(s, func);
     /* delay slot */
@@ -1202,7 +1202,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     }
     tcg_out_ldst_rr(s, data, addr,
                     (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
-                    qemu_st_opc[memop]);
+                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
 #endif /* CONFIG_SOFTMMU */
 }