/*
 * Patch summary (leading text ahead of the "diff --git" header; everything
 * below that header is the unmodified patch, one diff line per line).
 *
 * Target: qemu/target/arm/translate.c.  The patch reworks the branch of
 * disas_neon_ls_insn() that the code labels "Load store all elements".
 *
 * Hunk 1 - adds neon_load_element64(s, var, reg, ele, mop):
 *   computes offset = neon_element_offset(reg, ele, mop & MO_SIZE) and emits
 *   one TCG load from tcg_ctx->cpu_env + offset into the 64-bit value `var`.
 *   MO_UB / MO_UW / MO_UL select the ld8u / ld16u / ld32u ops, MO_Q selects
 *   the plain ld_i64; any other `mop` reaches g_assert_not_reached().
 *
 * Hunk 2 - adds neon_store_element64(s, reg, ele, size, var):
 *   the store counterpart.  offset = neon_element_offset(reg, ele, size);
 *   MO_8 / MO_16 / MO_32 select st8 / st16 / st32, MO_64 the plain st_i64;
 *   any other `size` reaches g_assert_not_reached().
 *
 * Hunk 3 - replaces 7 of the 11 initialisers of neon_ls_element_type[].
 *   NOTE(review): the entries look like {nregs, interleave, spacing}, but the
 *   first struct member is not visible in this hunk - confirm in the file.
 *
 * Hunks 4 and 5 - disas_neon_ls_insn() gains two locals, `mmu_idx` and
 *   `endian`, set once from get_mem_index(s) and s->be_data.
 *
 * Hunk 6 - the per-size load/store code (separate paths for size 3, 2, 1, 0
 *   that re-derived `addr` and packed/unpacked 32-bit halves) is removed.
 *   In its place are three nested loops:
 *       reg < nregs,  n < (8 >> size),  xs < interleave
 *   Each innermost iteration moves one element n of register
 *   tt = rd + reg + spacing * xs, using gen_aa32_ld_i64 + neon_store_element64
 *   for loads or neon_load_element64 + gen_aa32_st_i64 for stores, with memop
 *   `endian | size`, then advances `addr` by tmp2 (a constant 1 << size).
 *   tmp64, addr and tmp2 are allocated once before the loops and freed once
 *   after them.  `rd += spacing` is dropped, and the final stride changes
 *   from nregs * 8 to nregs * interleave * 8.
 *
 * NOTE(review): the store path passes the raw element size (0..3) as the
 *   `mop` argument of neon_load_element64, whose switch is written in terms
 *   of MO_UB..MO_Q - this assumes those constants equal MO_8..MO_64; confirm
 *   against the TCGMemOp definition.
 */
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index a52e7cb0..23a8f440 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -1682,6 +1682,29 @@ static TCGv_i32 neon_load_reg(DisasContext *s, int reg, int pass)
     return tmp;
 }
 
+static void neon_load_element64(DisasContext *s, TCGv_i64 var, int reg, int ele, TCGMemOp mop)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+    switch (mop) {
+    case MO_UB:
+        tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_UW:
+        tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_UL:
+        tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_Q:
+        tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void neon_store_reg(DisasContext *s, int reg, int pass, TCGv_i32 var)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
@@ -1689,6 +1712,29 @@ static void neon_store_reg(DisasContext *s, int reg, int pass, TCGv_i32 var)
     tcg_temp_free_i32(tcg_ctx, var);
 }
 
+static void neon_store_element64(DisasContext *s, int reg, int ele, TCGMemOp size, TCGv_i64 var)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    long offset = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_8:
+        tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_16:
+        tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_32:
+        tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    case MO_64:
+        tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static inline void neon_load_reg64(DisasContext *s, TCGv_i64 var, int reg)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
@@ -5030,16 +5076,16 @@ static struct {
     int interleave;
     int spacing;
 } const neon_ls_element_type[11] = {
-    {4, 4, 1},
-    {4, 4, 2},
+    {1, 4, 1},
+    {1, 4, 2},
     {4, 1, 1},
-    {4, 2, 1},
-    {3, 3, 1},
-    {3, 3, 2},
+    {2, 2, 2},
+    {1, 3, 1},
+    {1, 3, 2},
     {3, 1, 1},
     {1, 1, 1},
-    {2, 2, 1},
-    {2, 2, 2},
+    {1, 2, 1},
+    {1, 2, 2},
     {2, 1, 1}
 };
 
@@ -5061,6 +5107,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     int shift;
     int n;
     int vec_size;
+    int mmu_idx;
+    TCGMemOp endian;
     TCGv_i32 addr;
     TCGv_i32 tmp;
     TCGv_i32 tmp2;
@@ -5082,6 +5130,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     rn = (insn >> 16) & 0xf;
     rm = insn & 0xf;
     load = (insn & (1 << 21)) != 0;
+    endian = s->be_data;
+    mmu_idx = get_mem_index(s);
     if ((insn & (1 << 23)) == 0) {
         /* Load store all elements. */
         op = (insn >> 8) & 0xf;
@@ -5106,104 +5156,34 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
         nregs = neon_ls_element_type[op].nregs;
         interleave = neon_ls_element_type[op].interleave;
         spacing = neon_ls_element_type[op].spacing;
-        if (size == 3 && (interleave | spacing) != 1)
+        if (size == 3 && (interleave | spacing) != 1) {
             return 1;
+        }
+        tmp64 = tcg_temp_new_i64(tcg_ctx);
         addr = tcg_temp_new_i32(tcg_ctx);
+        tmp2 = tcg_const_i32(tcg_ctx, 1 << size);
         load_reg_var(s, addr, rn);
-        stride = (1 << size) * interleave;
         for (reg = 0; reg < nregs; reg++) {
-            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
-                load_reg_var(s, addr, rn);
-                tcg_gen_addi_i32(tcg_ctx, addr, addr, (1 << size) * reg);
-            } else if (interleave == 2 && nregs == 4 && reg == 2) {
-                load_reg_var(s, addr, rn);
-                tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size);
-            }
-            if (size == 3) {
-                tmp64 = tcg_temp_new_i64(tcg_ctx);
-                if (load) {
-                    gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
-                    neon_store_reg64(s, tmp64, rd);
-                } else {
-                    neon_load_reg64(s, tmp64, rd);
-                    gen_aa32_st64(s, tmp64, addr, get_mem_index(s));
-                }
-                tcg_temp_free_i64(tcg_ctx, tmp64);
-                tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-            } else {
-                for (pass = 0; pass < 2; pass++) {
-                    if (size == 2) {
-                        if (load) {
-                            tmp = tcg_temp_new_i32(tcg_ctx);
-                            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-                            neon_store_reg(s, rd, pass, tmp);
-                        } else {
-                            tmp = neon_load_reg(s, rd, pass);
-                            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tcg_ctx, tmp);
-                        }
-                        tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                    } else if (size == 1) {
-                        if (load) {
-                            tmp = tcg_temp_new_i32(tcg_ctx);
-                            gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-                            tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                            tmp2 = tcg_temp_new_i32(tcg_ctx);
-                            gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s));
-                            tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                            tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16);
-                            tcg_gen_or_i32(tcg_ctx, tmp, tmp, tmp2);
-                            tcg_temp_free_i32(tcg_ctx, tmp2);
-                            neon_store_reg(s, rd, pass, tmp);
-                        } else {
-                            tmp = neon_load_reg(s, rd, pass);
-                            tmp2 = tcg_temp_new_i32(tcg_ctx);
-                            tcg_gen_shri_i32(tcg_ctx, tmp2, tmp, 16);
-                            gen_aa32_st16(s, tmp, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tcg_ctx, tmp);
-                            tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                            gen_aa32_st16(s, tmp2, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tcg_ctx, tmp2);
-                            tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                        }
-                    } else /* size == 0 */ {
-                        if (load) {
-                            tmp2 = NULL;
-                            for (n = 0; n < 4; n++) {
-                                tmp = tcg_temp_new_i32(tcg_ctx);
-                                gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
-                                tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                                if (n == 0) {
-                                    tmp2 = tmp;
-                                } else {
-                                    tcg_gen_shli_i32(tcg_ctx, tmp, tmp, n * 8);
-                                    tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp);
-                                    tcg_temp_free_i32(tcg_ctx, tmp);
-                                }
-                            }
-                            neon_store_reg(s, rd, pass, tmp2);
-                        } else {
-                            tmp2 = neon_load_reg(s, rd, pass);
-                            for (n = 0; n < 4; n++) {
-                                tmp = tcg_temp_new_i32(tcg_ctx);
-                                if (n == 0) {
-                                    tcg_gen_mov_i32(tcg_ctx, tmp, tmp2);
-                                } else {
-                                    tcg_gen_shri_i32(tcg_ctx, tmp, tmp2, n * 8);
-                                }
-                                gen_aa32_st8(s, tmp, addr, get_mem_index(s));
-                                tcg_temp_free_i32(tcg_ctx, tmp);
-                                tcg_gen_addi_i32(tcg_ctx, addr, addr, stride);
-                            }
-                            tcg_temp_free_i32(tcg_ctx, tmp2);
-                        }
+            for (n = 0; n < 8 >> size; n++) {
+                int xs;
+                for (xs = 0; xs < interleave; xs++) {
+                    int tt = rd + reg + spacing * xs;
+
+                    if (load) {
+                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+                        neon_store_element64(s, tt, n, size, tmp64);
+                    } else {
+                        neon_load_element64(s, tmp64, tt, n, size);
+                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                     }
+                    tcg_gen_add_i32(tcg_ctx, addr, addr, tmp2);
                 }
             }
-            rd += spacing;
         }
         tcg_temp_free_i32(tcg_ctx, addr);
-        stride = nregs * 8;
+        tcg_temp_free_i32(tcg_ctx, tmp2);
+        tcg_temp_free_i64(tcg_ctx, tmp64);
+        stride = nregs * interleave * 8;
     } else {
         size = (insn >> 10) & 3;
         if (size == 3) {