diff --git a/qemu/target/arm/crypto_helper.c b/qemu/target/arm/crypto_helper.c index 0a5bb276..35c4d0a2 100644 --- a/qemu/target/arm/crypto_helper.c +++ b/qemu/target/arm/crypto_helper.c @@ -14,7 +14,9 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -30,23 +32,15 @@ union CRYPTO_STATE { #define CR_ST_WORD(state, i) (state.words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; - union CRYPTO_STATE rk; - union CRYPTO_STATE st; + union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - rk.l[0] = rm[0]; - rk.l[1] = rm[1]; - st.l[0] = rd[0]; - st.l[1] = rd[1]; - - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -60,7 +54,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese(vd + i, vn + i, vm + i, decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -196,14 +201,8 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; - union CRYPTO_STATE st; + union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - st.l[0] = rm[0]; - st.l[1] = rm[1]; - - assert(decrypt < 2); for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = @@ -217,6 +216,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc(vd + i, vm + i, decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -710,20 +720,12 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d; - union CRYPTO_STATE n; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; - d.l[0] = rd[0]; - d.l[1] = rd[1]; - - n.l[0] = rn[0]; - n.l[1] = rn[1]; - for (i = 0; i < 4; i++) { t = CR_ST_WORD(d, (i + 1) % 4) ^ CR_ST_WORD(d, (i + 2) % 4) ^ @@ -743,22 +745,23 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d; - union CRYPTO_STATE n; - union CRYPTO_STATE m; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; uint32_t t, i; - n.l[0] = rn[0]; - n.l[1] = rn[1]; - - m.l[0] = rm[0]; - m.l[1] = rm[1]; - d = n; for (i = 0; i < 4; i++) { t = CR_ST_WORD(d, (i + 1) % 4) ^ @@ -777,3 +780,13 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index d9105f70..4411e533 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -506,7 +506,7 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -527,8 +527,8 @@ DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32_arm, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index e0cdfebc..f5c83e3c 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -734,6 +734,17 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); } +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, + int rn, int data, gen_helper_gvec_2 *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); +} + /* Expand a 3-operand operation using an out-of-line helper. */ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int data, gen_helper_gvec_3 *fn) @@ -13698,15 +13709,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static void disas_crypto_aes(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13716,19 +13725,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13739,15 +13748,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13899,7 +13904,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + CryptoThreeOpFn *genfn = NULL; + gen_helper_gvec_3 *oolfn = NULL; if (o == 0) { switch (opcode) { @@ -13934,7 +13940,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -13951,6 +13957,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); + return; + } + if (genfn) { TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; @@ -14004,6 +14015,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; CryptoTwoOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; switch (opcode) { case 0: /* SHA512SU0 */ @@ -14012,7 +14024,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; + oolfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -14028,6 +14040,11 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn); + return; + } + tcg_rd_ptr = vec_full_reg_ptr(s, rd); tcg_rn_ptr = vec_full_reg_ptr(s, rn); diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 3d29e3cf..f05ad8e5 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -6498,22 +6498,24 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } - ptr1 = vfp_reg_ptr(s, true, rd); - ptr2 = vfp_reg_ptr(s, true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(tcg_ctx, extract32(insn, 6, 1)); + /* + * Bit 6 is the lowest opcode bit; it distinguishes + * between encryption (AESE/AESMC) and decryption + * (AESD/AESIMC). + */ if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(tcg_ctx, ptr1, ptr2, tmp3); + tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(true, rd), + vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aese); } else { - gen_helper_crypto_aesmc(tcg_ctx, ptr1, ptr2, tmp3); + tcg_gen_gvec_2_ool(tcg_ctx, vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aesmc); } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_i32(tcg_ctx, tmp3); break; case NEON_2RM_SHA1H: if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index 4b1ade24..15317178 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -23,7 +23,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ @@ -37,16 +37,6 @@ #define H4(x) (x) #endif -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = (uint64_t *)((char *)vd + opr_sz); - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, int16_t src3, uint32_t *sat)