From 9aaad9ed27ee52384a58eaf9d7100a5961a8b56a Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 2 Mar 2018 21:16:56 -0500 Subject: [PATCH] target/arm: optimize indirect branches Speed up indirect branches by jumping to the target if it is valid. Softmmu measurements (see later commit for user-mode results): Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. - Impact on Boot time | setup | ARM debian jessie boot+shutdown time | stddev | |--------+--------------------------------------+--------| | v2.9.0 | 8.84 | 0.07 | | +cross | 8.85 | 0.03 | | +jr | 8.83 | 0.06 | - NBench, arm-softmmu (debian jessie guest). Host: Intel i7-4790K @ 4.00GHz 1.3x +-+-------------------------------------------------------------------------------------------------------------+-+ | | | cross #### | 1.25x +cross+jr..........................................................#++#.........................................+-+ | #### # # | | +++# # # # | | +++ **** # # # | 1.2x +-+...................................####............*..*..#......#..#.........................................+-+ | **** # * * # # # #### | | * * # * * # # # # # | 1.15x +-+................................*..*..#............*..*..#......#..#.....#..#................................+-+ | * * # * * # # # # # | | * * # #### * * # # # # # | | * * # # # * * # # # # # #### | 1.1x +-+................................*..*..#......#..#..*..*..#......#..#.....#..#.........................#..#...+-+ | * * # # # * * # # # # # # # | | * * # # # * * # # # # # # # | 1.05x +-+..........................####..*..*..#......#..#..*..*..#......#..#.....#..#......+++............*****..#...+-+ | ***** # * * # # # * * # ***** # # # +++ | ****### * * # | | *+++* # * * # # # * * # *+++* # **** # *****### * * # * * # | | *****### +++#### * * # * * # ***** # * * # * * # * * # * | *++# * * # * * # | 1x +-++-+*+++*-+#++****++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-++-+ | * * # * * # * * # * * 
# * * # * * # * * # * * # * * # * * # * * # | | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.95x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURIER FP EMULATION HUFFMAN IDEA LU DECOMPOSITION NEURAL NET NUMERIC SORT STRING SORT hmean png: http://imgur.com/eOLmZNR NB. 'cross' represents the previous commit. Backports commit 8a6b28c7b5104263344508df0f4bce97f22cfcaf from qemu --- qemu/target/arm/translate.c | 26 ++++++++++++++++++-------- qemu/target/arm/translate.h | 4 ++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 9cb2faa3..c07e9c56 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -1222,7 +1222,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception_internal(s, excp); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } static void gen_exception_insn(DisasContext *s, int offset, int excp, @@ -1231,7 +1231,7 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp, gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception(s, excp, syn, target_el); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } /* Force a TB lookup after an instruction that changes the CPU state. 
*/ @@ -1239,7 +1239,7 @@ static inline void gen_lookup_tb(DisasContext *s) { TCGContext *tcg_ctx = s->uc->tcg_ctx; tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_R[15], s->pc & ~1); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXIT; } static inline void gen_hlt(DisasContext *s, int imm) @@ -4257,7 +4257,17 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest) #endif } -static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +static void gen_goto_ptr(DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv addr = tcg_temp_new(tcg_ctx); + tcg_gen_extu_i32_tl(tcg_ctx, addr, tcg_ctx->cpu_R[15]); + tcg_gen_lookup_and_goto_ptr(tcg_ctx, addr); + tcg_temp_free(tcg_ctx, addr); +} + +static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -4266,11 +4276,8 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) gen_set_pc_im(s, dest); tcg_gen_exit_tb(tcg_ctx, (uintptr_t)s->tb + n); } else { - TCGv addr = tcg_temp_new(tcg_ctx); gen_set_pc_im(s, dest); - tcg_gen_extu_i32_tl(tcg_ctx, addr, tcg_ctx->cpu_R[15]); - tcg_gen_lookup_and_goto_ptr(tcg_ctx, addr); - tcg_temp_free(tcg_ctx, addr); + gen_goto_ptr(s); } } @@ -12326,11 +12333,14 @@ tb_end: gen_set_pc_im(dc, dc->pc); /* fall through */ case DISAS_JUMP: + gen_goto_ptr(dc); + break; default: /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(tcg_ctx, 0); break; case DISAS_TB_JUMP: + case DISAS_EXC: /* nothing more to generate */ break; case DISAS_WFI: diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index b2b9e198..8bb27190 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -138,6 +138,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET 11 +/* For instructions which want an immediate exit to the main loop, + * as opposed to attempting to use lookup_and_goto_ptr. 
+ */ +#define DISAS_EXIT 12 #ifdef TARGET_AARCH64 void a64_translate_init(struct uc_struct *uc);