diff --git a/qemu/aarch64.h b/qemu/aarch64.h index e852738e..5e72cbeb 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_aarch64 #define tcg_target_qemu_prologue tcg_target_qemu_prologue_aarch64 #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_aarch64 +#define tcg_tb_alloc tcg_tb_alloc_aarch64 #define tcg_temp_alloc tcg_temp_alloc_aarch64 #define tcg_temp_free_i32 tcg_temp_free_i32_aarch64 #define tcg_temp_free_i64 tcg_temp_free_i64_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 829f88e3..2980cd24 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_aarch64eb #define tcg_target_qemu_prologue tcg_target_qemu_prologue_aarch64eb #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_aarch64eb +#define tcg_tb_alloc tcg_tb_alloc_aarch64eb #define tcg_temp_alloc tcg_temp_alloc_aarch64eb #define tcg_temp_free_i32 tcg_temp_free_i32_aarch64eb #define tcg_temp_free_i64 tcg_temp_free_i64_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index 87e0158b..4d37a0ff 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_arm #define tcg_target_qemu_prologue tcg_target_qemu_prologue_arm #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_arm +#define tcg_tb_alloc tcg_tb_alloc_arm #define tcg_temp_alloc tcg_temp_alloc_arm #define tcg_temp_free_i32 tcg_temp_free_i32_arm #define tcg_temp_free_i64 tcg_temp_free_i64_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index 688bac5c..1b586568 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_armeb #define tcg_target_qemu_prologue tcg_target_qemu_prologue_armeb #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_armeb +#define tcg_tb_alloc tcg_tb_alloc_armeb #define tcg_temp_alloc tcg_temp_alloc_armeb #define tcg_temp_free_i32 tcg_temp_free_i32_armeb #define tcg_temp_free_i64 tcg_temp_free_i64_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 51dd01f2..6e152936 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3285,6 +3285,7 @@ symbols = ( 'tcg_target_init', 'tcg_target_qemu_prologue', 'tcg_target_reg_alloc_order', + 'tcg_tb_alloc', 'tcg_temp_alloc', 'tcg_temp_free_i32', 'tcg_temp_free_i64', diff --git a/qemu/include/exec/tb-context.h b/qemu/include/exec/tb-context.h index 6680fc2f..7b9d0735 100644 --- a/qemu/include/exec/tb-context.h +++ b/qemu/include/exec/tb-context.h @@ -30,8 +30,9 @@ typedef struct TBContext TBContext; struct TBContext { - TranslationBlock *tbs; + TranslationBlock **tbs; TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; + size_t tbs_size; int nb_tbs; /* statistics */ diff --git a/qemu/m68k.h b/qemu/m68k.h index 0853579c..7f4af686 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_m68k #define tcg_target_qemu_prologue tcg_target_qemu_prologue_m68k #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_m68k +#define tcg_tb_alloc tcg_tb_alloc_m68k #define tcg_temp_alloc tcg_temp_alloc_m68k #define tcg_temp_free_i32 tcg_temp_free_i32_m68k #define tcg_temp_free_i64 tcg_temp_free_i64_m68k diff --git a/qemu/mips.h b/qemu/mips.h index cb35d0fa..71ea7813 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_mips #define tcg_target_qemu_prologue tcg_target_qemu_prologue_mips #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_mips +#define tcg_tb_alloc tcg_tb_alloc_mips #define tcg_temp_alloc tcg_temp_alloc_mips #define tcg_temp_free_i32 tcg_temp_free_i32_mips #define tcg_temp_free_i64 tcg_temp_free_i64_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index a16255d5..672d6f97 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_mips64 #define tcg_target_qemu_prologue tcg_target_qemu_prologue_mips64 #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_mips64 +#define tcg_tb_alloc tcg_tb_alloc_mips64 #define tcg_temp_alloc tcg_temp_alloc_mips64 #define tcg_temp_free_i32 tcg_temp_free_i32_mips64 #define tcg_temp_free_i64 tcg_temp_free_i64_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 7dd98b1a..4e040738 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_mips64el #define tcg_target_qemu_prologue tcg_target_qemu_prologue_mips64el #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_mips64el +#define tcg_tb_alloc tcg_tb_alloc_mips64el #define tcg_temp_alloc tcg_temp_alloc_mips64el #define tcg_temp_free_i32 tcg_temp_free_i32_mips64el #define tcg_temp_free_i64 tcg_temp_free_i64_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index aeb70ffe..c2739a08 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_mipsel #define tcg_target_qemu_prologue tcg_target_qemu_prologue_mipsel #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_mipsel +#define tcg_tb_alloc tcg_tb_alloc_mipsel #define tcg_temp_alloc tcg_temp_alloc_mipsel #define tcg_temp_free_i32 tcg_temp_free_i32_mipsel #define tcg_temp_free_i64 tcg_temp_free_i64_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index 5dc97b11..ada1e695 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_powerpc #define tcg_target_qemu_prologue tcg_target_qemu_prologue_powerpc #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_powerpc +#define tcg_tb_alloc tcg_tb_alloc_powerpc #define tcg_temp_alloc tcg_temp_alloc_powerpc #define tcg_temp_free_i32 tcg_temp_free_i32_powerpc #define tcg_temp_free_i64 tcg_temp_free_i64_powerpc diff --git a/qemu/sparc.h b/qemu/sparc.h index 4ba5a8f0..79a1b824 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_sparc #define tcg_target_qemu_prologue tcg_target_qemu_prologue_sparc #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_sparc +#define tcg_tb_alloc tcg_tb_alloc_sparc #define tcg_temp_alloc tcg_temp_alloc_sparc #define tcg_temp_free_i32 tcg_temp_free_i32_sparc #define tcg_temp_free_i64 tcg_temp_free_i64_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 64f99e41..65de72a5 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_sparc64 #define tcg_target_qemu_prologue tcg_target_qemu_prologue_sparc64 #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_sparc64 +#define tcg_tb_alloc tcg_tb_alloc_sparc64 #define tcg_temp_alloc tcg_temp_alloc_sparc64 #define tcg_temp_free_i32 tcg_temp_free_i32_sparc64 #define tcg_temp_free_i64 tcg_temp_free_i64_sparc64 diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 4c09b750..5496f64e 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -374,6 +374,26 @@ void tcg_context_init(TCGContext *s) } } +/* + * Allocate TBs right before their corresponding translated code, making + * sure that TBs and code are on different cache lines. + */ +TranslationBlock *tcg_tb_alloc(TCGContext *s) +{ + uintptr_t align = s->uc->qemu_icache_linesize; + TranslationBlock *tb; + void *next; + + tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); + next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); + + if (unlikely(next > s->code_gen_highwater)) { + return NULL; + } + s->code_gen_ptr = next; + return tb; +} + void tcg_prologue_init(TCGContext *s) { size_t prologue_size, total_size; diff --git a/qemu/tcg/tcg.h b/qemu/tcg/tcg.h index 2c399b07..2b75ac68 100644 --- a/qemu/tcg/tcg.h +++ b/qemu/tcg/tcg.h @@ -649,6 +649,7 @@ QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8); /* tb_lock must be held for tcg_malloc_internal. */ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); +TranslationBlock *tcg_tb_alloc(TCGContext *s); void tcg_context_init(TCGContext *s); void tcg_context_free(void *s); // free memory allocated for @s @@ -826,7 +827,6 @@ struct TCGContext { here, because there's too much arithmetic throughout that relies on addition and subtraction working on bytes. Rely on the GCC extension that allows arithmetic on void*. */ - int code_gen_max_blocks; void *code_gen_prologue; void *code_gen_epilogue; void *code_gen_buffer; diff --git a/qemu/translate-all.c b/qemu/translate-all.c index d267c8dd..0f4e64f3 100644 --- a/qemu/translate-all.c +++ b/qemu/translate-all.c @@ -823,9 +823,13 @@ static inline void code_gen_alloc(struct uc_struct *uc, size_t tb_size) /* Estimate a good size for the number of TBs we can support. We still haven't deducted the prologue from the buffer size here, but that's minimal and won't affect the estimate much. */ - tcg_ctx->code_gen_max_blocks - = tcg_ctx->code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; - tcg_ctx->tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx->code_gen_max_blocks); + /* size this conservatively -- realloc later if needed */ + tcg_ctx->tb_ctx.tbs_size = + tcg_ctx->code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8; + if (unlikely(!tcg_ctx->tb_ctx.tbs_size)) { + tcg_ctx->tb_ctx.tbs_size = 64 * 1024; + } + tcg_ctx->tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx->tb_ctx.tbs_size); } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -861,13 +865,20 @@ bool tcg_enabled(struct uc_struct *uc) */ static TranslationBlock *tb_alloc(struct uc_struct *uc, target_ulong pc) { - TranslationBlock *tb; TCGContext *tcg_ctx = uc->tcg_ctx; + TranslationBlock *tb; + TBContext *ctx; - if (tcg_ctx->tb_ctx.nb_tbs >= tcg_ctx->code_gen_max_blocks) { + tb = tcg_tb_alloc(tcg_ctx); + if (unlikely(tb == NULL)) { return NULL; } - tb = &tcg_ctx->tb_ctx.tbs[tcg_ctx->tb_ctx.nb_tbs++]; + ctx = &tcg_ctx->tb_ctx; + if (unlikely(ctx->nb_tbs == ctx->tbs_size)) { + ctx->tbs_size *= 2; + ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size); + } + ctx->tbs[ctx->nb_tbs++] = tb; tb->pc = pc; tb->cflags = 0; tb->invalid = false; @@ -883,8 +894,10 @@ void tb_free(struct uc_struct *uc, TranslationBlock *tb) Ignore the hard cases and just back up if this TB happens to be the last one generated. */ if (tcg_ctx->tb_ctx.nb_tbs > 0 && - tb == &tcg_ctx->tb_ctx.tbs[tcg_ctx->tb_ctx.nb_tbs - 1]) { - tcg_ctx->code_gen_ptr = tb->tc_ptr; + tb == tcg_ctx->tb_ctx.tbs[tcg_ctx->tb_ctx.nb_tbs - 1]) { + size_t struct_size = ROUND_UP(sizeof(*tb), uc->qemu_icache_linesize); + + tcg_ctx->code_gen_ptr = tb->tc_ptr - struct_size; tcg_ctx->tb_ctx.nb_tbs--; } } @@ -1722,7 +1735,7 @@ static TranslationBlock *tb_find_pc(struct uc_struct *uc, uintptr_t tc_ptr) m_max = tcg_ctx->tb_ctx.nb_tbs - 1; while (m_min <= m_max) { m = (m_min + m_max) >> 1; - tb = &tcg_ctx->tb_ctx.tbs[m]; + tb = tcg_ctx->tb_ctx.tbs[m]; v = (uintptr_t)tb->tc_ptr; if (v == tc_ptr) { return tb; @@ -1732,7 +1745,7 @@ static TranslationBlock *tb_find_pc(struct uc_struct *uc, uintptr_t tc_ptr) m_min = m + 1; } } - return &tcg_ctx->tb_ctx.tbs[m_max]; + return tcg_ctx->tb_ctx.tbs[m_max]; } #if !defined(CONFIG_USER_ONLY) @@ -1898,8 +1911,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) cpu_fprintf(f, "gen code size %td/%zd\n", tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer, tcg_ctx->code_gen_highwater - tcg_ctx->code_gen_buffer); - cpu_fprintf(f, "TB count %d/%d\n", - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks); + cpu_fprintf(f, "TB count %d\n", tcg_ctx.tb_ctx.nb_tbs); cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", tcg_ctx.tb_ctx.nb_tbs ? target_code_size / tcg_ctx.tb_ctx.nb_tbs : 0, diff --git a/qemu/x86_64.h b/qemu/x86_64.h index e5303da2..1091d9f2 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -3279,6 +3279,7 @@ #define tcg_target_init tcg_target_init_x86_64 #define tcg_target_qemu_prologue tcg_target_qemu_prologue_x86_64 #define tcg_target_reg_alloc_order tcg_target_reg_alloc_order_x86_64 +#define tcg_tb_alloc tcg_tb_alloc_x86_64 #define tcg_temp_alloc tcg_temp_alloc_x86_64 #define tcg_temp_free_i32 tcg_temp_free_i32_x86_64 #define tcg_temp_free_i64 tcg_temp_free_i64_x86_64