From 30c0950567733f6fdd1239b2dc877ffaa60c272a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 May 2019 22:30:16 -0400 Subject: [PATCH] tcg: Add CPUState cflags_next_tb We were generating code during tb_invalidate_phys_page_range, check_watchpoint, cpu_io_recompile, and (seemingly) discarding the TB, assuming that it would magically be picked up during the next iteration through the cpu_exec loop. Instead, record the desired cflags in CPUState so that we request the proper TB so that there is no more magic. Backports commit 9b990ee5a3cc6aa38f81266fb0c6ef37a36c45b9 from qemu --- qemu/accel/tcg/cpu-exec.c | 19 +++++++++++++++--- qemu/accel/tcg/translate-all.c | 35 ++++++++++------------------------ qemu/include/qom/cpu.h | 1 + qemu/qom/cpu.c | 1 + 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/qemu/accel/tcg/cpu-exec.c b/qemu/accel/tcg/cpu-exec.c index 9cfe7141..a9986d16 100644 --- a/qemu/accel/tcg/cpu-exec.c +++ b/qemu/accel/tcg/cpu-exec.c @@ -203,13 +203,12 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *last_tb, - int tb_exit) + int tb_exit, uint32_t cf_mask) { TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; bool acquired_tb_lock = false; - uint32_t cf_mask = curr_cflags(cpu->uc); tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); if (tb == NULL) { @@ -561,7 +560,21 @@ int cpu_exec(struct uc_struct *uc, CPUState *cpu) int tb_exit = 0; while (!cpu_handle_interrupt(cpu, &last_tb)) { - TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit); + uint32_t cflags = cpu->cflags_next_tb; + TranslationBlock *tb; + + /* When requested, use an exact setting for cflags for the next + execution. This is used for icount, precise smc, and stop- + after-access watchpoints. Since this request should never + have CF_INVALID set, -1 is a convenient invalid value that + does not require tcg headers for cpu_common_reset. */ + if (cflags == -1) { + cflags = curr_cflags(uc); + } else { + cpu->cflags_next_tb = -1; + } + + tb = tb_find(cpu, last_tb, tb_exit, cflags); if (!tb) { // invalid TB due to invalid code? uc->invalid_error = UC_ERR_FETCH_UNMAPPED; ret = EXCP_HLT; diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 18f51e2c..40d2fc5b 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1570,13 +1570,11 @@ void tb_invalidate_phys_page_range(struct uc_struct *uc, tb_page_addr_t start, t int is_cpu_write_access) { TranslationBlock *tb, *tb_next; -#if defined(TARGET_HAS_PRECISE_SMC) - CPUArchState *env = NULL; -#endif tb_page_addr_t tb_start, tb_end; PageDesc *p; int n; #ifdef TARGET_HAS_PRECISE_SMC + CPUArchState *env = NULL; int current_tb_not_found = is_cpu_write_access; TranslationBlock *current_tb = NULL; int current_tb_modified = 0; @@ -1655,11 +1653,8 @@ void tb_invalidate_phys_page_range(struct uc_struct *uc, tb_page_addr_t start, t #endif #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - tb_gen_code(uc->cpu, current_pc, current_cs_base, current_flags, - 1 | curr_cflags(uc)); + /* Force execution of one insn next time. */ + uc->cpu->cflags_next_tb = 1 | curr_cflags(uc); cpu_loop_exit_noexc(uc->cpu); } #endif @@ -1768,11 +1763,8 @@ static bool tb_invalidate_phys_page(struct uc_struct *uc, tb_page_addr_t addr, u p->first_tb = NULL; #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - tb_gen_code(cpu, current_pc, current_cs_base, current_flags, - 1 | curr_cflags(uc)); + /* Force execution of one insn next time. */ + cpu->cflags_next_tb = 1 | curr_cflags(uc); return true; } #endif @@ -1865,9 +1857,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; - uint32_t n, cflags; - target_ulong pc, cs_base; - uint32_t flags; + uint32_t n; tb = tb_find_pc(env->uc, retaddr); if (!tb) { @@ -1904,12 +1894,9 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_abort(cpu, "TB too big during recompile"); } - cflags = n | CF_LAST_IO; - cflags |= curr_cflags(cpu->uc); - pc = tb->pc; - cs_base = tb->cs_base; - flags = tb->flags; - tb_phys_invalidate(cpu->uc, tb, -1); + /* Adjust the execution state of the next TB. */ + cpu->cflags_next_tb = curr_cflags(cpu->uc) | CF_LAST_IO | n; + if (tb_cflags(tb) & CF_NOCACHE) { if (tb->orig_tb) { /* Invalidate original TB if this TB was generated in @@ -1918,9 +1905,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) } tb_free(env->uc, tb); } - /* FIXME: In theory this could raise an exception. In practice - we have already translated the block once so it's probably ok. */ - tb_gen_code(cpu, pc, cs_base, (int)flags, cflags); + /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not the first in the TB) then we end up generating a whole new TB and repeating the fault, which is horribly inefficient. diff --git a/qemu/include/qom/cpu.h b/qemu/include/qom/cpu.h index 6b5d6380..a770add7 100644 --- a/qemu/include/qom/cpu.h +++ b/qemu/include/qom/cpu.h @@ -309,6 +309,7 @@ struct CPUState { bool stop; bool stopped; bool crash_occurred; + uint32_t cflags_next_tb; bool tb_flushed; volatile sig_atomic_t exit_request; uint32_t interrupt_request; diff --git a/qemu/qom/cpu.c b/qemu/qom/cpu.c index a87a89c8..957be6f4 100644 --- a/qemu/qom/cpu.c +++ b/qemu/qom/cpu.c @@ -149,6 +149,7 @@ static void cpu_common_reset(CPUState *cpu) cpu->can_do_io = 0; cpu->exception_index = -1; cpu->crash_occurred = false; + cpu->cflags_next_tb = -1; // TODO: Should be uncommented, but good 'ol // unicorn's crappy symbol deduplication