From ab23d4ceb0417343949363b813823f64408c66a5 Mon Sep 17 00:00:00 2001 From: PhilippTakacs <76390863+PhilippTakacs@users.noreply.github.com> Date: Thu, 31 Oct 2024 17:02:11 +0100 Subject: [PATCH] Optimize Notdirty write (#2031) * enable notdirty_write for snapshots when possible Snapshots only happens when the priority of the memory region is smaller then the snapshot_level. After a snapshot notdirty can be set. * disable notdirty_write for self modifying code When SMC access the memory region more then once the tb must be rebuild multible times. fixes #2029 * notdirty_write better hook check Check all relevant memory hooks before enabling notdirty write. This also checks if the memory hook is registered for the affected region. So it is possible to use notdirty write and have some hooks on different addresses. * notdirty_write check for addr_write in snapshot case * self modifying code clear recursive mem access when self modifying code does unaligned memory accese sometimes uc->size_recur_mem is changed but for notdirty write not changed back. This causes mem_hooks to be missed. To fix this uc->size_recur_mem is set to 0 before each cpu_exec() call. --- qemu/accel/tcg/cputlb.c | 23 +++++++------ qemu/accel/tcg/translate-all.c | 2 +- qemu/include/exec/exec-all.h | 23 +++++++++++++ qemu/softmmu/cpus.c | 1 + tests/unit/test_x86.c | 63 ++++++++++++++++++++++++++++++++++ uc.c | 2 ++ 6 files changed, 102 insertions(+), 12 deletions(-) diff --git a/qemu/accel/tcg/cputlb.c b/qemu/accel/tcg/cputlb.c index f3f89aefc3..5206f13231 100644 --- a/qemu/accel/tcg/cputlb.c +++ b/qemu/accel/tcg/cputlb.c @@ -1188,15 +1188,15 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr, - MemoryRegion *mr) + CPUTLBEntry *tlbe) { +#ifdef TARGET_ARM + struct uc_struct *uc = cpu->uc; +#endif ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + MemoryRegion *mr = cpu->uc->memory_mapping(cpu->uc, tlbe->paddr | (mem_vaddr & ~TARGET_PAGE_MASK)); - if (mr == NULL) { - mr = cpu->uc->memory_mapping(cpu->uc, mem_vaddr); - } - - if ((mr->perms & UC_PROT_EXEC) != 0) { + if (mr && (mr->perms & UC_PROT_EXEC) != 0) { struct page_collection *pages = page_collection_lock(cpu->uc, ram_addr, ram_addr + size); tb_invalidate_phys_page_fast(cpu->uc, pages, ram_addr, size, retaddr); @@ -1208,8 +1208,9 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, // - have memory hooks installed // - or doing snapshot // , then never clean the tlb - if (!(cpu->uc->snapshot_level > 0 || mr->priority > 0) && - !(HOOK_EXISTS(cpu->uc, UC_HOOK_MEM_READ) || HOOK_EXISTS(cpu->uc, UC_HOOK_MEM_WRITE))) { + if (!(!mr || (tlbe->addr_write != -1 && mr->priority < cpu->uc->snapshot_level)) && + !(tlbe->addr_code != -1) && + !uc_mem_hook_installed(cpu->uc, tlbe->paddr | (mem_vaddr & ~TARGET_PAGE_MASK))) { tlb_set_dirty(cpu, mem_vaddr); } } @@ -1288,7 +1289,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, NULL); + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, entry); } } @@ -1414,7 +1415,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, if (unlikely(tlb_addr & TLB_NOTDIRTY)) { notdirty_write(env_cpu(env), addr, 1 << s_bits, - &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr, NULL); + &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr, tlbe); } return hostaddr; @@ -2273,7 +2274,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, mr); + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, entry); } haddr = (void *)((uintptr_t)addr + entry->addend); diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 217522d085..366f3a4f5c 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1839,7 +1839,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* Undoes tlb_set_dirty in notdirty_write. */ - if (!(HOOK_EXISTS(cpu->uc, UC_HOOK_MEM_READ) || HOOK_EXISTS(cpu->uc, UC_HOOK_MEM_WRITE))) { + if (!uc_mem_hook_installed(cpu->uc, tb->pc)) { tlb_reset_dirty_by_vaddr(cpu, pc & TARGET_PAGE_MASK, (pc & ~TARGET_PAGE_MASK) + tb->size); } diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h index b999c71678..3b23c30430 100644 --- a/qemu/include/exec/exec-all.h +++ b/qemu/include/exec/exec-all.h @@ -477,4 +477,27 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section); +static inline bool uc_mem_hook_installed(struct uc_struct *uc, target_ulong paddr) +{ + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_FETCH_UNMAPPED, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_READ_UNMAPPED, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_READ, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_READ_PROT, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_FETCH_PROT, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_READ_AFTER, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_WRITE, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_WRITE_UNMAPPED, paddr)) + return true; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_MEM_WRITE_PROT, paddr)) + return true; + return false; +} + #endif diff --git a/qemu/softmmu/cpus.c b/qemu/softmmu/cpus.c index 22511ac73c..afb8bbedff 100644 --- a/qemu/softmmu/cpus.c +++ b/qemu/softmmu/cpus.c @@ -93,6 +93,7 @@ static int tcg_cpu_exec(struct uc_struct *uc) // (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); if (cpu_can_run(cpu)) { uc->quit_request = false; + uc->size_recur_mem = 0; r = cpu_exec(uc, cpu); // quit current TB but continue emulating? diff --git a/tests/unit/test_x86.c b/tests/unit/test_x86.c index c3d0b15776..3eee30e8a3 100644 --- a/tests/unit/test_x86.c +++ b/tests/unit/test_x86.c @@ -624,6 +624,67 @@ static void test_x86_smc_xor(void) OK(uc_close(uc)); } +static void test_x86_smc_add(void) +{ + uc_engine *uc; + uint64_t stack_base = 0x20000; + int r_rsp; + /* + * mov qword ptr [rip+0x10], rax + * mov word ptr [rip], 0x0548 + * [orig] mov eax, dword ptr [rax + 0x12345678]; [after SMC] 480578563412 add rax, 0x12345678 + * hlt + */ + char code[] = "\x48\x89\x05\x10\x00\x00\x00\x66\xc7\x05\x00\x00\x00\x00\x48\x05\x8b\x80\x78\x56\x34\x12\xf4"; + uc_common_setup(&uc, UC_ARCH_X86, UC_MODE_64, code, sizeof(code) - 1); + + OK(uc_mem_map(uc, stack_base, 0x2000, UC_PROT_ALL)); + r_rsp = stack_base + 0x1800; + OK(uc_reg_write(uc, UC_X86_REG_RSP, &r_rsp)); + OK(uc_emu_start(uc, code_start, -1, 0, 0)); +} + +static void test_x86_smc_mem_hook_callback(uc_engine *uc, uc_mem_type t, + uint64_t addr, int size, + uint64_t value, void *user_data) +{ + uint64_t write_addresses[] = { 0x1030, 0x1010, 0x1010, 0x1018, 0x1018, 0x1029, 0x1029 }; + unsigned int *i = user_data; + + TEST_CHECK(*i < (sizeof(write_addresses)/sizeof(write_addresses[0]))); + TEST_CHECK(write_addresses[*i] == addr); + (*i)++; +} + +static void test_x86_smc_mem_hook(void) +{ + uc_engine *uc; + uc_hook hook; + uint64_t stack_base = 0x20000; + int r_rsp; + unsigned int i = 0; + /* + * mov qword ptr [rip+0x29], rax + * mov word ptr [rip], 0x0548 + * [orig] mov eax, dword ptr [rax + 0x12345678]; [after SMC] 480578563412 add rax, 0x12345678 + * nop + * nop + * nop + * mov qword ptr [rip-0x08], rax + * mov word ptr [rip], 0x0548 + * [orig] mov eax, dword ptr [rax + 0x12345678]; [after SMC] 480578563412 add rax, 0x12345678 + * hlt + */ + char code[] = "\x48\x89\x05\x29\x00\x00\x00\x66\xC7\x05\x00\x00\x00\x00\x48\x05\x8B\x80\x78\x56\x34\x12\x90\x90\x90\x48\x89\x05\xF8\xFF\xFF\xFF\x66\xC7\x05\x00\x00\x00\x00\x48\x05\x8B\x80\x78\x56\x34\x12\xF4"; + uc_common_setup(&uc, UC_ARCH_X86, UC_MODE_64, code, sizeof(code) - 1); + + OK(uc_hook_add(uc, &hook, UC_HOOK_MEM_WRITE, test_x86_smc_mem_hook_callback, &i, 1, 0)); + OK(uc_mem_map(uc, stack_base, 0x2000, UC_PROT_ALL)); + r_rsp = stack_base + 0x1800; + OK(uc_reg_write(uc, UC_X86_REG_RSP, &r_rsp)); + OK(uc_emu_start(uc, code_start, -1, 0, 0)); +} + static uint64_t test_x86_mmio_uc_mem_rw_read_callback(uc_engine *uc, uint64_t offset, unsigned size, @@ -1849,6 +1910,8 @@ TEST_LIST = { {"test_x86_mmio", test_x86_mmio}, {"test_x86_missing_code", test_x86_missing_code}, {"test_x86_smc_xor", test_x86_smc_xor}, + {"test_x86_smc_add", test_x86_smc_add}, + {"test_x86_smc_mem_hook", test_x86_smc_mem_hook}, {"test_x86_mmio_uc_mem_rw", test_x86_mmio_uc_mem_rw}, {"test_x86_sysenter", test_x86_sysenter}, {"test_x86_hook_cpuid", test_x86_hook_cpuid}, diff --git a/uc.c b/uc.c index 0ff4196f1f..11b6a3f838 100644 --- a/uc.c +++ b/uc.c @@ -2160,6 +2160,7 @@ uc_err uc_context_save(uc_engine *uc, uc_context *context) } context->ramblock_freed = uc->ram_list.freed; context->last_block = uc->ram_list.last_block; + uc->tcg_flush_tlb(uc); } context->snapshot_level = uc->snapshot_level; @@ -2436,6 +2437,7 @@ uc_err uc_context_restore(uc_engine *uc, uc_context *context) if (!uc->flatview_copy(uc, uc->address_space_memory.current_map, context->fv, true)) { return UC_ERR_NOMEM; } + uc->tcg_flush_tlb(uc); } if (uc->context_content & UC_CTL_CONTEXT_CPU) {