From eb489625b582d495bf52db6228087dca2b97af35 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 1 Mar 2018 10:59:24 -0500 Subject: [PATCH] x86: implement la57 paging mode The new paging more is extension of IA32e mode with more additional page table level. It brings support of 57-bit vitrual address space (128PB) and 52-bit physical address space (4PB). The structure of new page table level is identical to pml4. The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16]. CR4.LA57[bit 12] need to be set when pageing enables to activate 5-level paging mode. Backports commit 6c7c3c21f95dd9af8a0691c0dd29b07247984122 from qemu --- qemu/target-i386/arch_memory_mapping.c | 42 +++++++++++++++++--- qemu/target-i386/cpu.c | 16 +++++--- qemu/target-i386/cpu.h | 2 + qemu/target-i386/helper.c | 54 +++++++++++++++++++++----- 4 files changed, 94 insertions(+), 20 deletions(-) diff --git a/qemu/target-i386/arch_memory_mapping.c b/qemu/target-i386/arch_memory_mapping.c index d893f897..dcf08265 100644 --- a/qemu/target-i386/arch_memory_mapping.c +++ b/qemu/target-i386/arch_memory_mapping.c @@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as, /* IA-32e Paging */ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as, - hwaddr pml4e_start_addr, int32_t a20_mask) + hwaddr pml4e_start_addr, int32_t a20_mask, + target_ulong start_line_addr) { hwaddr pml4e_addr, pdpe_start_addr; uint64_t pml4e; @@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as, continue; } - line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48); + line_addr = start_line_addr | ((i & 0x1ffULL) << 39); pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask; walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr); } } + +static void walk_pml5e(MemoryMappingList *list, AddressSpace *as, + hwaddr pml5e_start_addr, int32_t a20_mask) +{ + hwaddr pml5e_addr, pml4e_start_addr; + uint64_t pml5e; + target_ulong line_addr; + int i; + + for (i = 0; i < 512; i++) { + pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask; + pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED, + NULL); + if (!(pml5e & PG_PRESENT_MASK)) { + /* not present */ + continue; + } + + line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48); + pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask; + walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr); + } +} #endif void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, @@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { - hwaddr pml4e_addr; + if (env->cr[4] & CR4_LA57_MASK) { + hwaddr pml5e_addr; - pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; - walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask); + pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; + walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask); + } else { + hwaddr pml4e_addr; + + pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; + walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask, + 0xffffULL << 48); + } } else #endif { diff --git a/qemu/target-i386/cpu.c b/qemu/target-i386/cpu.c index 16368b50..153efaec 100644 --- a/qemu/target-i386/cpu.c +++ b/qemu/target-i386/cpu.c @@ -230,7 +230,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ -#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \ + CPUID_7_0_ECX_LA57) #define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT @@ -314,7 +315,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "ospke", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "la57", NULL, NULL, NULL, NULL, NULL, "rdpid", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -2769,10 +2770,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000008: /* virtual & phys address size in low 2 bytes. */ if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { - /* 64 bit processor, 48 bits virtual, configurable - * physical bits. - */ - *eax = 0x00003000 + cpu->phys_bits; + /* 64 bit processor */ + *eax = cpu->phys_bits; /* configurable physical bits */ + if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) { + *eax |= 0x00003900; /* 57 bits virtual */ + } else { + *eax |= 0x00003000; /* 48 bits virtual */ + } } else { *eax = cpu->phys_bits; } diff --git a/qemu/target-i386/cpu.h b/qemu/target-i386/cpu.h index 93d08cef..ee386611 100644 --- a/qemu/target-i386/cpu.h +++ b/qemu/target-i386/cpu.h @@ -224,6 +224,7 @@ #define CR4_OSFXSR_SHIFT 9 #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT) #define CR4_OSXMMEXCPT_MASK (1U << 10) +#define CR4_LA57_MASK (1U << 12) #define CR4_VMXE_MASK (1U << 13) #define CR4_SMXE_MASK (1U << 14) #define CR4_FSGSBASE_MASK (1U << 16) @@ -627,6 +628,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_UMIP (1U << 2) #define CPUID_7_0_ECX_PKU (1U << 3) #define CPUID_7_0_ECX_OSPKE (1U << 4) +#define CPUID_7_0_ECX_LA57 (1U << 16) #define CPUID_7_0_ECX_RDPID (1U << 22) #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ diff --git a/qemu/target-i386/helper.c b/qemu/target-i386/helper.c index 6837c94f..fc42008f 100644 --- a/qemu/target-i386/helper.c +++ b/qemu/target-i386/helper.c @@ -464,11 +464,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) uint32_t hflags; #if defined(DEBUG_MMU) - printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]); + printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4); #endif if ((new_cr4 ^ env->cr[4]) & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK | - CR4_SMEP_MASK | CR4_SMAP_MASK)) { + CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) { tlb_flush(CPU(cpu), 1); } @@ -570,19 +570,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { + bool la57 = env->cr[4] & CR4_LA57_MASK; + uint64_t pml5e_addr, pml5e; uint64_t pml4e_addr, pml4e; int32_t sext; /* test virtual address sign extension */ - sext = (int64_t)addr >> 47; + sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47; if (sext != 0 && sext != -1) { env->error_code = 0; cs->exception_index = EXCP0D_GPF; return 1; } - pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & - env->a20_mask; + if (la57) { + pml5e_addr = ((env->cr[3] & ~0xfff) + + (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + pml5e = x86_ldq_phys(cs, pml5e_addr); + if (!(pml5e & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pml5e & (rsvd_mask | PG_PSE_MASK)) { + goto do_fault_rsvd; + } + if (!(pml5e & PG_ACCESSED_MASK)) { + pml5e |= PG_ACCESSED_MASK; + x86_stl_phys_notdirty(cs, pml5e_addr, pml5e); + } + ptep = pml5e ^ PG_NX_MASK; + } else { + pml5e = env->cr[3]; + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { goto do_fault; @@ -594,7 +616,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, pml4e |= PG_ACCESSED_MASK; x86_stl_phys_notdirty(cs, pml4e_addr, pml4e); } - ptep = pml4e ^ PG_NX_MASK; + ptep &= pml4e ^ PG_NX_MASK; pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); @@ -844,16 +866,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { + bool la57 = env->cr[4] & CR4_LA57_MASK; + uint64_t pml5e_addr, pml5e; uint64_t pml4e_addr, pml4e; int32_t sext; /* test virtual address sign extension */ - sext = (int64_t)addr >> 47; + sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47; if (sext != 0 && sext != -1) { return -1; } - pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & - env->a20_mask; + + if (la57) { + pml5e_addr = ((env->cr[3] & ~0xfff) + + (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + pml5e = x86_ldq_phys(cs, pml5e_addr); + if (!(pml5e & PG_PRESENT_MASK)) { + return -1; + } + } else { + pml5e = env->cr[3]; + } + + pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { return -1;