From b146b289f759315fd27402a40bc15214515e6c45 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 7 Oct 2025 19:12:31 +0000 Subject: [PATCH 01/16] KVM: selftests: Don't fall over in mmu_stress_test when only one CPU is present Running mmu_stress_test on a system with only one CPU is not a recipe for success. However, there's no clear-cut reason why it absolutely shouldn't work, so the test shouldn't completely reject such a platform. At present, the *3/4 calculation will return zero on these platforms and the test fails. So, instead just skip that calculation. Suggested-by: Sean Christopherson Signed-off-by: Brendan Jackman Link: https://lore.kernel.org/r/20251007-b4-kvm-mmu-stresstest-1proc-v1-1-8c95aa0e30b6@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 37b7e6524533..c799e0d0694f 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void) TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); - nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + nr_vcpus = CPU_COUNT(&possible_mask); TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); + if (nr_vcpus >= 2) + nr_vcpus = nr_vcpus * 3/4; } int main(int argc, char *argv[]) From 17e5a9b77716564540d81f0c1e6082d28cf305c9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Oct 2025 15:30:57 -0700 Subject: [PATCH 02/16] KVM: selftests: Forcefully override ARCH from x86_64 to x86 Forcefully override ARCH from x86_64 to x86 to handle the scenario where the user specifies ARCH=x86_64 on the command line. Fixes: 9af04539d474 ("KVM: selftests: Override ARCH for x86_64 instead of using ARCH_DIR") Cc: stable@vger.kernel.org Reported-by: David Matlack Closes: https://lore.kernel.org/all/20250724213130.3374922-1-dmatlack@google.com Link: https://lore.kernel.org/r/20251007223057.368082-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index d9fffe06d3ea..f2b223072b62 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -6,7 +6,7 @@ ARCH ?= $(SUBARCH) ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) # Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) - ARCH := x86 + override ARCH := x86 endif include Makefile.kvm else From 9e4ce7a89e0b5f1d400f05533f38fe2590166ea6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Oct 2025 15:45:15 -0700 Subject: [PATCH 03/16] KVM: selftests: Use "gpa" and "gva" for local variable names in pre-fault test Rename guest_test_{phys,virt}_mem to g{p,v}a in the pre-fault memory test to shorten line lengths and to use standard terminology. Opportunsitically use "base_gva" in the guest code instead of "base_gpa" to match the host side code, which now passes in "gva" (and because referencing the virtual address avoids having to know that the data is identity mapped). No functional change intended. Cc: Yan Zhao Link: https://lore.kernel.org/r/20251007224515.374516-1-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/pre_fault_memory_test.c | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index f04768c1d2e4..93e603d91311 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -17,13 +17,13 @@ #define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) #define TEST_SLOT 10 -static void guest_code(uint64_t base_gpa) +static void guest_code(uint64_t base_gva) { volatile uint64_t val __used; int i; for (i = 0; i < TEST_NPAGES; i++) { - uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE); + uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE); val = *src; } @@ -161,6 +161,7 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, static void __test_pre_fault_memory(unsigned long vm_type, bool private) { + uint64_t gpa, gva, alignment, guest_page_size; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, .type = vm_type, @@ -170,35 +171,30 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private) struct kvm_vm *vm; struct ucall uc; - uint64_t guest_test_phys_mem; - uint64_t guest_test_virt_mem; - uint64_t alignment, guest_page_size; - vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; - guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size; + gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size; #ifdef __s390x__ alignment = max(0x100000UL, guest_page_size); #else alignment = SZ_2M; #endif - guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); - guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1); + gpa = align_down(gpa, alignment); + gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, TEST_SLOT, TEST_NPAGES, - private ? KVM_MEM_GUEST_MEMFD : 0); - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT, + TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, gva, gpa, TEST_NPAGES); if (private) - vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); + vm_mem_set_private(vm, gpa, TEST_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private); - pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); - pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); + pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); - vcpu_args_set(vcpu, 1, guest_test_virt_mem); + vcpu_args_set(vcpu, 1, gva); vcpu_run(vcpu); run = vcpu->run; From 0a9eb2afa185e84cd6c6b35aefb190e8f92c4c5c Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:16 +0000 Subject: [PATCH 04/16] KVM: selftests: Extend vmx_close_while_nested_test to cover SVM Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. Reviewed-by: Jim Mattson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-4-yosry.ahmed@linux.dev [sean: rename to "nested_close_kvm_test" to provide nested_* sorting] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ..._nested_test.c => nested_close_kvm_test.c} | 42 +++++++++++++++---- 2 files changed, 34 insertions(+), 10 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_close_while_nested_test.c => nested_close_kvm_test.c} (64%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 148d427ff24b..89ad8c82a7b2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -88,6 +88,7 @@ TEST_GEN_PROGS_x86 += x86/kvm_pv_test TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test TEST_GEN_PROGS_x86 += x86/msrs_test +TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/platform_info_test @@ -111,7 +112,6 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test -TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c similarity index 64% rename from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c rename to tools/testing/selftests/kvm/x86/nested_close_kvm_test.c index dad988351493..f001cb836bfa 100644 --- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_close_while_nested - * * Copyright (C) 2019, Red Hat, Inc. * * Verify that nothing bad happens if a KVM user exits with open @@ -12,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -22,6 +21,8 @@ enum { PORT_L0_EXIT = 0x2000, }; +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(void) { /* Exit to L0 */ @@ -29,9 +30,8 @@ static void l2_guest_code(void) : : [port] "d" (PORT_L0_EXIT) : "rax"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_vmx_code(struct vmx_pages *vmx_pages) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); @@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(0); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Prepare the VMCB for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(0); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); for (;;) { volatile struct kvm_run *run = vcpu->run; From e6bcdd21223835a6a5691af224c7c5ff4934436a Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:17 +0000 Subject: [PATCH 05/16] KVM: selftests: Extend vmx_nested_tsc_scaling_test to cover SVM Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-5-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...aling_test.c => nested_tsc_scaling_test.c} | 48 +++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_nested_tsc_scaling_test.c => nested_tsc_scaling_test.c} (83%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 89ad8c82a7b2..9ffa0d5c7654 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -91,6 +91,7 @@ TEST_GEN_PROGS_x86 += x86/msrs_test TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test @@ -118,7 +119,6 @@ TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test TEST_GEN_PROGS_x86 += x86/xapic_state_test diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c similarity index 83% rename from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c rename to tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index 1759fa5cb3f2..4260c9e4f489 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" /* L2 is scaled up (from L1's perspective) by this factor */ @@ -79,7 +80,30 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC scaling for L2 */ + wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32); + + /* launch L2 */ + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; @@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva = 0; uint64_t tsc_start, tsc_end; uint64_t tsc_khz; @@ -129,7 +161,8 @@ int main(int argc, char *argv[]) uint64_t l1_tsc_freq = 0; uint64_t l2_tsc_freq = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); @@ -152,8 +185,13 @@ int main(int argc, char *argv[]) printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); From 4d256d00e44e02fae84851729d70df2bc2ebe6e9 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:18 +0000 Subject: [PATCH 06/16] KVM: selftests: Move nested invalid CR3 check to its own test vmx_tsc_adjust_test currently verifies that a nested VMLAUNCH fails with an invalid CR3. This is irrelevant to TSC scaling, move it to a standalone test. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-6-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../kvm/x86/nested_invalid_cr3_test.c | 79 +++++++++++++++++++ .../selftests/kvm/x86/vmx_tsc_adjust_test.c | 10 --- 3 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 9ffa0d5c7654..5bfd242da1ca 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -91,6 +91,7 @@ TEST_GEN_PROGS_x86 += x86/msrs_test TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c new file mode 100644 index 000000000000..e839bcf5f8ad --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * This test verifies that L1 fails to enter L2 with an invalid CR3, and + * succeeds otherwise. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + vmcall(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + + /* Now restore CR3 and make sure L2 runs successfully */ + vmwrite(GUEST_CR3, save_cr3); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t guest_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_vmx_code); + vcpu_alloc_vmx(vm, &guest_gva); + vcpu_args_set(vcpu, 1, guest_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c index 2ceb5c78c442..2dcc0306a0d9 100644 --- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c @@ -77,7 +77,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; - uintptr_t save_cr3; GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); @@ -94,15 +93,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); From 91423b041d3ca9f07ebc0c7859a20ac7eac8c755 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:19 +0000 Subject: [PATCH 07/16] KVM: selftests: Extend nested_invalid_cr3_test to cover SVM Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-7-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- .../kvm/x86/nested_invalid_cr3_test.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c index e839bcf5f8ad..a6b6da9cf7fe 100644 --- a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -7,6 +7,7 @@ */ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" @@ -17,6 +18,28 @@ static void l2_guest_code(void) vmcall(); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = svm->vmcb->save.cr3; + svm->vmcb->save.cr3 = -1ull; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR); + + /* Now restore CR3 and make sure L2 runs successfully */ + svm->vmcb->save.cr3 = save_cr3; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -43,16 +66,30 @@ static void l1_vmx_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; vm_vaddr_t guest_gva = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); - vm = vm_create_with_one_vcpu(&vcpu, l1_vmx_code); - vcpu_alloc_vmx(vm, &guest_gva); vcpu_args_set(vcpu, 1, guest_gva); for (;;) { From 3c40777f0ed81e8b8f7047319ad195e407614b69 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:20 +0000 Subject: [PATCH 08/16] KVM: selftests: Extend vmx_tsc_adjust_test to cover SVM Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. Reviewed-by: Jim Mattson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-8-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...adjust_test.c => nested_tsc_adjust_test.c} | 65 ++++++++++++------- 2 files changed, 43 insertions(+), 24 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_tsc_adjust_test.c => nested_tsc_adjust_test.c} (61%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 5bfd242da1ca..3127983c1285 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -92,6 +92,7 @@ TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test @@ -119,7 +120,6 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test -TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test TEST_GEN_PROGS_x86 += x86/xapic_state_test diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c similarity index 61% rename from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c rename to tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2dcc0306a0d9..2839f650e5c9 100644 --- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_tsc_adjust_test - * * Copyright (C) 2018, Google LLC. * * IA32_TSC_ADJUST test @@ -22,6 +20,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -35,6 +34,8 @@ #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) +#define L2_GUEST_STACK_SIZE 64 + enum { PORT_ABORT = 0x1000, PORT_REPORT, @@ -72,32 +73,47 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_guest_code(void *data) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); + /* + * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not + * intercepting the write so it should update L1's TSC_ADJUST. + */ + if (this_cpu_has(X86_FEATURE_VMX)) { + struct vmx_pages *vmx_pages = data; + uint32_t control; - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + } else { + struct svm_test_data *svm = data; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + } check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - GUEST_DONE(); } @@ -109,16 +125,19 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t nested_gva; struct kvm_vcpu *vcpu; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (;;) { struct ucall uc; From 28b2dced8ba4604b2cdd97c11d7fbd0fa99f9835 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:21 +0000 Subject: [PATCH 09/16] KVM: selftests: Stop hardcoding PAGE_SIZE in x86 selftests Use PAGE_SIZE instead of 4096. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-9-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86/hyperv_features.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 18 +++++++++--------- .../testing/selftests/kvm/x86/sev_smoke_test.c | 2 +- tools/testing/selftests/kvm/x86/state_test.c | 2 +- .../selftests/kvm/x86/userspace_io_test.c | 2 +- .../selftests/kvm/x86/vmx_dirty_log_test.c | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 99d327084172..130b9ce7e5dd 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { input = pgs_gpa; - output = pgs_gpa + 4096; + output = pgs_gpa + PAGE_SIZE; } else { input = output = 0; } diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 2b5b4bc6ef7e..ca61836c4e32 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ ipi->vector = IPI_VECTOR; ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 0; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 77256c89bb8d..86ad1c7d068f 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -104,7 +104,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy) vm_sev_launch(vm, policy, NULL); /* This page is shared, so make it decrypted. */ - memset(hva, 0, 4096); + memset(hva, 0, PAGE_SIZE); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 141b7fc0c965..f2c7a1c297e3 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -141,7 +141,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) if (this_cpu_has(X86_FEATURE_XSAVE)) { uint64_t supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[4096]; + uint8_t buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f..be7d72f3c029 100644 --- a/tools/testing/selftests/kvm/x86/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); + memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE); vcpu_regs_set(vcpu, ®s); } diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index fa512d033205..34a57fe747f6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -122,15 +122,15 @@ static void test_vmx_dirty_log(bool enable_ept) if (enable_ept) { prepare_eptp(vmx, vm, 0); nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } bmap = bitmap_zalloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept) } TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; From ff736dba478c2bcf8c8c8328ff8936b1e6d65e81 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:22 +0000 Subject: [PATCH 10/16] KVM: selftests: Remove the unused argument to prepare_eptp() eptp_memslot is unused, remove it. No functional change intended. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-10-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/vmx.h | 3 +-- tools/testing/selftests/kvm/lib/x86/memstress.c | 2 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 3 +-- tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index edb3c391b982..96e2b4c630a9 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -568,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 7f5d62a65c68..0b1f288ad556 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); /* * Identity map the first 4G and the test region with 1G pages so that diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d4d1208dd023..9906456af11f 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -534,8 +534,7 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) { TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index 34a57fe747f6..98cb6bdab3e6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -120,7 +120,7 @@ static void test_vmx_dirty_log(bool enable_ept) * GPAs as the EPT enabled case. */ if (enable_ept) { - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); nested_map_memslot(vmx, vm, 0); nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); From ae5b498b8da96749cdad7b5a013053d17d815a6e Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 28 Oct 2025 15:30:39 -0700 Subject: [PATCH 11/16] KVM: selftests: Use a loop to create guest page tables Walk the guest page tables via a loop when creating new mappings, instead of using unique variables for each level of the page tables. This simplifies the code and makes it easier to support 5-level paging in the future. Signed-off-by: Jim Mattson Reviewed-by: Yosry Ahmed Link: https://patch.msgid.link/20251028225827.2269128-2-jmattson@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/lib/x86/processor.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index b418502c5ecc..738f2a44083f 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -218,8 +218,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + uint64_t *pte = &vm->pgd; + int current_level; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -243,20 +243,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_create_upper_pte(vm, pte, vaddr, paddr, + current_level, level); + if (*pte & PTE_LARGE_MASK) + return; + } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); From 2103a8baf5cb7e6169434e5f2cc6c311929f529a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 28 Oct 2025 15:30:40 -0700 Subject: [PATCH 12/16] KVM: selftests: Use a loop to walk guest page tables Walk the guest page tables via a loop when searching for a PTE, instead of using unique variables for each level of the page tables. This simplifies the code and makes it easier to support 5-level paging in the future. Signed-off-by: Jim Mattson Reviewed-by: Yosry Ahmed Link: https://patch.msgid.link/20251028225827.2269128-3-jmattson@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/lib/x86/processor.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 738f2a44083f..720c678187b5 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -307,7 +307,8 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level) { - uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte = &vm->pgd; + int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); @@ -328,19 +329,15 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. The virtual address is invalid."); - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_get_pte(vm, pte, vaddr, current_level); + if (vm_is_target_pte(pte, level, current_level)) + return pte; + } - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; - - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; - - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); } uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) From ec5806639e39950527593e3be0efe7f0d7b65bf7 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 28 Oct 2025 15:30:41 -0700 Subject: [PATCH 13/16] KVM: selftests: Change VM_MODE_PXXV48_4K to VM_MODE_PXXVYY_4K Use 57-bit addresses with 5-level paging on hardware that supports LA57. Continue to use 48-bit addresses with 4-level paging on hardware that doesn't support LA57. Suggested-by: Sean Christopherson Signed-off-by: Jim Mattson Link: https://patch.msgid.link/20251028225827.2269128-4-jmattson@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util.h | 4 +-- .../selftests/kvm/include/x86/processor.h | 2 +- .../selftests/kvm/lib/arm64/processor.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 30 ++++++++++--------- .../testing/selftests/kvm/lib/x86/processor.c | 30 +++++++++++-------- tools/testing/selftests/kvm/lib/x86/vmx.c | 6 ++-- 6 files changed, 40 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index d3f3e455c031..8a54a1279d44 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -177,7 +177,7 @@ enum vm_guest_mode { VM_MODE_P40V48_4K, VM_MODE_P40V48_16K, VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */ VM_MODE_P47V64_4K, VM_MODE_P44V64_4K, VM_MODE_P36V48_4K, @@ -219,7 +219,7 @@ extern enum vm_guest_mode vm_mode_default; #elif defined(__x86_64__) -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 51cd84b9ca66..57d62a425109 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1441,7 +1441,7 @@ enum pg_level { PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, - PG_LEVEL_NUM + PG_LEVEL_256T }; #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 54f6d17c78f7..d46e4b13b92c 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -324,7 +324,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1a93d6361671..364efd02ad4a 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -201,7 +201,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", - [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", @@ -228,7 +228,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, @@ -310,24 +310,26 @@ struct kvm_vm *____vm_create(struct vm_shape shape) case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); kvm_init_vm_address_properties(vm); - /* - * Ignore KVM support for 5-level paging (vm->va_bits == 57), - * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this mode (48-bit virtual address space). - */ - TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, - "Linear address width (%d bits) not supported", - vm->va_bits); + pr_debug("Guest physical address width detected: %d\n", vm->pa_bits); - vm->pgtable_levels = 4; - vm->va_bits = 48; + pr_debug("Guest virtual address width detected: %d\n", + vm->va_bits); + + if (vm->va_bits == 57) { + vm->pgtable_levels = 5; + } else { + TEST_ASSERT(vm->va_bits == 48, + "Unexpected guest virtual address width: %d", + vm->va_bits); + vm->pgtable_levels = 4; + } #else - TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 720c678187b5..40bd69b265ef 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -158,10 +158,10 @@ bool kvm_is_tdp_enabled(void) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); - /* If needed, create page map l4 table. */ + /* If needed, create the top-level page table. */ if (!vm->pgd_created) { vm->pgd = vm_alloc_page_table(vm); vm->pgd_created = true; @@ -221,8 +221,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) uint64_t *pte = &vm->pgd; int current_level; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % pg_size) == 0, "Virtual address not aligned,\n" @@ -307,27 +307,28 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level) { + int va_width = 12 + (vm->pgtable_levels) * 9; uint64_t *pte = &vm->pgd; int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), "Invalid virtual address, vaddr: 0x%lx", vaddr); /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), + * Check that the vaddr is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. The virtual address is invalid."); + TEST_ASSERT(vaddr == + (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), + "Canonical check failed. The virtual address is invalid."); for (current_level = vm->pgtable_levels; current_level > PG_LEVEL_4K; @@ -520,7 +521,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; - TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); @@ -534,6 +536,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; if (kvm_cpu_has(X86_FEATURE_XSAVE)) sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 9906456af11f..29b082a58daa 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -401,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; uint16_t index; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", + "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", nested_paddr); TEST_ASSERT((nested_paddr % page_size) == 0, "Nested physical address not on page boundary,\n" From 6a8818de21d294c1de6be9a71afe184d08350875 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 28 Oct 2025 15:30:42 -0700 Subject: [PATCH 14/16] KVM: selftests: Add a VMX test for LA57 nested state Add a selftest that verifies KVM's ability to save and restore nested state when the L1 guest is using 5-level paging and the L2 guest is using 4-level paging. Specifically, canonicality tests of the VMCS12 host-state fields should accept 57-bit virtual addresses. Signed-off-by: Jim Mattson Link: https://patch.msgid.link/20251028225827.2269128-5-jmattson@google.com [sean: rename to vmx_nested_la57_state_test to prep nested_ namespace] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../kvm/x86/vmx_nested_la57_state_test.c | 132 ++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 3127983c1285..7ebf30a87a2b 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -119,6 +119,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c new file mode 100644 index 000000000000..cf1d2d1f2a8f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * Test KVM's ability to save and restore nested state when the L1 guest + * is using 5-level paging and the L2 guest is using 4-level paging. + * + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86: + * model canonical checks more precisely"). + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define LA57_GS_BASE 0xff2bc0311fb00000ull + +static void l2_guest_code(void) +{ + /* + * Sync with L0 to trigger save/restore. After + * resuming, execute VMCALL to exit back to L1. + */ + GUEST_SYNC(1); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 guest_cr4; + vm_paddr_t pml5_pa, pml4_pa; + u64 *pml5; + u64 exit_reason; + + /* Set GS_BASE to a value that is only canonical with LA57. */ + wrmsr(MSR_GS_BASE, LA57_GS_BASE); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Set up L2 with a 4-level page table by pointing its CR3 to + * L1's first PML4 table and clearing CR4.LA57. This creates + * the CR4.LA57 mismatch that exercises the bug. + */ + pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK; + pml5 = (u64 *)pml5_pa; + pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK; + vmwrite(GUEST_CR3, pml4_pa); + + guest_cr4 = vmreadz(GUEST_CR4); + guest_cr4 &= ~X86_CR4_LA57; + vmwrite(GUEST_CR4, guest_cr4); + + GUEST_ASSERT(!vmlaunch()); + + exit_reason = vmreadz(VM_EXIT_REASON); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + l1_guest_code(vmx_pages); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* + * L1 needs to read its own PML5 table to set up L2. Identity map + * the PML5 table to facilitate this. + */ + virt_map(vm, vm->pgd, vm->pgd, 1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(uc.args[1] == stage, + "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]); + if (stage == 1) { + pr_info("L2 is active; performing save/restore.\n"); + state = vcpu_save_state(vcpu); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + } + +done: + kvm_vm_free(vm); + return 0; +} From 1de4dc15baa1917adb3a0b369d00d818be9d9bab Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:23 +0000 Subject: [PATCH 15/16] KVM: selftests: Stop using __virt_pg_map() directly in tests Replace __virt_pg_map() calls in tests by high-level equivalent functions, removing some loops in the process. No functional change intended. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-11-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 6 ++---- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index c799e0d0694f..51c070556f3e 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -362,11 +362,9 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); + virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); + virt_map(vm, gpa, gpa, slot_size >> vm->page_shift); #endif } diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040..a3b7ce155981 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -621,7 +621,7 @@ int main(int argc, char *argv[]) for (i = 0; i < NTEST_PAGES; i++) { pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); } From d2e50389ab44acfa05e72604d701a70b234f9938 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 21 Oct 2025 07:47:24 +0000 Subject: [PATCH 16/16] KVM: selftests: Make sure vm->vpages_mapped is always up-to-date Call paths leading to __virt_pg_map() are currently: (a) virt_pg_map() -> virt_arch_pg_map() -> __virt_pg_map() (b) virt_map_level() -> __virt_pg_map() For (a), calls to virt_pg_map() from kvm_util.c make sure they update vm->vpages_mapped, but other callers do not. Move the sparsebit_set() call into virt_pg_map() to make sure all callers are captured. For (b), call sparsebit_set_num() from virt_map_level(). It's tempting to have a single the call inside __virt_pg_map(), however: - The call path in (a) is not x86-specific, while (b) is. Moving the call into __virt_pg_map() would require doing something similar for other archs implementing virt_pg_map(). - Future changes will reusue __virt_pg_map() for nested PTEs, which should not update vm->vpages_mapped, i.e. a triple underscore version that does not update vm->vpages_mapped would need to be provided. Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251021074736.1324328-12-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 3 --- tools/testing/selftests/kvm/lib/x86/processor.c | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8a54a1279d44..d701ee557f5b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1230,6 +1230,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { virt_arch_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 364efd02ad4a..33aebedfc050 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1458,8 +1458,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr); - - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; @@ -1573,7 +1571,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); vaddr += page_size; paddr += page_size; diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 40bd69b265ef..36104d27f3d9 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -286,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, for (i = 0; i < nr_pages; i++) { __virt_pg_map(vm, vaddr, paddr, level); + sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, + nr_bytes / PAGE_SIZE); vaddr += pg_size; paddr += pg_size;