From f626279dea33ba551839f2321511ad127e5a58e8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Jun 2024 00:55:29 +0100 Subject: [PATCH 1/5] KVM: selftests: remove unused struct 'memslot_antagonist_args' 'memslot_antagonist_args' is unused since the original commit f73a3446252e ("KVM: selftests: Add memslot modification stress test"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20240602235529.228204-1-linux@treblig.org Signed-off-by: Sean Christopherson --- .../selftests/kvm/memslot_modification_stress_test.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 05fcf902e067..49f162573126 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -53,12 +53,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { From 17019d5195c467938b0289a2175e17eac4cc1cdf Mon Sep 17 00:00:00 2001 From: Manali Shukla Date: Wed, 5 Jun 2024 05:08:35 +0000 Subject: [PATCH 2/5] KVM: selftests: Treat AMD Family 17h+ as supporting branch insns retired When detecting AMD PMU support for encoding "branch instructions retired" as event 0xc2,0, simply check for Family 17h+ as all Zen CPUs support said encoding, and AMD will maintain the encoding for backwards compatibility on future CPUs. Note, the kernel proper also interprets Family 17h+ as Zen (see the sole caller of init_amd_zen_common()). 
Suggested-by: Sandipan Das Signed-off-by: Manali Shukla Link: https://lore.kernel.org/r/20240605050835.30491-1-manali.shukla@amd.com Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson --- .../kvm/x86_64/pmu_event_filter_test.c | 35 +++---------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 26b3e7efe5dd..c15513cd74d1 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -32,8 +32,8 @@ struct __kvm_pmu_event_filter { /* * This event list comprises Intel's known architectural events, plus AMD's - * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs). - * Note, AMD and Intel use the same encoding for instructions retired. + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. */ kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); @@ -353,38 +353,13 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t family, uint32_t model) -{ - return family == 0x17 && model <= 0x0f; -} - -static bool is_zen2(uint32_t family, uint32_t model) -{ - return family == 0x17 && model >= 0x30 && model <= 0x3f; -} - -static bool is_zen3(uint32_t family, uint32_t model) -{ - return family == 0x19 && model <= 0x0f; -} - /* - * Determining AMD support for a PMU event requires consulting the AMD - * PPR for the CPU or reference material derived therefrom. The AMD - * test code herein has been verified to work on Zen1, Zen2, and Zen3. - * - * Feel free to add more AMD CPUs that are documented to support event - * select 0xc2 umask 0 as "retired branch instructions." 
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. */ static bool use_amd_pmu(void) { - uint32_t family = kvm_cpu_family(); - uint32_t model = kvm_cpu_model(); - - return host_cpu_is_amd && - (is_zen1(family, model) || - is_zen2(family, model) || - is_zen3(family, model)); + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; } /* From ea09ace3f8f31fa32f6674c95329a6caf5ef629d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Jun 2024 10:17:56 +0800 Subject: [PATCH 3/5] KVM: selftests: Print the seed for the guest pRNG iff it has changed Print the guest's random seed during VM creation if and only if the seed has changed since the seed was last printed. The vast majority of tests, if not all tests at this point, set the seed during test initialization and never change the seed, i.e. printing it every time a VM is created is useless noise. Snapshot and print the seed during early selftest init to play nice with tests that use the kselftests harness, at the cost of printing an unused seed for tests that change the seed during test-specific initialization, e.g. dirty_log_perf_test. The kselftests harness runs each testcase in a separate process that is forked from the original process before creating each testcase's VM, i.e. waiting until first VM creation will result in the seed being printed by each testcase despite it never changing. And long term, the hope/goal is that setting the seed will be handled by the core framework, i.e. that the dirty_log_perf_test wart will naturally go away. 
Reported-by: Yi Lai Reported-by: Dapeng Mi Link: https://lore.kernel.org/r/20240627021756.144815-2-dapeng1.mi@linux.intel.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ad00e4761886..56b170b725b3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; +static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); @@ -434,7 +435,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - pr_info("Random seed: 0x%x\n", guest_random_seed); + if (guest_random_seed != last_guest_seed) { + pr_info("Random seed: 0x%x\n", guest_random_seed); + last_guest_seed = guest_random_seed; + } guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); @@ -2319,7 +2323,8 @@ void __attribute((constructor)) kvm_selftest_init(void) /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); - guest_random_seed = random(); + guest_random_seed = last_guest_seed = random(); + pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } From 5bb9af07d37c95ac83725ed0f92c2a4315ade0ae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Jun 2024 17:55:56 -0700 Subject: [PATCH 4/5] KVM: selftests: Rework macros in PMU counters test to prep for multi-insn loop Tweak the macros in the PMU counters test to prepare for moving the CLFLUSH+MFENCE instructions into the loop body, to fix an issue where a single CLFLUSH doesn't guarantee an LLC miss. 
Link: https://lore.kernel.org/r/20240628005558.3835480-2-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/pmu_counters_test.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 96446134c00b..bb40d7c0f83e 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -7,15 +7,25 @@ #include "pmu.h" #include "processor.h" -/* Number of LOOP instructions for the guest measurement payload. */ -#define NUM_BRANCHES 10 +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* Number of instructions in each loop. */ +#define NUM_INSNS_PER_LOOP 1 + /* * Number of "extra" instructions that will be counted, i.e. the number of - * instructions that are needed to set up the loop and then disabled the + * instructions that are needed to set up the loop and then disable the * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR. */ -#define NUM_EXTRA_INSNS 7 -#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS) +#define NUM_EXTRA_INSNS 7 + +/* Total number of instructions retired within the measured section. 
*/ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; @@ -100,7 +110,7 @@ static void guest_assert_event_count(uint8_t idx, GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCHES); + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -120,7 +130,7 @@ static void guest_assert_event_count(uint8_t idx, } sanity_checks: - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); GUEST_ASSERT_EQ(_rdpmc(pmc), count); wrmsr(pmc_msr, 0xdead); @@ -147,7 +157,7 @@ do { \ __asm__ __volatile__("wrmsr\n\t" \ clflush "\n\t" \ "mfence\n\t" \ - "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \ + "1: mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ FEP "loop .\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ @@ -500,7 +510,7 @@ static void guest_test_fixed_counters(void) wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); From 4669de42aa6c78669975d58c92433cdedeb7c2c3 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 27 Jun 2024 17:55:57 -0700 Subject: [PATCH 5/5] KVM: selftests: Increase robustness of LLC cache misses in PMU counters test Currently the PMU counters test does a single CLFLUSH{,OPT} on the loop's code, but due to speculative execution this might not cause LLC misses within the measured section. 
Instead of doing a single flush before the loop, do a cache flush on each iteration of the loop to confuse the prediction and ensure that at least one cache miss occurs within the measured section. Signed-off-by: Maxim Levitsky [sean: keep MFENCE, massage changelog] Link: https://lore.kernel.org/r/20240628005558.3835480-3-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/pmu_counters_test.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index bb40d7c0f83e..698cb36989db 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -13,15 +13,18 @@ /* Each iteration of the loop retires one branch instruction. */ #define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) -/* Number of instructions in each loop. */ -#define NUM_INSNS_PER_LOOP 1 +/* + * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, + * 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 3 /* * Number of "extra" instructions that will be counted, i.e. the number of * instructions that are needed to set up the loop and then disable the - * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR. + * counter. 2 MOV, 2 XOR, 1 WRMSR. */ -#define NUM_EXTRA_INSNS 7 +#define NUM_EXTRA_INSNS 5 /* Total number of instructions retired within the measured section. */ #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) @@ -144,8 +147,8 @@ static void guest_assert_event_count(uint8_t idx, * before the end of the sequence. * * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the - * start of the loop to force LLC references and misses, i.e. to allow testing - * that those events actually count. + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. 
to allow testing that those events actually count. * * If forced emulation is enabled (and specified), force emulation on a subset * of the measured code to verify that KVM correctly emulates instructions and @@ -155,10 +158,11 @@ static void guest_assert_event_count(uint8_t idx, #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ do { \ __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ clflush "\n\t" \ "mfence\n\t" \ - "1: mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ - FEP "loop .\n\t" \ + FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ @@ -173,9 +177,9 @@ do { \ wrmsr(pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \