diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 34c9950884d5..9453321ef8c6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -555,8 +555,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_destroy_mpidr_data(vcpu->kvm); err = kvm_vgic_vcpu_init(vcpu); - if (err) + if (err) { + kvm_vgic_vcpu_destroy(vcpu); return err; + } err = kvm_share_hyp(vcpu, vcpu + 1); if (err) diff --git a/arch/arm64/kvm/hyp/nvhe/trace.c b/arch/arm64/kvm/hyp/nvhe/trace.c index a6ca27b18e15..e7e150ab265f 100644 --- a/arch/arm64/kvm/hyp/nvhe/trace.c +++ b/arch/arm64/kvm/hyp/nvhe/trace.c @@ -164,13 +164,16 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer, return ret; } -static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size) +static bool hyp_trace_desc_is_valid(struct hyp_trace_desc *desc, size_t desc_size) { struct ring_buffer_desc *rb_desc; unsigned int cpu; size_t nr_bpages; void *desc_end; + if (!is_protected_kvm_enabled()) + return true; + /* * Both desc_size and bpages_backing_size are untrusted host-provided * values. We rely on __pkvm_host_donate_hyp() to enforce their validity. @@ -212,8 +215,10 @@ int __tracing_load(unsigned long desc_hva, size_t desc_size) if (ret) return ret; - if (!hyp_trace_desc_validate(desc, desc_size)) + if (!hyp_trace_desc_is_valid(desc, desc_size)) { + ret = -EINVAL; goto err_release_desc; + } hyp_spin_lock(&trace_buffer.lock); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2ea9f1c7ebcd..1d7e5d560af4 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2307,6 +2307,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, /* dte entry is valid */ offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + /* Mimic the MAPD behaviour and reject invalid EID bits. */ + if (num_eventid_bits > VITS_TYPER_IDBITS) + return -EINVAL; + if (!vgic_its_check_id(its, baser, id, NULL)) return -EINVAL; diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index 4d89b94128ae..f09f9251d1f0 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -415,7 +415,6 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run, shift = 8 * (sizeof(ulong) - len); } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) { len = 1; - shift = 8 * (sizeof(ulong) - len); #ifdef CONFIG_64BIT } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { len = 8; @@ -649,22 +648,22 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) case 1: data8 = *((u8 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data8 << shift >> shift); + (long)((ulong)data8 << shift) >> shift); break; case 2: data16 = *((u16 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data16 << shift >> shift); + (long)((ulong)data16 << shift) >> shift); break; case 4: data32 = *((u32 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data32 << shift >> shift); + (long)((ulong)data32 << shift) >> shift); break; case 8: data64 = *((u64 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data64 << shift >> shift); + (long)((ulong)data64 << shift) >> shift); break; default: return -EOPNOTSUPP; diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index a935ed96bc17..bb46dcbfb24d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -453,8 +453,10 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s } kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); - if (!kvpmu->sdata) - return -ENOMEM; + if (!kvpmu->sdata) { + sbiret = SBI_ERR_FAILURE; + goto out; + } /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { @@ -499,8 +501,10 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low } einfo = kzalloc(shmem_size, GFP_KERNEL); - if (!einfo) - return -ENOMEM; + if (!einfo) { + ret = SBI_ERR_FAILURE; + goto out; + } ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); if (ret) { diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index 3b834709b429..60e50296a008 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -46,7 +46,7 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) gfn = shmem >> PAGE_SHIFT; hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); - if (WARN_ON(kvm_is_error_hva(hva))) { + if (kvm_is_error_hva(hva)) { vcpu->arch.sta.shmem = INVALID_GPA; return; } diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c index 188d5ea5b3b8..c9c323d4577a 100644 --- a/arch/riscv/kvm/vcpu_sbi_v01.c +++ b/arch/riscv/kvm/vcpu_sbi_v01.c @@ -55,6 +55,8 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, for_each_set_bit(i, &hmask, BITS_PER_LONG) { rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i); + if (!rvcpu) + continue; ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT); if (ret < 0) break; diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 7b8d70fe406d..4a41c0247ffa 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g /* No need to take locks as the page table is not installed yet. */ pgste_init.prefix_notif = old.s.fc1.prefix_notif; pgste_init.vsie_notif = old.s.fc1.vsie_notif; + pgste_init.vsie_gmem = old.s.fc1.vsie_notif; pgste_init.pcl = uses_skeys && init.h.i; dat_init_pgstes(pt, pgste_init.val); } else { diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 8f8278c44879..873e13ac5a27 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -145,7 +145,8 @@ union pgste { unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 5; + unsigned long vsie_gmem : 1; /* Contains nested guest memory */ + unsigned long : 4; unsigned long : 8; }; struct { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b07accd19618..4f8d5592c9a9 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union } else { pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); pgste.vsie_notif = 1; + pgste.vsie_gmem = 1; } pgste_set_unlock(ptep_h, pgste); if (rc) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 3c26e35af0ef..957126ab991c 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -125,7 +125,7 @@ struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit) int gmap_set_limit(struct gmap *gmap, gfn_t limit) { - struct kvm_s390_mmu_cache *mc; + struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL; int rc, type; type = gmap_limit_to_type(limit); @@ -142,7 +142,6 @@ int gmap_set_limit(struct gmap *gmap, gfn_t limit) rc = dat_set_asce_limit(mc, &gmap->asce, type); } while (rc == -ENOMEM); - kvm_s390_free_mmu_cache(mc); return 0; } @@ -822,8 +821,8 @@ int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count) { - struct kvm_s390_mmu_cache *mc; - int rc; + struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL; + int rc = 0; mc = kvm_s390_new_mmu_cache(); if (!mc) @@ -1026,13 +1025,15 @@ int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level) int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, kvm_pfn_t pfn, int level, bool wr) { + unsigned long bitmask; union crste *crstep; union pgste pgste; union pte *ptep; union pte pte; int flags, rc; - KVM_BUG_ON(!is_shadow(sg), sg->kvm); + if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm)) + return -EINVAL; lockdep_assert_held(&sg->parent->children_lock); flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0); @@ -1041,8 +1042,9 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf if (rc) return rc; if (level <= TABLE_TYPE_REGION1) { + bitmask = -1UL << (8 + 11 * level); scoped_guard(spinlock, &sg->host_to_rmap_lock) - rc = gmap_insert_rmap(sg, p_gfn, r_gfn, level); + rc = gmap_insert_rmap(sg, p_gfn, r_gfn & bitmask, level); } if (rc) return rc; @@ -1143,8 +1145,10 @@ void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn) } scoped_guard(spinlock, &sg->host_to_rmap_lock) head = radix_tree_delete(&sg->host_to_rmap, gfn); - gmap_for_each_rmap_safe(rmap, rnext, head) + gmap_for_each_rmap_safe(rmap, rnext, head) { gmap_unshadow_level(sg, rmap->r_gfn, rmap->level); + kfree(rmap); + } } } diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 96ee1395a592..742e42a31744 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end) return _gmap_unmap_prefix(gmap, gfn, end, false); } +/** + * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing. + * @oldpte: the previous value for the guest pte. + * @newpte: the new pte being set. + * @pgste: the pgste for the pte entry. + * + * If the pgste.vsie_notif bit is not set, return false: the page is not + * involved in vsie and thus should not trigger an unshadow operation. + * + * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest + * memory. The access rights on g3's memory should be synchronized with g1's + * and g2's. Therefore unshadowing is triggered if the new and old pte + * differ in protection, or if the new pte is invalid. + * + * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables + * for g3. If the entry becomes writable or absent, it becomes impossible to + * guarantee that the shadow mapping will match g2's mapping. In that case, + * trigger an unshadow event. + * + * Return: true if an unshadow event should be triggered, otherwise false. + */ +static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste) +{ + if (!pgste.vsie_notif) + return false; + if (pgste.vsie_gmem) + return (oldpte.h.p != newpte.h.p) || newpte.h.i; + return !newpte.h.p || !newpte.s.pr; +} + static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, union pgste pgste, gfn_t gfn, bool needs_lock) { @@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un pgste.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + 1); } - if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) { + if (pte_needs_unshadow(*ptep, newpte, pgste)) { pgste.vsie_notif = 0; + pgste.vsie_gmem = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else @@ -189,6 +220,7 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un } if (!ptep->s.d && newpte.s.d && !newpte.s.s) SetPageDirty(pfn_to_page(newpte.h.pfra)); + pgste.zero = 0; return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap)); } @@ -198,6 +230,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); } +/** + * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing. + * @oldcrste: the previous value for the crste. + * @newcrste: the new value for the crste. + * + * If the old crste did not have the vsie_notif bit set, return false: the + * page is not involved in vsie and thus should not trigger an unshadow + * operation. Conversely, if the bit is set, it can only be g3 memory, since + * dat tables are never mapped using large pages. + * + * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the + * protection bit is changing or the new page is invalid, trigger an + * unshadow event. Also trigger an unshadow event if the new crste does not + * have the vsie_notif bit set. + * + * Return: true if an unshadow event should be triggered, otherwise false. + */ +static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste) +{ + if (!oldcrste.s.fc1.vsie_notif) + return false; + return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif; +} + static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, union crste oldcrste, union crste newcrste, gfn_t gfn, bool needs_lock) @@ -216,8 +272,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio newcrste.s.fc1.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + align); } - if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif && - (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) { + if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) { newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index adf211860949..993b551180fe 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1300,12 +1300,14 @@ bool __init avic_hardware_setup(void) } /* - * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) - * due to erratum 1235, which results in missed VM-Exits on the sender - * and thus missed wake events for blocking vCPUs due to the CPU - * failing to see a software update to clear IsRunning. + * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and + * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235, + * which results in missed VM-Exits on the sender and thus missed wake + * events for blocking vCPUs due to the CPU failing to see a software + * update to clear IsRunning. */ - enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17; + if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) + enable_ipiv = false; amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a1b63c63d1a..c1a72d749084 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4876,7 +4876,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = tdp_enabled; break; case KVM_CAP_X86_APIC_BUS_CYCLES_NS: - r = APIC_BUS_CYCLE_NS_DEFAULT; + r = kvm ? kvm->arch.apic_bus_cycle_ns : APIC_BUS_CYCLE_NS_DEFAULT; break; case KVM_CAP_EXIT_HYPERCALL: r = KVM_EXIT_HYPERCALL_VALID_MASK; diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index b689c4df4a01..1924a9895834 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -7,7 +7,7 @@ #include "test_util.h" -#include +#include #include #include "kvm_util.h" diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index 404f0028e110..0c84c27ea584 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms, vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, NSEC_PER_SEC / apic_hz); + TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1); + TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS), + NSEC_PER_SEC / apic_hz); + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); vcpu_args_set(vcpu, 2, apic_hz, delay_ms);