Merge branch 'kvm-arm64/misc' into kvmarm/next

* kvm-arm64/misc:
  : Miscellaneous fixes/cleanups for KVM/arm64
  :
  :  - Fix for need_resched warnings on non-preemptible kernels when
  :    tearing down a VM's stage-2
  :
  :  - Improvements to KVM struct allocation, getting rid of pointless
  :    __GFP_HIGHMEM and switching to kvzalloc()
  :
  :  - SYNC ITS configuration before injecting LPIs in vgic_lpi_stress
  :    selftest
  KVM: arm64: Reschedule as needed when destroying the stage-2 page-tables
  KVM: arm64: Split kvm_pgtable_stage2_destroy()
  KVM: arm64: Only drop references on empty tables in stage2_free_walker
  KVM: selftests: SYNC after guest ITS setup in vgic_lpi_stress
  KVM: selftests: Assert GICR_TYPER.Processor_Number matches selftest CPU number
  KVM: arm64: Use kvzalloc() for kvm struct allocation
  KVM: arm64: Drop useless __GFP_HIGHMEM from kvm struct allocation

Signed-off-by: Oliver Upton <oupton@kernel.org>
This commit is contained in:
Oliver Upton 2025-12-01 00:47:12 -08:00
commit 404c2027d5
10 changed files with 153 additions and 19 deletions

View File

@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return pteref;
}
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
return pteref;
}
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
/*
@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
return rcu_dereference_raw(pteref);
}
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@ -551,6 +561,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
*/
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address at which to place the mapping.
* @size: Size of the mapping.
*
* The page-table is assumed to be unreachable by any hardware walkers prior
* to freeing and therefore no TLB invalidation is performed.
*/
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size);
/**
* kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
*
* It is assumed that the rest of the page-table is freed before this operation.
*/
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.

View File

@ -180,7 +180,9 @@ struct pkvm_mapping {
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size);
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot, void *mc,
enum kvm_pgtable_walk_flags flags);

View File

@ -440,7 +440,7 @@ struct kvm *kvm_arch_alloc_vm(void)
if (!has_vhe())
return kzalloc(sz, GFP_KERNEL_ACCOUNT);
return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
return kvzalloc(sz, GFP_KERNEL_ACCOUNT);
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)

View File

@ -1535,37 +1535,80 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
static int stage2_free_leaf(const struct kvm_pgtable_visit_ctx *ctx)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
if (!stage2_pte_is_counted(ctx->old))
return 0;
mm_ops->put_page(ctx->ptep);
if (kvm_pte_table(ctx->old, ctx->level))
mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
return 0;
}
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
static int stage2_free_table_post(const struct kvm_pgtable_visit_ctx *ctx)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
if (mm_ops->page_count(childp) != 1)
return 0;
/*
* Drop references and clear the now stale PTE to avoid rewalking the
* freed page table.
*/
mm_ops->put_page(ctx->ptep);
mm_ops->put_page(childp);
kvm_clear_pte(ctx->ptep);
return 0;
}
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
if (!stage2_pte_is_counted(ctx->old))
return 0;
switch (visit) {
case KVM_PGTABLE_WALK_LEAF:
return stage2_free_leaf(ctx);
case KVM_PGTABLE_WALK_TABLE_POST:
return stage2_free_table_post(ctx);
default:
return -EINVAL;
}
}
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size)
{
size_t pgd_sz;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
};
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
}
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
size_t pgd_sz;
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
/*
* Since the pgtable is unlinked at this point, and not shared with
* other walkers, safely deference pgd with kvm_dereference_pteref_raw()
*/
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
pgt->pgd = NULL;
}
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
kvm_pgtable_stage2_destroy_pgd(pgt);
}
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;

View File

@ -904,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
return 0;
}
/*
* Assume that @pgt is valid and unlinked from the KVM MMU to free the
* page-table without taking the kvm_mmu_lock and without performing any
* TLB invalidations.
*
* Also, the range of addresses can be large enough to cause need_resched
* warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
* cond_resched() periodically to prevent hogging the CPU for a long time
* and schedule something else, if required.
*/
static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
phys_addr_t end)
{
u64 next;
do {
next = stage2_range_addr_end(addr, end);
KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
next - addr);
if (next != end)
cond_resched();
} while (addr = next, addr != end);
}
static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
{
unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
stage2_destroy_range(pgt, 0, BIT(ia_bits));
KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
}
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
@ -980,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return 0;
out_destroy_pgtable:
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kvm_stage2_destroy(pgt);
out_free_pgtable:
kfree(pgt);
return err;
@ -1081,7 +1113,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kvm_stage2_destroy(pgt);
kfree(pgt);
}
}

View File

@ -344,9 +344,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
return 0;
}
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size)
{
__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
/* Expected to be called after all pKVM mappings have been released. */
WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,

View File

@ -118,6 +118,10 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
/* SYNC to ensure ITS setup is complete */
for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)

View File

@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__

View File

@ -298,12 +298,17 @@ static void gicv3_cpu_init(unsigned int cpu)
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
/* Verify assumption that GICR_TYPER.Processor_number == cpu */
typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
gicv3_enable_redist(redist_base_cpu);
/*

View File

@ -246,3 +246,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
{
struct its_cmd_block cmd = {};
its_encode_cmd(&cmd, GITS_CMD_SYNC);
its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_send_cmd(cmdq_base, &cmd);
}