KVM: s390: vsie: Fix unshadowing logic

In some cases (e.g. under extreme memory pressure on the host),
attempting to shadow memory will result in the same memory being
unshadowed, causing a loop.

Add a PGSTE bit to distinguish between shadowed memory and shadowed DAT
tables, and fix the unshadowing logic in _gmap_ptep_xchg() to prevent
unnecessary unshadowing and perform better checks.

Also fix the unshadowing logic in _gmap_crstep_xchg_atomic() which did
not unshadow properly when the large page would become unprotected.

Opportunistically add a check in gmap_protect_rmap() to make sure it
won't be called with level == TABLE_TYPE_PAGE_TABLE.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: a2c17f9270 ("KVM: s390: New gmap code")
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
This commit is contained in:
Claudio Imbrenda 2026-05-19 17:01:12 +02:00
parent 4df4b7cdf5
commit 2d505c2906
5 changed files with 63 additions and 5 deletions

View File

@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {

View File

@ -145,7 +145,8 @@ union pgste {
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
unsigned long : 5;
unsigned long vsie_gmem : 1; /* Contains nested guest memory */
unsigned long : 4;
unsigned long : 8;
};
struct {

View File

@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
} else {
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
pgste.vsie_gmem = 1;
}
pgste_set_unlock(ptep_h, pgste);
if (rc)

View File

@ -1031,7 +1031,8 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
union pte pte;
int flags, rc;
KVM_BUG_ON(!is_shadow(sg), sg->kvm);
if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
return -EINVAL;
lockdep_assert_held(&sg->parent->children_lock);
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);

View File

@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
return _gmap_unmap_prefix(gmap, gfn, end, false);
}
/**
 * pte_needs_unshadow() - Decide whether a pte exchange must trigger unshadowing.
 * @oldpte: the value of the guest pte before the exchange.
 * @newpte: the value about to be installed.
 * @pgste: the pgste belonging to the pte entry.
 *
 * Three situations are distinguished, based on the pgste bits:
 *
 * - pgste.vsie_notif clear: the page plays no role in vsie, so no unshadow
 *   operation is ever required.
 *
 * - pgste.vsie_notif set, pgste.vsie_gmem clear: the pte maps g2 DAT tables
 *   used for g3. Once the entry becomes writable or absent, the shadow
 *   mapping can no longer be guaranteed to match g2's mapping, so an
 *   unshadow event is required.
 *
 * - pgste.vsie_notif and pgste.vsie_gmem both set: the pte represents
 *   shadowed guest memory. The access rights on g3's memory have to stay
 *   synchronized with g1's and g2's, so an unshadow event is required when
 *   the protection differs between old and new pte, or when the new pte is
 *   invalid.
 *
 * Return: true if an unshadow event should be triggered, otherwise false.
 */
static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
{
	/* Page is not referenced by vsie at all. */
	if (!pgste.vsie_notif)
		return false;

	/* Shadowed DAT table: only a protected and present entry is safe. */
	if (!pgste.vsie_gmem)
		return !(newpte.h.p && newpte.s.pr);

	/* Shadowed guest memory: invalidation or a protection change. */
	return newpte.h.i || (newpte.h.p != oldpte.h.p);
}
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
pgste.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + 1);
}
if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
if (pte_needs_unshadow(*ptep, newpte, pgste)) {
pgste.vsie_notif = 0;
pgste.vsie_gmem = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
@ -198,6 +229,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
/**
 * crste_needs_unshadow() - Decide whether a crste exchange must trigger unshadowing.
 * @oldcrste: the value of the crste before the exchange.
 * @newcrste: the value about to be installed.
 *
 * When the old crste does not carry the vsie_notif bit, the page is not
 * involved in vsie and no unshadow operation is needed. When the bit is
 * set, the entry can only describe g3 memory, because DAT tables are never
 * mapped using large pages.
 *
 * As in the pgste.vsie_gmem case of pte_needs_unshadow(), an unshadow event
 * is required when the protection bit changes or when the new entry is
 * invalid. It is additionally required when the new crste no longer has
 * the vsie_notif bit set.
 *
 * Return: true if an unshadow event should be triggered, otherwise false.
 */
static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
{
	bool prot_changed, notif_dropped;

	/* Old entry was not referenced by vsie: nothing to unshadow. */
	if (!oldcrste.s.fc1.vsie_notif)
		return false;

	prot_changed = oldcrste.h.p != newcrste.h.p;
	notif_dropped = !newcrste.s.fc1.vsie_notif;

	return prot_changed || newcrste.h.i || notif_dropped;
}
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
union crste oldcrste, union crste newcrste,
gfn_t gfn, bool needs_lock)
@ -216,8 +271,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
(newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);