mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 02:24:24 +02:00
KVM: s390: Remove non-atomic dat_crstep_xchg()
In practice dat_crstep_xchg() is racy and hard to use correctly. Simply remove it and replace its uses with dat_crstep_xchg_atomic(). This solves some actual races that lead to system hangs / crashes. Opportunistically fix an alignment issue in _gmap_crstep_xchg_atomic(). Fixes:589071eaaa("KVM: s390: KVM page table management functions: clear and replace") Fixes:94fd9b16cc("KVM: s390: KVM page table management functions: lifecycle management") Reviewed-by: Steffen Eiden <seiden@linux.ibm.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
This commit is contained in:
parent
0f54755343
commit
b827ef02f4
|
|
@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dat_crstep_xchg() - Exchange a gmap CRSTE with another.
|
||||
* @crstep: Pointer to the CRST entry
|
||||
* @new: Replacement entry.
|
||||
* @gfn: The affected guest address.
|
||||
* @asce: The ASCE of the address space.
|
||||
*
|
||||
* Context: This function is assumed to be called with kvm->mmu_lock held.
|
||||
*/
|
||||
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
|
||||
{
|
||||
if (crstep->h.i) {
|
||||
WRITE_ONCE(*crstep, new);
|
||||
return;
|
||||
} else if (cpu_has_edat2()) {
|
||||
crdte_crste(crstep, *crstep, new, gfn, asce);
|
||||
return;
|
||||
}
|
||||
|
||||
if (machine_has_tlb_guest())
|
||||
idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
|
||||
else
|
||||
idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
|
||||
WRITE_ONCE(*crstep, new);
|
||||
}
|
||||
|
||||
/**
|
||||
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
|
||||
* @crstep: Pointer to the CRST entry.
|
||||
|
|
@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
|
|||
*
|
||||
* Return: %true if the exchange was successful.
|
||||
*/
|
||||
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
|
||||
union asce asce)
|
||||
bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
|
||||
gfn_t gfn, union asce asce)
|
||||
{
|
||||
if (old.h.i)
|
||||
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
|
||||
|
|
@ -894,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
|
|||
|
||||
/* This table entry needs to be updated. */
|
||||
if (walk->start <= gfn && walk->end >= next) {
|
||||
dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
|
||||
if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
|
||||
return -EINVAL;
|
||||
/* A lower level table was present, needs to be freed. */
|
||||
if (!crste.h.fc && !crste.h.i) {
|
||||
if (is_pmd(crste))
|
||||
|
|
@ -1072,17 +1047,19 @@ int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
|
|||
|
||||
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
|
||||
{
|
||||
union crste crste = READ_ONCE(*crstep);
|
||||
union crste newcrste, oldcrste;
|
||||
int *n = walk->priv;
|
||||
|
||||
if (!crste.h.fc || crste.h.i || crste.h.p)
|
||||
return 0;
|
||||
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
|
||||
return 0;
|
||||
if (oldcrste.s.fc1.prefix_notif)
|
||||
break;
|
||||
newcrste = oldcrste;
|
||||
newcrste.s.fc1.prefix_notif = 1;
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
|
||||
*n = 2;
|
||||
if (crste.s.fc1.prefix_notif)
|
||||
return 0;
|
||||
crste.s.fc1.prefix_notif = 1;
|
||||
dat_crstep_xchg(crstep, crste, gfn, walk->asce);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -938,11 +938,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
|
|||
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
|
||||
}
|
||||
|
||||
static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
|
||||
static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
|
||||
{
|
||||
union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
|
||||
union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
|
||||
|
||||
dat_crstep_xchg(crstep, newcrste, gfn, asce);
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
|
||||
return oldcrste;
|
||||
}
|
||||
|
||||
static inline int get_level(union crste *crstep, union pte *ptep)
|
||||
|
|
|
|||
|
|
@ -1456,7 +1456,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
|
|||
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
|
||||
struct guest_fault *f, bool p)
|
||||
{
|
||||
union crste newcrste;
|
||||
union crste newcrste, oldcrste;
|
||||
gfn_t gfn;
|
||||
int rc;
|
||||
|
||||
|
|
@ -1469,16 +1469,20 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
|
||||
newcrste.s.fc1.d |= host->s.fc1.d;
|
||||
newcrste.s.fc1.sd |= host->s.fc1.sd;
|
||||
newcrste.h.p &= host->h.p;
|
||||
newcrste.s.fc1.vsie_notif = 1;
|
||||
newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
|
||||
_gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
|
||||
do {
|
||||
oldcrste = READ_ONCE(*host);
|
||||
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
|
||||
newcrste.s.fc1.d |= oldcrste.s.fc1.d;
|
||||
newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
|
||||
newcrste.h.p &= oldcrste.h.p;
|
||||
newcrste.s.fc1.vsie_notif = 1;
|
||||
newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
|
||||
} while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
|
||||
|
||||
newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
|
||||
dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
|
||||
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
|
||||
gfn = gpa_to_gfn(raddr);
|
||||
while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
|
|||
struct clear_young_pte_priv *priv = walk->priv;
|
||||
union crste crste, new;
|
||||
|
||||
crste = READ_ONCE(*crstep);
|
||||
do {
|
||||
crste = READ_ONCE(*crstep);
|
||||
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (!crste.s.fc1.y && crste.h.i)
|
||||
return 0;
|
||||
if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
|
||||
break;
|
||||
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (!crste.s.fc1.y && crste.h.i)
|
||||
return 0;
|
||||
if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
|
||||
new = crste;
|
||||
new.h.i = 1;
|
||||
new.s.fc1.y = 0;
|
||||
|
|
@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
|
|||
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
|
||||
new.s.fc1.d = 0;
|
||||
new.h.p = 1;
|
||||
dat_crstep_xchg(crstep, new, gfn, walk->asce);
|
||||
}
|
||||
} while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
|
||||
|
||||
priv->young = 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
|
|||
{
|
||||
struct gmap_unmap_priv *priv = walk->priv;
|
||||
struct folio *folio = NULL;
|
||||
union crste old = *crstep;
|
||||
|
||||
if (crstep->h.fc) {
|
||||
if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
|
||||
folio = phys_to_folio(crste_origin_large(*crstep));
|
||||
gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
|
||||
if (folio)
|
||||
uv_convert_from_secure_folio(folio);
|
||||
}
|
||||
if (!old.h.fc)
|
||||
return 0;
|
||||
|
||||
if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
|
||||
folio = phys_to_folio(crste_origin_large(old));
|
||||
/* No races should happen because kvm->mmu_lock is held in write mode */
|
||||
KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
|
||||
priv->gmap->kvm);
|
||||
if (folio)
|
||||
uv_convert_from_secure_folio(folio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
|
|||
|
||||
if (fatal_signal_pending(current))
|
||||
return 1;
|
||||
crste = READ_ONCE(*table);
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (crste.h.p && !crste.s.fc1.sd)
|
||||
return 0;
|
||||
do {
|
||||
crste = READ_ONCE(*table);
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (crste.h.p && !crste.s.fc1.sd)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If this large page contains one or more prefixes of vCPUs that are
|
||||
* currently running, do not reset the protection, leave it marked as
|
||||
* dirty.
|
||||
*/
|
||||
if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
|
||||
/*
|
||||
* If this large page contains one or more prefixes of vCPUs that are
|
||||
* currently running, do not reset the protection, leave it marked as
|
||||
* dirty.
|
||||
*/
|
||||
if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
|
||||
break;
|
||||
new = crste;
|
||||
new.h.p = 1;
|
||||
new.s.fc1.sd = 0;
|
||||
gmap_crstep_xchg(gmap, table, new, gfn);
|
||||
}
|
||||
} while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
|
||||
|
||||
for ( ; gfn < end; gfn++)
|
||||
mark_page_dirty(gmap->kvm, gfn);
|
||||
|
|
@ -646,8 +654,8 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
|
|||
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
|
||||
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
|
||||
{
|
||||
union crste newcrste, oldcrste;
|
||||
struct page_table *pt;
|
||||
union crste newcrste;
|
||||
union crste *crstep;
|
||||
union pte *ptep;
|
||||
int rc;
|
||||
|
|
@ -673,7 +681,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
|
|||
&crstep, &ptep);
|
||||
if (rc)
|
||||
return rc;
|
||||
dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
if (oldcrste.val == newcrste.val)
|
||||
break;
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -777,8 +789,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
|
|||
int rc;
|
||||
|
||||
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
|
||||
if (!rc)
|
||||
dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
|
||||
if (rc)
|
||||
return;
|
||||
while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
|
||||
;
|
||||
}
|
||||
|
||||
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
|
||||
|
|
@ -1017,8 +1031,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
|
|||
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
|
||||
return;
|
||||
}
|
||||
crste = READ_ONCE(*crstep);
|
||||
dat_crstep_clear(crstep, r_gfn, sg->asce);
|
||||
|
||||
crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
|
||||
if (crste_leaf(crste) || crste.h.i)
|
||||
return;
|
||||
if (is_pmd(crste))
|
||||
|
|
|
|||
|
|
@ -194,35 +194,40 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
|
|||
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
|
||||
}
|
||||
|
||||
static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn, bool needs_lock)
|
||||
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
|
||||
union crste oldcrste, union crste newcrste,
|
||||
gfn_t gfn, bool needs_lock)
|
||||
{
|
||||
unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
|
||||
unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
|
||||
|
||||
if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
|
||||
return true;
|
||||
|
||||
lockdep_assert_held(&gmap->kvm->mmu_lock);
|
||||
if (!needs_lock)
|
||||
lockdep_assert_held(&gmap->children_lock);
|
||||
|
||||
gfn = ALIGN_DOWN(gfn, align);
|
||||
if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
|
||||
ne.s.fc1.prefix_notif = 0;
|
||||
if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
|
||||
newcrste.s.fc1.prefix_notif = 0;
|
||||
gmap_unmap_prefix(gmap, gfn, gfn + align);
|
||||
}
|
||||
if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
|
||||
(ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
|
||||
ne.s.fc1.vsie_notif = 0;
|
||||
if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
|
||||
(newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
|
||||
newcrste.s.fc1.vsie_notif = 0;
|
||||
if (needs_lock)
|
||||
gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
else
|
||||
_gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
}
|
||||
dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
|
||||
return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
|
||||
}
|
||||
|
||||
static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn)
|
||||
static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
|
||||
union crste oldcrste, union crste newcrste,
|
||||
gfn_t gfn)
|
||||
{
|
||||
return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
|
||||
return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user