RCU wakeup fix for KVM s390 guest entry

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEwGNS88vfc9+v45Yq41TmuOI4ufgFAmiJzvQACgkQ41TmuOI4
 ufhSCw/7BTDYOZD24NkBEa8449tJRl4NV4nBrNQdj4E7jhvbs7+Q/zR5opZKwU/g
 vksnH9vW+YPuA01rplBWjdDk863q1oqnLvJ9lgh5KJVIvDHWf0EPyMqOnm5Y9KdP
 oALh/prtjek6B6rWA4PsC/OKXtx/w0zn4HulWr9LliUvJsmmsLkOTDvXB1bJeld9
 6Yi1AZ4MtqsxzLnKZVKFDfJKPWyJArzcIU0xyV5Rr62FtIIU/0WVGyTdwMj+DtIw
 +XaI4KSgyymyxChNn5dtV4JlNA9gi5oTggDSSglMWKw8oHeSgdvFtFD/05+txKLr
 4Veo1LAtug6iwmRBNPuPiPn1z5LBXpNMp0prwnpFsUuOaib+C1J3lkD+aCsmGPAG
 3f0Q9B6dM+m1MgabgzQHjeJVeVL6xMLxMHfVjXEVt2xq9lR4rskjvCr78aaL7UvF
 6l+wrpcdiayXkSkQKawdJUcBYS2TorQrc0Kn5XL/pD5qv5gu0BU26kvXRhe+xyyJ
 7WP8R/4uZZTLcIEZJWO9QQU3KWvnT+bKOOaPRX34SNXCxtJIhIHYw1ON6IEk/gfs
 P5WG34TmM4RtQuSLSpQpvG/K+hNU6gFl+5s+YUL4OhxDRCaDdLY/UlYwx6tXuGJ6
 D5R4F8HxLb5d32p/EOSSuJbVtYt6/hHT1m8wWACGb5SCe/gcNs4=
 =+mwu
 -----END PGP SIGNATURE-----

Merge tag 'kvm-s390-next-6.17-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

RCU wakeup fix for KVM s390 guest entry
This commit is contained in:
Paolo Bonzini 2025-07-30 13:56:09 -04:00
commit 196d9e72c4
6 changed files with 77 additions and 23 deletions

View File

@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
/*
 * Architecture override of arch_in_rcu_eqs(): when CONFIG_KVM is enabled,
 * report true if the current task has PF_VCPU set in its flags; without
 * CONFIG_KVM always report false.
 */
static __always_inline bool arch_in_rcu_eqs(void)
{
	/* Constant condition: lets the compiler drop the flag test entirely. */
	if (!IS_ENABLED(CONFIG_KVM))
		return false;

	return (current->flags & PF_VCPU) != 0;
}
#define arch_in_rcu_eqs arch_in_rcu_eqs
#endif

View File

@ -716,6 +716,9 @@ extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
u64 *gprs, unsigned long gasce);
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);

View File

@ -5062,6 +5062,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_handle_fault(vcpu);
}
/**
 * kvm_s390_enter_exit_sie - run sie64a() inside a guest-state section
 * @scb: SIE control block, handed unchanged to sie64a()
 * @gprs: register array handed unchanged to sie64a() (callers in this file
 *        pass vcpu->run->s.regs.gprs)
 * @gasce: ASCE value handed unchanged to sie64a() (callers in this file pass
 *         the asce of a gmap)
 *
 * Brackets the sie64a() call with guest_state_enter_irqoff() and
 * guest_state_exit_irqoff(), enabling host IRQs only for the duration of
 * sie64a() itself. The function is marked noinstr; the statement order below
 * is deliberate and must be preserved.
 *
 * NOTE(review): the "_irqoff" helpers suggest the caller must enter with
 * IRQs disabled (the visible callers do local_irq_disable() first) - confirm.
 *
 * Returns: the value returned by sie64a().
 */
int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
u64 *gprs, unsigned long gasce)
{
int ret;
guest_state_enter_irqoff();
/*
* The guest_state_{enter,exit}_irqoff() functions inform lockdep and
* tracing that entry to the guest will enable host IRQs, and exit from
* the guest will disable host IRQs.
*
* We must not use lockdep/tracing/RCU in this critical section, so we
* use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
*/
arch_local_irq_enable();
ret = sie64a(scb, gprs, gasce);
arch_local_irq_disable();
guest_state_exit_irqoff();
return ret;
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
@ -5082,20 +5106,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
* guest_timing_enter_irqoff and guest_timing_exit_irqoff
* should be no uaccess.
*/
local_irq_disable();
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(sie_page->pv_grregs,
vcpu->run->s.regs.gprs,
sizeof(sie_page->pv_grregs));
}
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs,
vcpu->arch.gmap->asce);
local_irq_disable();
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs,
vcpu->arch.gmap->asce);
__enable_cpu_timer_accounting(vcpu);
guest_timing_exit_irqoff();
local_irq_enable();
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@ -5111,10 +5142,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
}
}
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
local_irq_enable();
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);

View File

@ -1170,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->fpf & FPF_BPBC)
set_thread_flag(TIF_ISOLATE_BP_GUEST);
local_irq_disable();
guest_enter_irqoff();
local_irq_enable();
/*
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
* and VCPU requests also hinder the vSIE from running and lead
@ -1183,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
current->thread.gmap_int_code = 0;
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
local_irq_disable();
guest_timing_enter_irqoff();
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
guest_timing_exit_irqoff();
local_irq_enable();
}
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
local_irq_disable();
guest_exit_irqoff();
local_irq_enable();
/* restore guest state for bp isolation override */
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);

View File

@ -86,6 +86,22 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif
/**
* arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent
* states.
*
* Returns: true if the CPU is potentially in an RCU EQS, false otherwise.
*
* Architectures only need to define this if threads other than the idle thread
* may have an interruptible EQS. This does not need to handle idle threads. It
* is safe to over-estimate at the cost of redundant RCU management work.
*
* Invoked from irqentry_enter()
*/
#ifndef arch_in_rcu_eqs
static __always_inline bool arch_in_rcu_eqs(void)
{
	/* Generic fallback: no architecture-specific EQS to report. */
	return false;
}
#endif
/**
* enter_from_user_mode - Establish state when coming from user mode
*

View File

@ -220,7 +220,8 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* TINY_RCU does not support EQS, so let the compiler eliminate
* this part when enabled.
*/
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
if (!IS_ENABLED(CONFIG_TINY_RCU) &&
(is_idle_task(current) || arch_in_rcu_eqs())) {
/*
* If RCU is not watching then the same careful
* sequence vs. lockdep and tracing is required