mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 15:41:52 +02:00
KVM x86 posted interrupt changes for 6.16:
Refine and optimize KVM's software processing of the PIR, and ultimately share PIR harvesting code between KVM and the kernel's Posted MSI handler -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmgwmWcACgkQOlYIJqCj N/3mUw/9HN4OLRqFytu+GjEocl8I7JelJdwCsNMsUwZRnNVnYGDqsjvw8rzqeFmx RoQ8uNqMd1PqZOgAdN6suLES949ItErbnG2+UlBvZeNgR63K8fyNJaPUzSXh0Kyd vNNzGschI0txZXNEtMHcIsCuQknU/arlE6v+HOAokb1jxaIZH2h06vrBAj6pLAHO hbcZPkaQEaFoQhqCbYm015ecJQRPv3IZoW7H1cK5nC4q6QdNo3LPfGqUJwgHV3Wq hbfS+2J78nTqLhSn7HHE/y5z3R5+ZyPwFQwbqfvjjap5/DW5w8Tltg2Oif597lf2 klBukBkJyfzSdhjaPKb3V23kCNabNyyX7KUDZnW5HCiEu62Lnl0MexXCvFvSvtmy YDSsXMg3KdtlESwUOaxGjd2J81tx36L3ZvWRaopDLzA2A6KVyVQCSANGOGkKrRzq Qq3R/frzp1uUVpVDtdyDIO1AujoXkRecdOj1uAIr2XQBg8jx0kveAUyrkXFbQVjK oNbfRlOiu6/vnXkWqwZ2w/Q0kRRrK7M+vensOZlculqDqxPH+BLWB+dfPqjGikb/ cL01KPu6n/GQJpwAxIbGU4eUIQPAVOcHm3iRaIlRqEoDCs7C8fTRIyDx+cD1vW8O O9j/r05EV/Ck5XF2ks6bHIK+C3wemNrCvoeFbnO1uicqtdO+Tqw= =dU1G -----END PGP SIGNATURE----- Merge tag 'kvm-x86-pir-6.16' of https://github.com/kvm-x86/linux into HEAD KVM x86 posted interrupt changes for 6.16: Refine and optimize KVM's software processing of the PIR, and ultimately share PIR harvesting code between KVM and the kernel's Posted MSI handler
This commit is contained in:
commit
db44dcbdf8
|
|
@ -1,19 +1,24 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _X86_POSTED_INTR_H
|
||||
#define _X86_POSTED_INTR_H
|
||||
|
||||
#include <asm/cmpxchg.h>
|
||||
#include <asm/rwonce.h>
|
||||
#include <asm/irq_vectors.h>
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#define POSTED_INTR_ON 0
|
||||
#define POSTED_INTR_SN 1
|
||||
|
||||
#define PID_TABLE_ENTRY_VALID 1
|
||||
|
||||
#define NR_PIR_VECTORS 256
|
||||
#define NR_PIR_WORDS (NR_PIR_VECTORS / BITS_PER_LONG)
|
||||
|
||||
/* Posted-Interrupt Descriptor */
|
||||
struct pi_desc {
|
||||
union {
|
||||
u32 pir[8]; /* Posted interrupt requested */
|
||||
u64 pir64[4];
|
||||
};
|
||||
unsigned long pir[NR_PIR_WORDS]; /* Posted interrupt requested */
|
||||
union {
|
||||
struct {
|
||||
u16 notifications; /* Suppress and outstanding bits */
|
||||
|
|
@ -26,6 +31,65 @@ struct pi_desc {
|
|||
u32 rsvd[6];
|
||||
} __aligned(64);
|
||||
|
||||
/*
|
||||
* De-multiplexing posted interrupts is on the performance path, the code
|
||||
* below is written to optimize the cache performance based on the following
|
||||
* considerations:
|
||||
* 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently
|
||||
* accessed by both CPU and IOMMU.
|
||||
* 2.During software processing of posted interrupts, the CPU needs to do
|
||||
* natural width read and xchg for checking and clearing posted interrupt
|
||||
* request (PIR), a 256 bit field within the PID.
|
||||
* 3.On the other side, the IOMMU does atomic swaps of the entire PID cache
|
||||
* line when posting interrupts and setting control bits.
|
||||
* 4.The CPU can access the cache line an order of magnitude faster than the IOMMU.
|
||||
* 5.Each time the IOMMU posts an interrupt to the PIR, the PID cache line is
|
||||
* evicted. The cache line states after each operation are as follows,
|
||||
* assuming a 64-bit kernel:
|
||||
* CPU IOMMU PID Cache line state
|
||||
* ---------------------------------------------------------------
|
||||
*...read64 exclusive
|
||||
*...lock xchg64 modified
|
||||
*... post/atomic swap invalid
|
||||
*...-------------------------------------------------------------
|
||||
*
|
||||
* To reduce L1 data cache misses, it is important to avoid contention with
|
||||
* IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used
|
||||
* when processing posted interrupts in software, e.g. to dispatch interrupt
|
||||
* handlers for posted MSIs, or to move interrupts from the PIR to the vIRR
|
||||
* in KVM.
|
||||
*
|
||||
* In addition, the code is trying to keep the cache line state consistent
|
||||
* as much as possible. e.g. when making a copy and clearing the PIR
|
||||
* (assuming non-zero PIR bits are present in the entire PIR), it does:
|
||||
* read, read, read, read, xchg, xchg, xchg, xchg
|
||||
* instead of:
|
||||
* read, xchg, read, xchg, read, xchg, read, xchg
|
||||
*/
|
||||
/*
 * pi_harvest_pir - snapshot the PIR words and atomically clear the non-zero ones.
 * @pir:      PIR words to harvest; NR_PIR_WORDS entries are read.
 * @pir_vals: output array of NR_PIR_WORDS entries; receives the harvested bits.
 *
 * Returns false, without writing to @pir, if every word read as zero;
 * otherwise returns true after exchanging each non-zero word with 0.
 */
static __always_inline bool pi_harvest_pir(unsigned long *pir,
|
||||
unsigned long *pir_vals)
|
||||
{
|
||||
unsigned long pending = 0; /* OR of every word read; non-zero if any PIR bit was seen set */
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NR_PIR_WORDS; i++) {
|
||||
pir_vals[i] = READ_ONCE(pir[i]); /* first pass: reads only, no atomic operations */
|
||||
pending |= pir_vals[i];
|
||||
}
|
||||
|
||||
if (!pending) /* no bit was set in any word: skip the xchg pass entirely */
|
||||
return false;
|
||||
|
||||
for (i = 0; i < NR_PIR_WORDS; i++) {
|
||||
if (!pir_vals[i]) /* word read as zero: leave it untouched, pir_vals[i] stays 0 */
|
||||
continue;
|
||||
|
||||
pir_vals[i] = arch_xchg(&pir[i], 0); /* clear the word; the value xchg returns replaces the earlier snapshot */
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
|
||||
|
|
@ -43,12 +107,12 @@ static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
|
|||
|
||||
static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
|
||||
return test_and_set_bit(vector, pi_desc->pir);
|
||||
}
|
||||
|
||||
static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
|
||||
{
|
||||
return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
|
||||
return bitmap_empty(pi_desc->pir, NR_VECTORS);
|
||||
}
|
||||
|
||||
static inline void pi_set_sn(struct pi_desc *pi_desc)
|
||||
|
|
@ -110,7 +174,7 @@ static inline bool pi_pending_this_cpu(unsigned int vector)
|
|||
if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
|
||||
return false;
|
||||
|
||||
return test_bit(vector, (unsigned long *)pid->pir);
|
||||
return test_bit(vector, pid->pir);
|
||||
}
|
||||
|
||||
extern void intel_posted_msi_init(void);
|
||||
|
|
|
|||
|
|
@ -380,61 +380,18 @@ void intel_posted_msi_init(void)
|
|||
this_cpu_write(posted_msi_pi_desc.ndst, destination);
|
||||
}
|
||||
|
||||
/*
|
||||
* De-multiplexing posted interrupts is on the performance path, the code
|
||||
* below is written to optimize the cache performance based on the following
|
||||
* considerations:
|
||||
* 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently
|
||||
* accessed by both CPU and IOMMU.
|
||||
* 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg
|
||||
* for checking and clearing posted interrupt request (PIR), a 256 bit field
|
||||
* within the PID.
|
||||
* 3.On the other side, the IOMMU does atomic swaps of the entire PID cache
|
||||
* line when posting interrupts and setting control bits.
|
||||
* 4.The CPU can access the cache line a magnitude faster than the IOMMU.
|
||||
* 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID
|
||||
* cache line. The cache line states after each operation are as follows:
|
||||
* CPU IOMMU PID Cache line state
|
||||
* ---------------------------------------------------------------
|
||||
*...read64 exclusive
|
||||
*...lock xchg64 modified
|
||||
*... post/atomic swap invalid
|
||||
*...-------------------------------------------------------------
|
||||
*
|
||||
* To reduce L1 data cache miss, it is important to avoid contention with
|
||||
* IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used
|
||||
* to dispatch interrupt handlers.
|
||||
*
|
||||
* In addition, the code is trying to keep the cache line state consistent
|
||||
* as much as possible. e.g. when making a copy and clearing the PIR
|
||||
* (assuming non-zero PIR bits are present in the entire PIR), it does:
|
||||
* read, read, read, read, xchg, xchg, xchg, xchg
|
||||
* instead of:
|
||||
* read, xchg, read, xchg, read, xchg, read, xchg
|
||||
*/
|
||||
static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs)
|
||||
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
|
||||
{
|
||||
int i, vec = FIRST_EXTERNAL_VECTOR;
|
||||
unsigned long pir_copy[4];
|
||||
bool handled = false;
|
||||
unsigned long pir_copy[NR_PIR_WORDS];
|
||||
int vec = FIRST_EXTERNAL_VECTOR;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
pir_copy[i] = pir[i];
|
||||
if (!pi_harvest_pir(pir, pir_copy))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (!pir_copy[i])
|
||||
continue;
|
||||
for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
|
||||
call_irq_handler(vec, regs);
|
||||
|
||||
pir_copy[i] = arch_xchg(&pir[i], 0);
|
||||
handled = true;
|
||||
}
|
||||
|
||||
if (handled) {
|
||||
for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
|
||||
call_irq_handler(vec, regs);
|
||||
}
|
||||
|
||||
return handled;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -464,7 +421,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
|
|||
* MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here.
|
||||
*/
|
||||
while (++i < MAX_POSTED_MSI_COALESCING_LOOP) {
|
||||
if (!handle_pending_pir(pid->pir64, regs))
|
||||
if (!handle_pending_pir(pid->pir, regs))
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -479,7 +436,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
|
|||
* process PIR bits one last time such that handling the new interrupts
|
||||
* are not delayed until the next IRQ.
|
||||
*/
|
||||
handle_pending_pir(pid->pir64, regs);
|
||||
handle_pending_pir(pid->pir, regs);
|
||||
|
||||
apic_eoi();
|
||||
irq_exit();
|
||||
|
|
|
|||
|
|
@ -655,27 +655,29 @@ static u8 count_vectors(void *bitmap)
|
|||
return count;
|
||||
}
|
||||
|
||||
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
|
||||
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr)
|
||||
{
|
||||
unsigned long pir_vals[NR_PIR_WORDS];
|
||||
u32 *__pir = (void *)pir_vals;
|
||||
u32 i, vec;
|
||||
u32 pir_val, irr_val, prev_irr_val;
|
||||
u32 irr_val, prev_irr_val;
|
||||
int max_updated_irr;
|
||||
|
||||
max_updated_irr = -1;
|
||||
*max_irr = -1;
|
||||
|
||||
if (!pi_harvest_pir(pir, pir_vals))
|
||||
return false;
|
||||
|
||||
for (i = vec = 0; i <= 7; i++, vec += 32) {
|
||||
u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
|
||||
|
||||
irr_val = *p_irr;
|
||||
pir_val = READ_ONCE(pir[i]);
|
||||
|
||||
if (pir_val) {
|
||||
pir_val = xchg(&pir[i], 0);
|
||||
irr_val = READ_ONCE(*p_irr);
|
||||
|
||||
if (__pir[i]) {
|
||||
prev_irr_val = irr_val;
|
||||
do {
|
||||
irr_val = prev_irr_val | pir_val;
|
||||
irr_val = prev_irr_val | __pir[i];
|
||||
} while (prev_irr_val != irr_val &&
|
||||
!try_cmpxchg(p_irr, &prev_irr_val, irr_val));
|
||||
|
||||
|
|
@ -691,7 +693,7 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
|
||||
|
||||
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
|
||||
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
|
||||
|
|
|
|||
|
|
@ -103,8 +103,8 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
|||
int shorthand, unsigned int dest, int dest_mode);
|
||||
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
|
||||
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec);
|
||||
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
|
||||
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
|
||||
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr);
|
||||
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr);
|
||||
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
||||
struct dest_map *dest_map);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
|
|||
{
|
||||
int vec;
|
||||
|
||||
vec = find_last_bit((unsigned long *)pi_desc->pir, 256);
|
||||
vec = find_last_bit(pi_desc->pir, 256);
|
||||
return vec < 256 ? vec : -1;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user