x86/pvops/msr: Refactor pv_cpu_ops.write_msr{,_safe}()

An MSR value is represented as a 64-bit unsigned integer, with existing
MSR instructions storing it in EDX:EAX as two 32-bit segments.

The new immediate form MSR instructions, however, utilize a 64-bit
general-purpose register to store the MSR value.  To unify the usage of
all MSR instructions, let the default MSR access APIs accept an MSR
value as a single 64-bit argument instead of two 32-bit segments.

The dual 32-bit APIs are still available as convenience wrappers over the
APIs that handle an MSR value as a single 64-bit argument.

The following illustrates the updated derivation of the MSR write APIs:

                 __wrmsrq(u32 msr, u64 val)
                   /                  \
                  /                    \
           native_wrmsrq(msr, val)    native_wrmsr(msr, low, high)
                 |
                 |
           native_write_msr(msr, val)
                /          \
               /            \
       wrmsrq(msr, val)    wrmsr(msr, low, high)

When CONFIG_PARAVIRT is enabled, wrmsrq() and wrmsr() are defined on top
of paravirt_write_msr():

            paravirt_write_msr(u32 msr, u64 val)
               /             \
              /               \
          wrmsrq(msr, val)    wrmsr(msr, low, high)

paravirt_write_msr() invokes cpu.write_msr(msr, val), the indirect pv_ops
call used for MSR writes:

    If on native:

            cpu.write_msr = native_write_msr

    If on Xen:

            cpu.write_msr = xen_write_msr

Therefore, refactor pv_cpu_ops.write_msr{,_safe}() to accept an MSR value
in a single u64 argument, replacing the current dual u32 arguments.

No functional change intended.

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://lore.kernel.org/r/20250427092027.1598740-14-xin@zytor.com
This commit is contained in:
Xin Li (Intel) 2025-04-27 02:20:25 -07:00 committed by Ingo Molnar
parent 2b7e25301c
commit 0c2678efed
6 changed files with 46 additions and 67 deletions

View File

@ -75,12 +75,12 @@ static __always_inline u64 __rdmsr(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
static __always_inline void __wrmsr(u32 msr, u32 low, u32 high)
static __always_inline void __wrmsrq(u32 msr, u64 val)
{
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a"(low), "d" (high) : "memory");
: : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
}
#define native_rdmsr(msr, val1, val2) \
@ -96,11 +96,10 @@ static __always_inline u64 native_rdmsrq(u32 msr)
}
#define native_wrmsr(msr, low, high) \
__wrmsr(msr, low, high)
__wrmsrq((msr), (u64)(high) << 32 | (low))
#define native_wrmsrq(msr, val) \
__wrmsr((msr), (u32)((u64)(val)), \
(u32)((u64)(val) >> 32))
__wrmsrq((msr), (val))
static inline u64 native_read_msr(u32 msr)
{
@ -129,11 +128,8 @@ static inline u64 native_read_msr_safe(u32 msr, int *err)
}
/* Can be uninlined because referenced by paravirt */
static inline void notrace
native_write_msr(u32 msr, u32 low, u32 high)
static inline void notrace native_write_msr(u32 msr, u64 val)
{
u64 val = (u64)high << 32 | low;
native_wrmsrq(msr, val);
if (tracepoint_enabled(write_msr))
@ -141,8 +137,7 @@ native_write_msr(u32 msr, u32 low, u32 high)
}
/* Can be uninlined because referenced by paravirt */
static inline int notrace
native_write_msr_safe(u32 msr, u32 low, u32 high)
static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
int err;
@ -150,10 +145,10 @@ native_write_msr_safe(u32 msr, u32 low, u32 high)
"2:\n\t"
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
: [err] "=a" (err)
: "c" (msr), "0" (low), "d" (high)
: "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
: "memory");
if (tracepoint_enabled(write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), err);
do_trace_write_msr(msr, val, err);
return err;
}
@ -189,7 +184,7 @@ do { \
static inline void wrmsr(u32 msr, u32 low, u32 high)
{
native_write_msr(msr, low, high);
native_write_msr(msr, (u64)high << 32 | low);
}
#define rdmsrq(msr, val) \
@ -197,13 +192,13 @@ static inline void wrmsr(u32 msr, u32 low, u32 high)
static inline void wrmsrq(u32 msr, u64 val)
{
native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
native_write_msr(msr, val);
}
/* wrmsr with exception handling */
static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
static inline int wrmsrq_safe(u32 msr, u64 val)
{
return native_write_msr_safe(msr, low, high);
return native_write_msr_safe(msr, val);
}
/* rdmsr with exception handling */
@ -247,11 +242,11 @@ static __always_inline void wrmsrns(u32 msr, u64 val)
}
/*
* 64-bit version of wrmsr_safe():
* Dual u32 version of wrmsrq_safe():
*/
static inline int wrmsrq_safe(u32 msr, u64 val)
static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
{
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
return wrmsrq_safe(msr, (u64)high << 32 | low);
}
struct msr __percpu *msrs_alloc(void);

View File

@ -180,10 +180,9 @@ static inline u64 paravirt_read_msr(unsigned msr)
return PVOP_CALL1(u64, cpu.read_msr, msr);
}
static inline void paravirt_write_msr(unsigned msr,
unsigned low, unsigned high)
static inline void paravirt_write_msr(u32 msr, u64 val)
{
PVOP_VCALL3(cpu.write_msr, msr, low, high);
PVOP_VCALL2(cpu.write_msr, msr, val);
}
static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
@ -191,10 +190,9 @@ static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
}
static inline int paravirt_write_msr_safe(unsigned msr,
unsigned low, unsigned high)
static inline int paravirt_write_msr_safe(u32 msr, u64 val)
{
return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high);
return PVOP_CALL2(int, cpu.write_msr_safe, msr, val);
}
#define rdmsr(msr, val1, val2) \
@ -204,22 +202,25 @@ do { \
val2 = _l >> 32; \
} while (0)
#define wrmsr(msr, val1, val2) \
do { \
paravirt_write_msr(msr, val1, val2); \
} while (0)
static __always_inline void wrmsr(u32 msr, u32 low, u32 high)
{
paravirt_write_msr(msr, (u64)high << 32 | low);
}
#define rdmsrq(msr, val) \
do { \
val = paravirt_read_msr(msr); \
} while (0)
static inline void wrmsrq(unsigned msr, u64 val)
static inline void wrmsrq(u32 msr, u64 val)
{
wrmsr(msr, (u32)val, (u32)(val>>32));
paravirt_write_msr(msr, val);
}
#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b)
static inline int wrmsrq_safe(u32 msr, u64 val)
{
return paravirt_write_msr_safe(msr, val);
}
/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b) \

View File

@ -92,14 +92,14 @@ struct pv_cpu_ops {
/* Unsafe MSR operations. These will warn or panic on failure. */
u64 (*read_msr)(unsigned int msr);
void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
void (*write_msr)(u32 msr, u64 val);
/*
* Safe MSR operations.
* read sets err to 0 or -EIO. write returns 0 or -EIO.
*/
u64 (*read_msr_safe)(unsigned int msr, int *err);
int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
int (*write_msr_safe)(u32 msr, u64 val);
u64 (*read_pmc)(int counter);

View File

@ -196,7 +196,7 @@ static void kvm_setup_secondary_clock(void)
void kvmclock_disable(void)
{
if (msr_kvm_system_time)
native_write_msr(msr_kvm_system_time, 0, 0);
native_write_msr(msr_kvm_system_time, 0);
}
static void __init kvmclock_init_mem(void)

View File

@ -476,7 +476,6 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)
static void svm_init_erratum_383(void)
{
u32 low, high;
int err;
u64 val;
@ -490,10 +489,7 @@ static void svm_init_erratum_383(void)
val |= (1ULL << 47);
low = lower_32_bits(val);
high = upper_32_bits(val);
native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
native_write_msr_safe(MSR_AMD64_DC_CFG, val);
erratum_383_found = true;
}
@ -2168,17 +2164,12 @@ static bool is_erratum_383(void)
/* Clear MCi_STATUS registers */
for (i = 0; i < 6; ++i)
native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0);
value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
if (!err) {
u32 low, high;
value &= ~(1ULL << 2);
low = lower_32_bits(value);
high = upper_32_bits(value);
native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
native_write_msr_safe(MSR_IA32_MCG_STATUS, value);
}
/* Flush tlb to evict multi-match entries */

View File

@ -1111,10 +1111,8 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
return val;
}
static void set_seg(u32 which, u32 low, u32 high)
static void set_seg(u32 which, u64 base)
{
u64 base = ((u64)high << 32) | low;
if (HYPERVISOR_set_segment_base(which, base))
WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
}
@ -1124,22 +1122,19 @@ static void set_seg(u32 which, u32 low, u32 high)
* With err == NULL write_msr() semantics are selected.
* Supplying an err pointer requires err to be pre-initialized with 0.
*/
static void xen_do_write_msr(unsigned int msr, unsigned int low,
unsigned int high, int *err)
static void xen_do_write_msr(u32 msr, u64 val, int *err)
{
u64 val;
switch (msr) {
case MSR_FS_BASE:
set_seg(SEGBASE_FS, low, high);
set_seg(SEGBASE_FS, val);
break;
case MSR_KERNEL_GS_BASE:
set_seg(SEGBASE_GS_USER, low, high);
set_seg(SEGBASE_GS_USER, val);
break;
case MSR_GS_BASE:
set_seg(SEGBASE_GS_KERNEL, low, high);
set_seg(SEGBASE_GS_KERNEL, val);
break;
case MSR_STAR:
@ -1155,15 +1150,13 @@ static void xen_do_write_msr(unsigned int msr, unsigned int low,
break;
default:
val = (u64)high << 32 | low;
if (pmu_msr_chk_emulated(msr, &val, false))
return;
if (err)
*err = native_write_msr_safe(msr, low, high);
*err = native_write_msr_safe(msr, val);
else
native_write_msr(msr, low, high);
native_write_msr(msr, val);
}
}
@ -1172,12 +1165,11 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
return xen_do_read_msr(msr, err);
}
static int xen_write_msr_safe(unsigned int msr, unsigned int low,
unsigned int high)
static int xen_write_msr_safe(u32 msr, u64 val)
{
int err = 0;
xen_do_write_msr(msr, low, high, &err);
xen_do_write_msr(msr, val, &err);
return err;
}
@ -1189,11 +1181,11 @@ static u64 xen_read_msr(unsigned int msr)
return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
static void xen_write_msr(u32 msr, u64 val)
{
int err;
xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */