From afe75049303f75c73a97d635a4d353c7d571d1a3 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:16 +0530 Subject: [PATCH 01/13] KVM: PPC: Book3S HV: Allow nested guest creation when L0 hv_guest_state > L1 On powerpc, L1 hypervisor takes help of L0 using H_ENTER_NESTED hcall to load L2 guest state in cpu. L1 hypervisor prepares the L2 state in struct hv_guest_state and passes a pointer to it via hcall. Using that pointer, L0 reads/writes that state directly from/to L1 memory. Thus L0 must be aware of hv_guest_state layout of L1. Currently it uses version field to achieve this. i.e. If L0 hv_guest_state.version != L1 hv_guest_state.version, L0 won't allow nested kvm guest. This restriction can be loosened up a bit. L0 can be taught to understand older layout of hv_guest_state, if we restrict the new members to be added only at the end, i.e. we can allow nested guest even when L0 hv_guest_state.version > L1 hv_guest_state.version. Though, the other way around is not possible. Signed-off-by: Ravi Bangoria Reviewed-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hvcall.h | 17 +++++++-- arch/powerpc/kvm/book3s_hv_nested.c | 55 +++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c98f5141e3fc..cb4d07e2c0cc 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -535,9 +535,12 @@ struct h_cpu_char_result { u64 behaviour; }; -/* Register state for entering a nested guest with H_ENTER_NESTED */ +/* + * Register state for entering a nested guest with H_ENTER_NESTED. + * New member must be added at the end. + */ struct hv_guest_state { - u64 version; /* version of this structure layout */ + u64 version; /* version of this structure layout, must be first */ u32 lpid; u32 vcpu_token; /* These registers are hypervisor privileged (at least for writing) */ @@ -571,6 +574,16 @@ struct hv_guest_state { /* Latest version of hv_guest_state structure */ #define HV_GUEST_STATE_VERSION 1 +static inline int hv_guest_state_size(unsigned int version) +{ + switch (version) { + case 1: + return offsetofend(struct hv_guest_state, ppr); + default: + return -1; + } +} + /* * From the document "H_GetPerformanceCounterInfo Interface" v1.07 * diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 33b58549a9aa..5b8043cf4668 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -215,12 +215,51 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr) } } +static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu, + struct hv_guest_state *l2_hv, + struct pt_regs *l2_regs, + u64 hv_ptr, u64 regs_ptr) +{ + int size; + + if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version, + sizeof(l2_hv->version))) + return -1; + + if (kvmppc_need_byteswap(vcpu)) + l2_hv->version = swab64(l2_hv->version); + + size = hv_guest_state_size(l2_hv->version); + if (size < 0) + return -1; + + return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) || + kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs, + sizeof(struct pt_regs)); +} + +static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu, + struct hv_guest_state *l2_hv, + struct pt_regs *l2_regs, + u64 hv_ptr, u64 regs_ptr) +{ + int size; + + size = hv_guest_state_size(l2_hv->version); + if (size < 0) + return -1; + + return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) || + kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs, + sizeof(struct pt_regs)); +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; struct kvm_nested_guest *l2; struct pt_regs l2_regs, saved_l1_regs; - struct hv_guest_state l2_hv, saved_l1_hv; + struct hv_guest_state l2_hv = {0}, saved_l1_hv; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 hv_ptr, regs_ptr; u64 hdec_exp; @@ -235,17 +274,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)) || - kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); + err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs, + hv_ptr, regs_ptr); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); - if (l2_hv.version != HV_GUEST_STATE_VERSION) + if (l2_hv.version > HV_GUEST_STATE_VERSION) return H_P2; if (kvmppc_need_byteswap(vcpu)) @@ -325,10 +362,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_pt_regs(&l2_regs); } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)) || - kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); + err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs, + hv_ptr, regs_ptr); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; From 122954ed7db313c53698c35005e93e8e04135428 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:17 +0530 Subject: [PATCH 02/13] KVM: PPC: Book3S HV: Rename current DAWR macros and variables Power10 is introducing a second DAWR (Data Address Watchpoint Register). Use real register names (with suffix 0) from ISA for current macros and variables used by kvm. One exception is KVM_REG_PPC_DAWR. Keep it as it is because it's uapi so changing it will break userspace. Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 4 ++-- arch/powerpc/kernel/asm-offsets.c | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 24 ++++++++++++------------ arch/powerpc/kvm/book3s_hv_nested.c | 8 ++++---- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 20 ++++++++++---------- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2b9b6855ec86..cf52081c9282 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -583,8 +583,8 @@ struct kvm_vcpu_arch { u32 ctrl; u32 dabrx; ulong dabr; - ulong dawr; - ulong dawrx; + ulong dawr0; + ulong dawrx0; ulong ciabr; ulong cfar; ulong ppr; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b12d7c049bfe..410b8c47443f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -526,8 +526,8 @@ int main(void) OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl); OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr); OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); - OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr); - OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx); + OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); + OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6f612d240392..07531ac6ba5d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -782,8 +782,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) return H_P4; - vcpu->arch.dawr = value1; - vcpu->arch.dawrx = value2; + vcpu->arch.dawr0 = value1; + vcpu->arch.dawrx0 = value2; return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: /* KVM does not support mflags=2 (AIL=2) */ @@ -1759,10 +1759,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->vtb); break; case KVM_REG_PPC_DAWR: - *val = get_reg_val(id, vcpu->arch.dawr); + *val = get_reg_val(id, vcpu->arch.dawr0); break; case KVM_REG_PPC_DAWRX: - *val = get_reg_val(id, vcpu->arch.dawrx); + *val = get_reg_val(id, vcpu->arch.dawrx0); break; case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); @@ -1991,10 +1991,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.vcore->vtb = set_reg_val(id, *val); break; case KVM_REG_PPC_DAWR: - vcpu->arch.dawr = set_reg_val(id, *val); + vcpu->arch.dawr0 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAWRX: - vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; + vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP; break; case KVM_REG_PPC_CIABR: vcpu->arch.ciabr = set_reg_val(id, *val); @@ -3449,8 +3449,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR0); - unsigned long host_dawrx = mfspr(SPRN_DAWRX0); + unsigned long host_dawr0 = mfspr(SPRN_DAWR0); + unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); @@ -3489,8 +3489,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR0, vcpu->arch.dawr); - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3542,8 +3542,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR0, host_dawr); - mtspr(SPRN_DAWRX0, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr0); + mtspr(SPRN_DAWRX0, host_dawrx0); mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 5b8043cf4668..d22e45b97f88 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->dpdes = vc->dpdes; hr->hfscr = vcpu->arch.hfscr; hr->tb_offset = vc->tb_offset; - hr->dawr0 = vcpu->arch.dawr; - hr->dawrx0 = vcpu->arch.dawrx; + hr->dawr0 = vcpu->arch.dawr0; + hr->dawrx0 = vcpu->arch.dawrx0; hr->ciabr = vcpu->arch.ciabr; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; @@ -151,8 +151,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) vc->pcr = hr->pcr | PCR_MASK; vc->dpdes = hr->dpdes; vcpu->arch.hfscr = hr->hfscr; - vcpu->arch.dawr = hr->dawr0; - vcpu->arch.dawrx = hr->dawrx0; + vcpu->arch.dawr0 = hr->dawr0; + vcpu->arch.dawrx0 = hr->dawrx0; vcpu->arch.ciabr = hr->ciabr; vcpu->arch.purr = hr->purr; vcpu->arch.spurr = hr->spurr; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cd9995ee8441..75804062f2c5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -52,8 +52,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_PID (SFS-32) #define STACK_SLOT_IAMR (SFS-40) #define STACK_SLOT_CIABR (SFS-48) -#define STACK_SLOT_DAWR (SFS-56) -#define STACK_SLOT_DAWRX (SFS-64) +#define STACK_SLOT_DAWR0 (SFS-56) +#define STACK_SLOT_DAWRX0 (SFS-64) #define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_AMR (SFS-80) #define STACK_SLOT_UAMOR (SFS-88) @@ -711,8 +711,8 @@ BEGIN_FTR_SECTION mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) - std r6, STACK_SLOT_DAWR(r1) - std r7, STACK_SLOT_DAWRX(r1) + std r6, STACK_SLOT_DAWR0(r1) + std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) @@ -801,8 +801,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) lbz r5, 0(r5) cmpdi r5, 0 beq 1f - ld r5, VCPU_DAWR(r4) - ld r6, VCPU_DAWRX(r4) + ld r5, VCPU_DAWR0(r4) + ld r6, VCPU_DAWRX0(r4) mtspr SPRN_DAWR0, r5 mtspr SPRN_DAWRX0, r6 1: @@ -1759,8 +1759,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* Restore host values of some registers */ BEGIN_FTR_SECTION ld r5, STACK_SLOT_CIABR(r1) - ld r6, STACK_SLOT_DAWR(r1) - ld r7, STACK_SLOT_DAWRX(r1) + ld r6, STACK_SLOT_DAWR0(r1) + ld r7, STACK_SLOT_DAWRX0(r1) mtspr SPRN_CIABR, r5 /* * If the DAWR doesn't work, it's ok to write these here as @@ -2574,8 +2574,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 - std r4, VCPU_DAWR(r3) - std r5, VCPU_DAWRX(r3) + std r4, VCPU_DAWR0(r3) + std r5, VCPU_DAWRX0(r3) /* * If came in through the real mode hcall handler then it is necessary * to write the registers since the return path won't. Otherwise it is From bd1de1a0e6eff4bde5ceae969673b85b8446fd6a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:18 +0530 Subject: [PATCH 03/13] KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR KVM code assumes single DAWR everywhere. Add code to support 2nd DAWR. DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/ unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR. Also, KVM will support 2nd DAWR only if CPU_FTR_DAWR1 is set. Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- Documentation/virt/kvm/api.rst | 2 ++ arch/powerpc/include/asm/hvcall.h | 8 ++++- arch/powerpc/include/asm/kvm_host.h | 3 ++ arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 43 +++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_nested.c | 7 ++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 ++++++++++++ tools/arch/powerpc/include/uapi/asm/kvm.h | 2 ++ 9 files changed, 91 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 482508ec7cc4..a783a8da818b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2276,6 +2276,8 @@ registers, find a list below: PPC KVM_REG_PPC_PSSCR 64 PPC KVM_REG_PPC_DEC_EXPIRY 64 PPC KVM_REG_PPC_PTCR 64 + PPC KVM_REG_PPC_DAWR1 64 + PPC KVM_REG_PPC_DAWRX1 64 PPC KVM_REG_PPC_TM_GPR0 64 ... PPC KVM_REG_PPC_TM_GPR31 64 diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index cb4d07e2c0cc..ed6086d57b22 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -569,16 +569,22 @@ struct hv_guest_state { u64 pidr; u64 cfar; u64 ppr; + /* Version 1 ends here */ + u64 dawr1; + u64 dawrx1; + /* Version 2 ends here */ }; /* Latest version of hv_guest_state structure */ -#define HV_GUEST_STATE_VERSION 1 +#define HV_GUEST_STATE_VERSION 2 static inline int hv_guest_state_size(unsigned int version) { switch (version) { case 1: return offsetofend(struct hv_guest_state, ppr); + case 2: + return offsetofend(struct hv_guest_state, dawrx1); default: return -1; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cf52081c9282..05fb00d37609 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -306,6 +306,7 @@ struct kvm_arch { u8 svm_enabled; bool threads_indep; bool nested_enable; + bool dawr1_enabled; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; @@ -585,6 +586,8 @@ struct kvm_vcpu_arch { ulong dabr; ulong dawr0; ulong dawrx0; + ulong dawr1; + ulong dawrx1; ulong ciabr; ulong cfar; ulong ppr; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 410b8c47443f..4771a8511216 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -528,6 +528,8 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); + OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); + OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 07531ac6ba5d..b37bddfead1b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -785,6 +785,22 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr0 = value1; vcpu->arch.dawrx0 = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR1: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return H_P2; + if (!vcpu->kvm->arch.dawr1_enabled) + return H_FUNCTION; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr1 = value1; + vcpu->arch.dawrx1 = value2; + return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: /* KVM does not support mflags=2 (AIL=2) */ if (mflags != 0 && mflags != 3) @@ -1764,6 +1780,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DAWRX: *val = get_reg_val(id, vcpu->arch.dawrx0); break; + case KVM_REG_PPC_DAWR1: + *val = get_reg_val(id, vcpu->arch.dawr1); + break; + case KVM_REG_PPC_DAWRX1: + *val = get_reg_val(id, vcpu->arch.dawrx1); + break; case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; @@ -1996,6 +2018,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DAWRX: vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP; break; + case KVM_REG_PPC_DAWR1: + vcpu->arch.dawr1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWRX1: + vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP; + break; case KVM_REG_PPC_CIABR: vcpu->arch.ciabr = set_reg_val(id, *val); /* Don't allow setting breakpoints in hypervisor code */ @@ -3453,6 +3481,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); + unsigned long host_dawr1 = 0; + unsigned long host_dawrx1 = 0; + + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } /* * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0, @@ -3491,6 +3526,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, if (dawr_enabled()) { mtspr(SPRN_DAWR0, vcpu->arch.dawr0); mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + } } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3544,6 +3583,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_CIABR, host_ciabr); mtspr(SPRN_DAWR0, host_dawr0); mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, host_dawr1); + mtspr(SPRN_DAWRX1, host_dawrx1); + } mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index d22e45b97f88..0cd0e7aad588 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; + hr->dawr1 = vcpu->arch.dawr1; + hr->dawrx1 = vcpu->arch.dawrx1; } static void byteswap_pt_regs(struct pt_regs *regs) @@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->pidr = swab64(hr->pidr); hr->cfar = swab64(hr->cfar); hr->ppr = swab64(hr->ppr); + hr->dawr1 = swab64(hr->dawr1); + hr->dawrx1 = swab64(hr->dawrx1); } static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, @@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) /* Don't let data address watchpoint match in hypervisor state */ hr->dawrx0 &= ~DAWRX_HYP; + hr->dawrx1 &= ~DAWRX_HYP; /* Don't let completed instruction address breakpt match in HV state */ if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) @@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) vcpu->arch.pid = hr->pidr; vcpu->arch.cfar = hr->cfar; vcpu->arch.ppr = hr->ppr; + vcpu->arch.dawr1 = hr->dawr1; + vcpu->arch.dawrx1 = hr->dawrx1; } void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 75804062f2c5..65161c062dc5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -57,6 +57,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_AMR (SFS-80) #define STACK_SLOT_UAMOR (SFS-88) +#define STACK_SLOT_DAWR1 (SFS-96) +#define STACK_SLOT_DAWRX1 (SFS-104) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -715,6 +717,12 @@ BEGIN_FTR_SECTION std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r6, SPRN_DAWR1 + mfspr r7, SPRN_DAWRX1 + std r6, STACK_SLOT_DAWR1(r1) + std r7, STACK_SLOT_DAWRX1(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) mfspr r5, SPRN_AMR std r5, STACK_SLOT_AMR(r1) @@ -805,6 +813,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX0(r4) mtspr SPRN_DAWR0, r5 mtspr SPRN_DAWRX0, r6 +BEGIN_FTR_SECTION + ld r5, VCPU_DAWR1(r4) + ld r6, VCPU_DAWRX1(r4) + mtspr SPRN_DAWR1, r5 + mtspr SPRN_DAWRX1, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1769,6 +1783,12 @@ BEGIN_FTR_SECTION mtspr SPRN_DAWR0, r6 mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + ld r6, STACK_SLOT_DAWR1(r1) + ld r7, STACK_SLOT_DAWRX1(r1) + mtspr SPRN_DAWR1, r6 + mtspr SPRN_DAWRX1, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) @@ -3343,6 +3363,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX0, r0 +BEGIN_FTR_SECTION + mtspr SPRN_DAWRX1, r0 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) BEGIN_MMU_FTR_SECTION b 4f diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs From d9a47edabc4f948102753fa9d41f2dc1dbeb28be Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:19 +0530 Subject: [PATCH 04/13] KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR Introduce KVM_CAP_PPC_DAWR1 which can be used by QEMU to query whether KVM supports 2nd DAWR or not. The capability is by default disabled even when the underlying CPU supports 2nd DAWR. QEMU needs to check and enable it manually to use the feature. Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- Documentation/virt/kvm/api.rst | 10 ++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++++ arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 1 + 6 files changed, 35 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a783a8da818b..45fd862ac128 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6215,6 +6215,16 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. KVM_RUN_BUS_LOCK flag is used to distinguish between them. +7.22 KVM_CAP_PPC_DAWR1 +---------------------- + +:Architectures: ppc +:Parameters: none +:Returns: 0 on success, -EINVAL when CPU doesn't support 2nd DAWR + +This capability can be used to check / enable 2nd DAWR feature provided +by POWER10 processor. + 8. Other capabilities. ====================== diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0a056c64c317..13c39d24dda5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,7 @@ struct kvmppc_ops { int size); int (*enable_svm)(struct kvm *kvm); int (*svm_off)(struct kvm *kvm); + int (*enable_dawr1)(struct kvm *kvm); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b37bddfead1b..dffbfd1ab47b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5642,6 +5642,17 @@ static int kvmhv_svm_off(struct kvm *kvm) return ret; } +static int kvmhv_enable_dawr1(struct kvm *kvm) +{ + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return -ENODEV; + + /* kvm == NULL means the caller is testing if the capability exists */ + if (kvm) + kvm->arch.dawr1_enabled = true; + return 0; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5685,6 +5696,7 @@ static struct kvmppc_ops kvm_ops_hv = { .store_to_eaddr = kvmhv_store_to_eaddr, .enable_svm = kvmhv_enable_svm, .svm_off = kvmhv_svm_off, + .enable_dawr1 = kvmhv_enable_dawr1, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cf52d26f49cd..5914c32d9ef3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -678,6 +678,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled && kvmppc_hv_ops->enable_svm && !kvmppc_hv_ops->enable_svm(NULL); break; + case KVM_CAP_PPC_DAWR1: + r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 && + !kvmppc_hv_ops->enable_dawr1(NULL)); + break; #endif default: r = 0; @@ -2187,6 +2191,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; r = kvm->arch.kvm_ops->enable_svm(kvm); break; + case KVM_CAP_PPC_DAWR1: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1) + break; + r = kvm->arch.kvm_ops->enable_dawr1(kvm); + break; #endif default: r = -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 63f8f6e95648..8b281f722e5b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1077,6 +1077,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba..9c284a9c3ac5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1056,6 +1056,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING From b1b1697ae0cc82544a03b69df49a69a9ac307b9c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 18 Jan 2021 16:28:06 +1000 Subject: [PATCH 05/13] KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support This reverts much of commit c01015091a770 ("KVM: PPC: Book3S HV: Run HPT guests on POWER9 radix hosts"), which was required to run HPT guests on RPT hosts on early POWER9 CPUs without support for "mixed mode", which meant the host could not run with MMU on while guests were running. This code has some corner case bugs, e.g., when the guest hits a machine check or HMI the primary locks up waiting for secondaries to switch LPCR to host, which they never do. This could all be fixed in software, but most CPUs in production have mixed mode support, and those that don't are believed to be all in installations that don't use this capability. So simplify things and remove support. Signed-off-by: Nicholas Piggin Tested-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s_asm.h | 11 --- arch/powerpc/kernel/asm-offsets.c | 3 - arch/powerpc/kvm/book3s_hv.c | 56 +++-------- arch/powerpc/kvm/book3s_hv_builtin.c | 108 +--------------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 80 ++++------------ 5 files changed, 32 insertions(+), 226 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 078f4648ea27..b6d31bff5209 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -74,16 +74,6 @@ struct kvm_split_mode { u8 do_nap; u8 napped[MAX_SMT_THREADS]; struct kvmppc_vcore *vc[MAX_SUBCORES]; - /* Bits for changing lpcr on P9 */ - unsigned long lpcr_req; - unsigned long lpidr_req; - unsigned long host_lpcr; - u32 do_set; - u32 do_restore; - union { - u32 allphases; - u8 phase[4]; - } lpcr_sync; }; /* @@ -110,7 +100,6 @@ struct kvmppc_host_state { u8 hwthread_state; u8 host_ipi; u8 ptid; /* thread number within subcore when split */ - u8 tid; /* thread number within whole core */ u8 fake_suspend; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4771a8511216..b690c70f061c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -670,7 +670,6 @@ int main(void) HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); - HSTATE_FIELD(HSTATE_TID, tid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); @@ -700,8 +699,6 @@ int main(void) OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap); OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped); - OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set); - OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index dffbfd1ab47b..89c686c17f06 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -134,7 +134,7 @@ static inline bool nesting_enabled(struct kvm *kvm) } /* If set, the threads on each CPU core have to be in the same MMU mode */ -static bool no_mixing_hpt_and_radix; +static bool no_mixing_hpt_and_radix __read_mostly; static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -2890,11 +2890,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm) return false; - /* Some POWER9 chips require all threads to be in the same MMU mode */ - if (no_mixing_hpt_and_radix && - kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) - return false; - if (n_threads < cip->max_subcore_threads) n_threads = cip->max_subcore_threads; if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) @@ -2933,6 +2928,9 @@ static void prepare_threads(struct kvmppc_vcore *vc) for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; + else if (no_mixing_hpt_and_radix && + kvm_is_radix(vc->kvm) != radix_enabled()) + vcpu->arch.ret = -EINVAL; else if (vcpu->arch.vpa.update_pending || vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending) @@ -3138,7 +3136,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) int controlled_threads; int trap; bool is_power8; - bool hpt_on_radix; /* * Remove from the list any threads that have a signal pending @@ -3171,11 +3168,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * this is a HPT guest on a radix host machine where the * CPU threads may not be in different MMU modes. */ - hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() && - !kvm_is_radix(vc->kvm); - if (((controlled_threads > 1) && - ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || - (hpt_on_radix && vc->kvm->arch.threads_indep)) { + if ((controlled_threads > 1) && + ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); @@ -3243,7 +3237,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S) && !cpu_has_feature(CPU_FTR_ARCH_300); - if (split > 1 || hpt_on_radix) { + if (split > 1) { sip = &split_info; memset(&split_info, 0, sizeof(split_info)); for (sub = 0; sub < core_info.n_subcores; ++sub) @@ -3265,13 +3259,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) split_info.subcore_size = subcore_size; } else { split_info.subcore_size = 1; - if (hpt_on_radix) { - /* Use the split_info for LPCR/LPIDR changes */ - split_info.lpcr_req = vc->lpcr; - split_info.lpidr_req = vc->kvm->arch.lpid; - split_info.host_lpcr = vc->kvm->arch.host_lpcr; - split_info.do_set = 1; - } } /* order writes to split_info before kvm_split_mode pointer */ @@ -3281,7 +3268,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (thr = 0; thr < controlled_threads; ++thr) { struct paca_struct *paca = paca_ptrs[pcpu + thr]; - paca->kvm_hstate.tid = thr; paca->kvm_hstate.napping = 0; paca->kvm_hstate.kvm_split_mode = sip; } @@ -3355,10 +3341,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * When doing micro-threading, poke the inactive threads as well. * This gets them to the nap instruction after kvm_do_nap, * which reduces the time taken to unsplit later. - * For POWER9 HPT guest on radix host, we need all the secondary - * threads woken up so they can do the LPCR/LPIDR change. */ - if (cmd_bit || hpt_on_radix) { + if (cmd_bit) { split_info.do_nap = 1; /* ask secondaries to nap when done */ for (thr = 1; thr < threads_per_subcore; ++thr) if (!(active & (1 << thr))) @@ -3419,19 +3403,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) cpu_relax(); ++loops; } - } else if (hpt_on_radix) { - /* Wait for all threads to have seen final sync */ - for (thr = 1; thr < controlled_threads; ++thr) { - struct paca_struct *paca = paca_ptrs[pcpu + thr]; - - while (paca->kvm_hstate.kvm_split_mode) { - HMT_low(); - barrier(); - } - HMT_medium(); - } + split_info.do_nap = 0; } - split_info.do_nap = 0; kvmppc_set_host_core(pcpu); @@ -4216,7 +4189,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_clear_host_core(pcpu); - local_paca->kvm_hstate.tid = 0; local_paca->kvm_hstate.napping = 0; local_paca->kvm_hstate.kvm_split_mode = NULL; kvmppc_start_thread(vcpu, vc); @@ -4401,15 +4373,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) do { /* - * The early POWER9 chips that can't mix radix and HPT threads - * on the same core also need the workaround for the problem - * where the TLB would prefetch entries in the guest exit path - * for radix guests using the guest PIDR value and LPID 0. - * The workaround is in the old path (kvmppc_run_vcpu()) - * but not the new path (kvmhv_run_single_vcpu()). + * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu + * path, which also handles hash and dependent threads mode. */ if (kvm->arch.threads_indep && kvm_is_radix(kvm) && - !no_mixing_hpt_and_radix) + !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); else diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8053efdf7ea7..f3d3183249fe 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -277,8 +277,7 @@ void kvmhv_commence_exit(int trap) struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; int ptid = local_paca->kvm_hstate.ptid; struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode; - int me, ee, i, t; - int cpu0; + int me, ee, i; /* Set our bit in the threads-exiting-guest map in the 0xff00 bits of vcore->entry_exit_map */ @@ -320,22 +319,6 @@ void kvmhv_commence_exit(int trap) if ((ee >> 8) == 0) kvmhv_interrupt_vcore(vc, ee); } - - /* - * On POWER9 when running a HPT guest on a radix host (sip != NULL), - * we have to interrupt inactive CPU threads to get them to - * restore the host LPCR value. - */ - if (sip->lpcr_req) { - if (cmpxchg(&sip->do_restore, 0, 1) == 0) { - vc = local_paca->kvm_hstate.kvm_vcore; - cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid; - for (t = 1; t < threads_per_core; ++t) { - if (sip->napped[t]) - kvmhv_rm_send_ipi(cpu0 + t); - } - } - } } struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; @@ -667,95 +650,6 @@ void kvmppc_bad_interrupt(struct pt_regs *regs) panic("Bad KVM trap"); } -/* - * Functions used to switch LPCR HR and UPRT bits on all threads - * when entering and exiting HPT guests on a radix host. - */ - -#define PHASE_REALMODE 1 /* in real mode */ -#define PHASE_SET_LPCR 2 /* have set LPCR */ -#define PHASE_OUT_OF_GUEST 4 /* have finished executing in guest */ -#define PHASE_RESET_LPCR 8 /* have reset LPCR to host value */ - -#define ALL(p) (((p) << 24) | ((p) << 16) | ((p) << 8) | (p)) - -static void wait_for_sync(struct kvm_split_mode *sip, int phase) -{ - int thr = local_paca->kvm_hstate.tid; - - sip->lpcr_sync.phase[thr] |= phase; - phase = ALL(phase); - while ((sip->lpcr_sync.allphases & phase) != phase) { - HMT_low(); - barrier(); - } - HMT_medium(); -} - -void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) -{ - int num_sets; - unsigned long rb, set; - - /* wait for every other thread to get to real mode */ - wait_for_sync(sip, PHASE_REALMODE); - - /* Set LPCR and LPIDR */ - mtspr(SPRN_LPCR, sip->lpcr_req); - mtspr(SPRN_LPID, sip->lpidr_req); - isync(); - - /* - * P10 will flush all the congruence class with a single tlbiel - */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - num_sets = 1; - else - num_sets = POWER9_TLB_SETS_RADIX; - - /* Invalidate the TLB on thread 0 */ - if (local_paca->kvm_hstate.tid == 0) { - sip->do_set = 0; - asm volatile("ptesync" : : : "memory"); - for (set = 0; set < num_sets; ++set) { - rb = TLBIEL_INVAL_SET_LPID + - (set << TLBIEL_INVAL_SET_SHIFT); - asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : : - "r" (rb), "r" (0)); - } - asm volatile("ptesync" : : : "memory"); - } - - /* indicate that we have done so and wait for others */ - wait_for_sync(sip, PHASE_SET_LPCR); - /* order read of sip->lpcr_sync.allphases vs. sip->do_set */ - smp_rmb(); -} - -/* - * Called when a thread that has been in the guest needs - * to reload the host LPCR value - but only on POWER9 when - * running a HPT guest on a radix host. - */ -void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) -{ - /* we're out of the guest... */ - wait_for_sync(sip, PHASE_OUT_OF_GUEST); - - mtspr(SPRN_LPID, 0); - mtspr(SPRN_LPCR, sip->host_lpcr); - isync(); - - if (local_paca->kvm_hstate.tid == 0) { - sip->do_restore = 0; - smp_wmb(); /* order store of do_restore vs. phase */ - } - - wait_for_sync(sip, PHASE_RESET_LPCR); - smp_mb(); - local_paca->kvm_hstate.kvm_split_mode = NULL; -} - static void kvmppc_end_cede(struct kvm_vcpu *vcpu) { vcpu->arch.ceded = 0; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 65161c062dc5..565464e3f9a5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -87,19 +87,6 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) RFI_TO_KERNEL kvmppc_call_hv_entry: -BEGIN_FTR_SECTION - /* On P9, do LPCR setting, if necessary */ - ld r3, HSTATE_SPLIT_MODE(r13) - cmpdi r3, 0 - beq 46f - lwz r4, KVM_SPLIT_DO_SET(r3) - cmpwi r4, 0 - beq 46f - bl kvmhv_p9_set_lpcr - nop -46: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - ld r4, HSTATE_KVM_VCPU(r13) bl kvmppc_hv_entry @@ -363,11 +350,11 @@ kvm_secondary_got_guest: LOAD_REG_ADDR(r6, decrementer_max) ld r6, 0(r6) mtspr SPRN_HDEC, r6 +BEGIN_FTR_SECTION /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) cmpdi r6, 0 beq 63f -BEGIN_FTR_SECTION ld r0, KVM_SPLIT_RPR(r6) mtspr SPRN_RPR, r0 ld r0, KVM_SPLIT_PMMAR(r6) @@ -375,16 +362,7 @@ BEGIN_FTR_SECTION ld r0, KVM_SPLIT_LDBAR(r6) mtspr SPRN_LDBAR, r0 isync -FTR_SECTION_ELSE - /* On P9 we use the split_info for coordinating LPCR changes */ - lwz r4, KVM_SPLIT_DO_SET(r6) - cmpwi r4, 0 - beq 1f - mr r3, r6 - bl kvmhv_p9_set_lpcr - nop -1: -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 63: /* Order load of vcpu after load of vcore */ lwsync @@ -454,19 +432,15 @@ kvm_no_guest: mtcr r5 blr -53: HMT_LOW +53: +BEGIN_FTR_SECTION + HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) cmpdi r5, 0 bne 60f ld r3, HSTATE_SPLIT_MODE(r13) cmpdi r3, 0 beq kvm_no_guest - lwz r0, KVM_SPLIT_DO_SET(r3) - cmpwi r0, 0 - bne kvmhv_do_set - lwz r0, KVM_SPLIT_DO_RESTORE(r3) - cmpwi r0, 0 - bne kvmhv_do_restore lbz r0, KVM_SPLIT_DO_NAP(r3) cmpwi r0, 0 beq kvm_no_guest @@ -474,24 +448,19 @@ kvm_no_guest: b kvm_unsplit_nap 60: HMT_MEDIUM b kvm_secondary_got_guest +FTR_SECTION_ELSE + HMT_LOW + ld r5, HSTATE_KVM_VCORE(r13) + cmpdi r5, 0 + beq kvm_no_guest + HMT_MEDIUM + b kvm_secondary_got_guest +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 54: li r0, KVM_HWTHREAD_IN_KVM stb r0, HSTATE_HWTHREAD_STATE(r13) b kvm_no_guest -kvmhv_do_set: - /* Set LPCR, LPIDR etc. on P9 */ - HMT_MEDIUM - bl kvmhv_p9_set_lpcr - nop - b kvm_no_guest - -kvmhv_do_restore: - HMT_MEDIUM - bl kvmhv_p9_restore_lpcr - nop - b kvm_no_guest - /* * Here the primary thread is trying to return the core to * whole-core mode, so we need to nap. @@ -529,7 +498,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Set kvm_split_mode.napped[tid] = 1 */ ld r3, HSTATE_SPLIT_MODE(r13) li r0, 1 - lbz r4, HSTATE_TID(r13) + lhz r4, PACAPACAINDEX(r13) + clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */ addi r4, r4, KVM_SPLIT_NAPPED stbx r0, r3, r4 /* Check the do_nap flag again after setting napped[] */ @@ -1958,24 +1928,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 19: lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 -16: -BEGIN_FTR_SECTION - /* On POWER9 with HPT-on-radix we need to wait for all other threads */ - ld r3, HSTATE_SPLIT_MODE(r13) - cmpdi r3, 0 - beq 47f - lwz r8, KVM_SPLIT_DO_RESTORE(r3) - cmpwi r8, 0 - beq 47f - bl kvmhv_p9_restore_lpcr - nop - b 48f -47: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - ld r8,KVM_HOST_LPCR(r4) +16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync -48: + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) @@ -2799,8 +2755,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) beq kvm_end_cede cmpwi r0, NAPPING_NOVCPU beq kvm_novcpu_wakeup +BEGIN_FTR_SECTION cmpwi r0, NAPPING_UNSPLIT beq kvm_unsplit_wakeup +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) twi 31,0,0 /* Nap state must not be zero */ 33: mr r4, r3 From 68ad28a4cdd478fa2ae37951b911ab664011098b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 18 Jan 2021 16:28:07 +1000 Subject: [PATCH 06/13] KVM: PPC: Book3S HV: Fix radix guest SLB side channel The slbmte instruction is legal in radix mode, including radix guest mode. This means radix guests can load the SLB with arbitrary data. KVM host does not clear the SLB when exiting a guest if it was a radix guest, which would allow a rogue radix guest to use the SLB as a side channel to communicate with other guests. Fix this by ensuring the SLB is cleared when coming out of a radix guest. Only the first 4 entries are a concern, because radix guests always run with LPCR[UPRT]=1, which limits the reach of slbmte. slbia is not used (except in a non-performance-critical path) because it can clear cached translations. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 39 ++++++++++++++++++++----- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 565464e3f9a5..174e7b6eba34 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1171,6 +1171,20 @@ EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9) mr r4, r3 b fast_guest_entry_c guest_exit_short_path: + /* + * Malicious or buggy radix guests may have inserted SLB entries + * (only 0..3 because radix always runs with UPRT=1), so these must + * be cleared here to avoid side-channels. slbmte is used rather + * than slbia, as it won't clear cached translations. + */ + li r0,0 + slbmte r0,r0 + li r4,1 + slbmte r0,r4 + li r4,2 + slbmte r0,r4 + li r4,3 + slbmte r0,r4 li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) @@ -1483,7 +1497,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ lbz r0, KVM_RADIX(r5) li r5, 0 cmpwi r0, 0 - bne 3f /* for radix, save 0 entries */ + bne 0f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1504,12 +1518,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ slbmte r0,r0 slbia ptesync -3: stw r5,VCPU_SLB_MAX(r9) + stw r5,VCPU_SLB_MAX(r9) /* load host SLB entries */ -BEGIN_MMU_FTR_SECTION - b 0f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED @@ -1522,7 +1533,17 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) slbmte r6,r5 1: addi r8,r8,16 .endr -0: + b guest_bypass + +0: /* Sanitise radix guest SLB, see guest_exit_short_path comment. */ + li r0,0 + slbmte r0,r0 + li r4,1 + slbmte r0,r4 + li r4,2 + slbmte r0,r4 + li r4,3 + slbmte r0,r4 guest_bypass: stw r12, STACK_SLOT_TRAP(r1) @@ -3325,12 +3346,14 @@ BEGIN_FTR_SECTION mtspr SPRN_DAWRX1, r0 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) + /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */ + slbmte r0, r0 + slbia + BEGIN_MMU_FTR_SECTION b 4f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - slbmte r0, r0 - slbia ptesync ld r8, PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED From 078ebe35fc0a8ab847809765be34937e656e70a6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 18 Jan 2021 16:28:08 +1000 Subject: [PATCH 07/13] KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 174e7b6eba34..e413d8e34dd8 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -902,15 +902,19 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon - /* For hash guest, clear out and reload the SLB */ ld r6, VCPU_KVM(r4) lbz r0, KVM_RADIX(r6) cmpwi r0, 0 bne 9f + + /* For hash guest, clear out and reload the SLB */ +BEGIN_MMU_FTR_SECTION + /* Radix host won't have populated the SLB, so no need to clear */ li r6, 0 slbmte r6, r6 slbia ptesync +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ lwz r5,VCPU_SLB_MAX(r4) From 7a7f94a3a9c7a5d25ec172c94a42509dce61eb0b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 18 Jan 2021 16:28:09 +1000 Subject: [PATCH 08/13] KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB IH=6 may preserve hypervisor real-mode ERAT entries and is the recommended SLBIA hint for switching partitions. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e413d8e34dd8..426bdc59fde2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -912,7 +912,7 @@ BEGIN_MMU_FTR_SECTION /* Radix host won't have populated the SLB, so no need to clear */ li r6, 0 slbmte r6, r6 - slbia + PPC_SLBIA(6) ptesync END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) @@ -1520,7 +1520,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Finally clear out the SLB */ li r0,0 slbmte r0,r0 - slbia + PPC_SLBIA(6) ptesync stw r5,VCPU_SLB_MAX(r9) @@ -3352,7 +3352,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */ slbmte r0, r0 - slbia + PPC_SLBIA(6) BEGIN_MMU_FTR_SECTION b 4f From 63e9f2357309439f09509aa924faa3a9de84a38e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 2 Feb 2021 11:30:53 +0800 Subject: [PATCH 09/13] KVM: PPC: remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/kvm/booke.c:701:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/booke.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 288a9820ec01..f38ae3e54b37 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -698,7 +698,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); r = 1; - }; + } return r; } From 25edcc50d76c834479d11fcc7de46f3da4d95121 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 4 Feb 2021 17:05:17 -0300 Subject: [PATCH 10/13] KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path The Facility Status and Control Register is a privileged SPR that defines the availability of some features in problem state. Since it can be written by the guest, we must restore it to the previous host value after guest exit. This restoration is currently done by taking the value from current->thread.fscr, which in the P9 path is not enough anymore because the guest could context switch the QEMU thread, causing the guest-current value to be saved into the thread struct. The above situation manifested when running a QEMU linked against a libc with System Call Vectored support, which causes scv instructions to be run by QEMU early during the guest boot (during SLOF), at which point the FSCR is 0 due to guest entry. After a few scv calls (1 to a couple hundred), the context switching happens and the QEMU thread runs with the guest value, resulting in a Facility Unavailable interrupt. This patch saves and restores the host value of FSCR in the inner guest entry loop in a way independent of current->thread.fscr. The old way of doing it is still kept in place because it works for the old entry path. Signed-off-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89c686c17f06..f6d470157fcb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3611,6 +3611,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_tidr = mfspr(SPRN_TIDR); unsigned long host_iamr = mfspr(SPRN_IAMR); unsigned long host_amr = mfspr(SPRN_AMR); + unsigned long host_fscr = mfspr(SPRN_FSCR); s64 dec; u64 tb; int trap, save_pmu; @@ -3751,6 +3752,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr); + if (host_fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, host_fscr); + msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); store_fp_state(&vcpu->arch.fp); #ifdef CONFIG_ALTIVEC From a722076e947023a0d6ffca79661324c45ff30641 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 5 Feb 2021 13:41:54 -0300 Subject: [PATCH 11/13] KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2 These machines don't support running both MMU types at the same time, so remove the KVM_CAP_PPC_MMU_HASH_V3 capability when the host is using Radix MMU. [paulus@ozlabs.org - added defensive check on kvmppc_hv_ops->hash_v3_possible] Signed-off-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_hv.c | 10 ++++++++++ arch/powerpc/kvm/powerpc.c | 4 ++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 13c39d24dda5..df4bda867bab 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -315,6 +315,7 @@ struct kvmppc_ops { int (*enable_svm)(struct kvm *kvm); int (*svm_off)(struct kvm *kvm); int (*enable_dawr1)(struct kvm *kvm); + bool (*hash_v3_possible)(void); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f6d470157fcb..f09708da216e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5625,6 +5625,15 @@ static int kvmhv_enable_dawr1(struct kvm *kvm) return 0; } +static bool kvmppc_hash_v3_possible(void) +{ + if (radix_enabled() && no_mixing_hpt_and_radix) + return false; + + return cpu_has_feature(CPU_FTR_ARCH_300) && + cpu_has_feature(CPU_FTR_HVMODE); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5669,6 +5678,7 @@ static struct kvmppc_ops kvm_ops_hv = { .enable_svm = kvmhv_enable_svm, .svm_off = kvmhv_svm_off, .enable_dawr1 = kvmhv_enable_dawr1, + .hash_v3_possible = kvmppc_hash_v3_possible, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5914c32d9ef3..6c083a9b3545 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -611,8 +611,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(hv_enabled && radix_enabled()); break; case KVM_CAP_PPC_MMU_HASH_V3: - r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) && - cpu_has_feature(CPU_FTR_HVMODE)); + r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible && + kvmppc_hv_ops->hash_v3_possible()); break; case KVM_CAP_PPC_NESTED_HV: r = !!(hv_enabled && kvmppc_hv_ops->enable_nested && From ab950e1acd2175eae825cfcbac621c0625ad2a86 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Feb 2021 17:09:45 +1100 Subject: [PATCH 12/13] KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries Commit 68ad28a4cdd4 ("KVM: PPC: Book3S HV: Fix radix guest SLB side channel") changed the older guest entry path, with the side effect that vcpu->arch.slb_max no longer gets cleared for a radix guest. This means that a HPT guest which loads some SLB entries, switches to radix mode, runs the guest using the old guest entry path (e.g., because the indep_threads_mode module parameter has been set to false), and then switches back to HPT mode would now see the old SLB entries being present, whereas previously it would have seen no SLB entries. To avoid changing guest-visible behaviour, this adds a store instruction to clear vcpu->arch.slb_max for a radix guest using the old guest entry path. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 426bdc59fde2..0bc1c8248050 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1539,8 +1539,12 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ .endr b guest_bypass -0: /* Sanitise radix guest SLB, see guest_exit_short_path comment. */ +0: /* + * Sanitise radix guest SLB, see guest_exit_short_path comment. + * We clear vcpu->arch.slb_max to match earlier behaviour. + */ li r0,0 + stw r0,VCPU_SLB_MAX(r9) slbmte r0,r0 li r4,1 slbmte r0,r4 From 72476aaa469179222b92c380de60c76b4cb9a318 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Feb 2021 15:38:30 +1000 Subject: [PATCH 13/13] KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests Commit 68ad28a4cdd4 ("KVM: PPC: Book3S HV: Fix radix guest SLB side channel") incorrectly removed the radix host instruction patch to skip re-loading the host SLB entries when exiting from a hash guest. Restore it. Fixes: 68ad28a4cdd4 ("KVM: PPC: Book3S HV: Fix radix guest SLB side channel") Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0bc1c8248050..5e634db4809b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1525,6 +1525,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ stw r5,VCPU_SLB_MAX(r9) /* load host SLB entries */ +BEGIN_MMU_FTR_SECTION + b guest_bypass +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED