From 5cb100b38a7bf5bc5e892fa46998ebcbab925f37 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 5 May 2013 20:03:40 +0800 Subject: [PATCH 001/287] KVM: add missing misc_deregister() on error in kvm_init() Add the missing misc_deregister() before return from kvm_init() in the debugfs init error handling case. Signed-off-by: Wei Yongjun Signed-off-by: Gleb Natapov (cherry picked from commit afc2f792cdcb67f4257f0e68d10ee4a7b7eae57a) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 302681c4aa44..b547a1ceecbc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3181,6 +3181,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, out_undebugfs: unregister_syscore_ops(&kvm_syscore_ops); + misc_deregister(&kvm_dev); out_unreg: kvm_async_pf_deinit(); out_free: From 86882a4950ff8f054d0b89bb36655446a04b89c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 14:31:01 +0100 Subject: [PATCH 002/287] ARM: KVM: move GIC/timer code to a common location As KVM/arm64 is looming on the horizon, it makes sense to move some of the common code to a single location in order to reduce duplication. The code could live anywhere. Actually, most of KVM is already built with a bunch of ugly ../../.. hacks in the various Makefiles, so we're not exactly talking about style here. But maybe it is time to start moving into a less ugly direction. The include files must be in a "public" location, as they are accessed from non-KVM files (arch/arm/kernel/asm-offsets.c). For this purpose, introduce two new locations: - virt/kvm/arm/ : x86 and ia64 already share the ioapic code in virt/kvm, so this could be seen as a (very ugly) precedent. - include/kvm/ : there is already an include/xen, and while the intent is slightly different, this seems as good a location as any Eventually, we should probably have independant Makefiles at every levels (just like everywhere else in the kernel), but this is just the first step. Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 7275acdfe29ba03ad2f6e150386900c4e2d43fb1) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 4 ++-- arch/arm/kvm/Makefile | 7 ++++--- .../asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h | 0 arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h | 0 {arch/arm/kvm => virt/kvm/arm}/arch_timer.c | 4 ++-- {arch/arm/kvm => virt/kvm/arm}/vgic.c | 0 6 files changed, 8 insertions(+), 7 deletions(-) rename arch/arm/include/asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h (100%) rename arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h (100%) rename {arch/arm/kvm => virt/kvm/arm}/arch_timer.c (99%) rename {arch/arm/kvm => virt/kvm/arm}/vgic.c (100%) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..ff5aaf10e6ec 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #define KVM_USER_MEM_SLOTS 32 @@ -38,7 +38,7 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) -#include +#include struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..9184a491d172 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o -obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h similarity index 100% rename from arch/arm/include/asm/kvm_arch_timer.h rename to include/kvm/arm_arch_timer.h diff --git a/arch/arm/include/asm/kvm_vgic.h b/include/kvm/arm_vgic.h similarity index 100% rename from arch/arm/include/asm/kvm_vgic.h rename to include/kvm/arm_vgic.h diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c similarity index 99% rename from arch/arm/kvm/arch_timer.c rename to virt/kvm/arm/arch_timer.c index c55b6089e923..2d00b2925780 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include static struct timecounter *timecounter; static struct workqueue_struct *wqueue; diff --git a/arch/arm/kvm/vgic.c b/virt/kvm/arm/vgic.c similarity index 100% rename from arch/arm/kvm/vgic.c rename to virt/kvm/arm/vgic.c From 7ce8a35ca784be99a50520476c1a34aad2755b7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 14:31:02 +0100 Subject: [PATCH 003/287] KVM: get rid of $(addprefix ../../../virt/kvm/, ...) in Makefiles As requested by the KVM maintainers, remove the addprefix used to refer to the main KVM code from the arch code, and replace it with a KVM variable that does the same thing. Tested-by: Christian Borntraeger Cc: Paolo Bonzini Cc: Gleb Natapov Cc: Christoffer Dall Acked-by: Xiantao Zhang Cc: Tony Luck Cc: Fenghua Yu Cc: Alexander Graf Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Cornelia Huck Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 535cf7b3b13c7faed3dfabafb6598417de1129ca) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 2 +- arch/ia64/kvm/Makefile | 7 ++++--- arch/powerpc/kvm/Makefile | 13 +++++++------ arch/s390/kvm/Makefile | 3 ++- arch/x86/kvm/Makefile | 13 +++++++------ 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 9184a491d172..d99bee4950e5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o) +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 1a4053789d01..18e45ec49bbf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file) ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ +KVM := ../../../virt/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) +common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) +common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46c..008cd856c5b5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -5,9 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm +KVM := ../../../virt/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ - eventfd.o) +common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ + $(KVM)/eventfd.o CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. @@ -53,7 +54,7 @@ kvm-e500mc-objs := \ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - ../../../virt/kvm/coalesced_mmio.o \ + $(KVM)/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-module-objs := \ - ../../../virt/kvm/kvm_main.o \ - ../../../virt/kvm/eventfd.o \ + $(KVM)/kvm_main.o \ + $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ @@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o -kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d609e1d84048..bf4fb04d0112 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -5,12 +5,13 @@ CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. -kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o \ - irqchip.o) -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ - assigned-dev.o iommu.o) -kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) +KVM := ../../../virt/kvm + +kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ + $(KVM)/eventfd.o $(KVM)/irqchip.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o +kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o cpuid.o pmu.o From 9861d210e02d5fb6ffcd5ea992f62043105813b3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Mar 2013 13:41:35 +0000 Subject: [PATCH 004/287] ARM: KVM: arch_timers: zero CNTVOFF upon return to host To use the virtual counters from the host, we need to ensure that CNTVOFF doesn't change unexpectedly. When we change to a guest, we replace the host's CNTVOFF, but we don't restore it when returning to the host. As the host sets CNTVOFF to zero, and never changes it, we can simply zero CNTVOFF when returning to the host. This patch adds said zeroing to the return to host path. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Acked-by: Christoffer Dall (cherry picked from commit f793c23ebbe5afd1cabf4a42a3a297022213756f) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts_head.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 2b44b95a86dd..6f18695a09cb 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -503,6 +503,10 @@ vcpu .req r0 @ vcpu pointer always in r0 add r5, vcpu, r4 strd r2, r3, [r5] + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + 1: #endif @ Allow physical timer/counter access for the host From 16d32e6b5aad12c2566cd8c6918e1be3ecac06f5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 18:40:43 +0000 Subject: [PATCH 005/287] arm64: KVM: HYP mode idmap support Add the necessary infrastructure for identity-mapped HYP page tables. Idmap-ed code must be in the ".hyp.idmap.text" linker section. The rest of the HYP ends up in ".hyp.text". Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 2240bbb697354f5617d95e3ee104ca61bb812507) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ce2d97255ba9..55d0e035205f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,6 +17,19 @@ ENTRY(stext) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -48,6 +61,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -102,3 +116,9 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") From a51ca39794b462370c0b4f46b360370f55303454 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 10:46:47 +0000 Subject: [PATCH 006/287] arm64: KVM: EL2 register definitions Define all the useful bitfields for EL2 registers. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 0369f6a34b9facd16eea4236518ca6f9cbc9e5ef) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 245 +++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_arm.h diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h new file mode 100644 index 000000000000..a5f28e2720c7 --- /dev/null +++ b/arch/arm64/include/asm/kvm_arm.h @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_ARM_H__ +#define __ARM64_KVM_ARM_H__ + +#include + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_ID (UL(1) << 33) +#define HCR_CD (UL(1) << 32) +#define HCR_RW_SHIFT 31 +#define HCR_RW (UL(1) << HCR_RW_SHIFT) +#define HCR_TRVM (UL(1) << 30) +#define HCR_HCD (UL(1) << 29) +#define HCR_TDZ (UL(1) << 28) +#define HCR_TGE (UL(1) << 27) +#define HCR_TVM (UL(1) << 26) +#define HCR_TTLB (UL(1) << 25) +#define HCR_TPU (UL(1) << 24) +#define HCR_TPC (UL(1) << 23) +#define HCR_TSW (UL(1) << 22) +#define HCR_TAC (UL(1) << 21) +#define HCR_TIDCP (UL(1) << 20) +#define HCR_TSC (UL(1) << 19) +#define HCR_TID3 (UL(1) << 18) +#define HCR_TID2 (UL(1) << 17) +#define HCR_TID1 (UL(1) << 16) +#define HCR_TID0 (UL(1) << 15) +#define HCR_TWE (UL(1) << 14) +#define HCR_TWI (UL(1) << 13) +#define HCR_DC (UL(1) << 12) +#define HCR_BSU (3 << 10) +#define HCR_BSU_IS (UL(1) << 10) +#define HCR_FB (UL(1) << 9) +#define HCR_VA (UL(1) << 8) +#define HCR_VI (UL(1) << 7) +#define HCR_VF (UL(1) << 6) +#define HCR_AMO (UL(1) << 5) +#define HCR_IMO (UL(1) << 4) +#define HCR_FMO (UL(1) << 3) +#define HCR_PTW (UL(1) << 2) +#define HCR_SWIO (UL(1) << 1) +#define HCR_VM (UL(1) << 0) + +/* + * The bits we set in HCR: + * RW: 64bit by default, can be overriden for 32bit VMs + * TAC: Trap ACTLR + * TSC: Trap SMC + * TSW: Trap cache operations by set/way + * TWI: Trap WFI + * TIDCP: Trap L2CTLR/L2ECTLR + * BSU_IS: Upgrade barriers to the inner shareable domain + * FB: Force broadcast of all maintainance operations + * AMO: Override CPSR.A and enable signaling with VA + * IMO: Override CPSR.I and enable signaling with VI + * FMO: Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate + */ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ + HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ + HCR_SWIO | HCR_TIDCP | HCR_RW) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) + +/* Hyp System Control Register (SCTLR_EL2) bits */ +#define SCTLR_EL2_EE (1 << 25) +#define SCTLR_EL2_WXN (1 << 19) +#define SCTLR_EL2_I (1 << 12) +#define SCTLR_EL2_SA (1 << 3) +#define SCTLR_EL2_C (1 << 2) +#define SCTLR_EL2_A (1 << 1) +#define SCTLR_EL2_M 1 +#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ + SCTLR_EL2_SA | SCTLR_EL2_I) + +/* TCR_EL2 Registers bits */ +#define TCR_EL2_TBI (1 << 20) +#define TCR_EL2_PS (7 << 16) +#define TCR_EL2_PS_40B (2 << 16) +#define TCR_EL2_TG0 (1 << 14) +#define TCR_EL2_SH0 (3 << 12) +#define TCR_EL2_ORGN0 (3 << 10) +#define TCR_EL2_IRGN0 (3 << 8) +#define TCR_EL2_T0SZ 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ + TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) + +#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) + +/* VTCR_EL2 Registers bits */ +#define VTCR_EL2_PS_MASK (7 << 16) +#define VTCR_EL2_PS_40B (2 << 16) +#define VTCR_EL2_TG0_MASK (1 << 14) +#define VTCR_EL2_TG0_4K (0 << 14) +#define VTCR_EL2_TG0_64K (1 << 14) +#define VTCR_EL2_SH0_MASK (3 << 12) +#define VTCR_EL2_SH0_INNER (3 << 12) +#define VTCR_EL2_ORGN0_MASK (3 << 10) +#define VTCR_EL2_ORGN0_WBWA (1 << 10) +#define VTCR_EL2_IRGN0_MASK (3 << 8) +#define VTCR_EL2_IRGN0_WBWA (1 << 8) +#define VTCR_EL2_SL0_MASK (3 << 6) +#define VTCR_EL2_SL0_LVL1 (1 << 6) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_T0SZ_40B 24 + +#ifdef CONFIG_ARM64_64K_PAGES +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 64kB pages (TG0 = 1) + * 2 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) +#else +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 4kB pages (TG0 = 0) + * 3 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) +#endif + +#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) +#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (48LLU) +#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) + +/* Hyp System Trap Register */ +#define HSTR_EL2_TTEE (1 << 16) +#define HSTR_EL2_T(x) (1 << x) + +/* Hyp Coprocessor Trap Register */ +#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TFP (1 << 10) + +/* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TDRA (1 << 11) +#define MDCR_EL2_TDOSA (1 << 10) +#define MDCR_EL2_TDA (1 << 9) +#define MDCR_EL2_TDE (1 << 8) +#define MDCR_EL2_HPME (1 << 7) +#define MDCR_EL2_TPM (1 << 6) +#define MDCR_EL2_TPMCR (1 << 5) +#define MDCR_EL2_HPMN_MASK (0x1F) + +/* Exception Syndrome Register (ESR) bits */ +#define ESR_EL2_EC_SHIFT (26) +#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_ISS (ESR_EL2_IL - 1) +#define ESR_EL2_ISV_SHIFT (24) +#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_SAS_SHIFT (22) +#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SSE (1 << 21) +#define ESR_EL2_SRT_SHIFT (16) +#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) +#define ESR_EL2_SF (1 << 15) +#define ESR_EL2_AR (1 << 14) +#define ESR_EL2_EA (1 << 9) +#define ESR_EL2_CM (1 << 8) +#define ESR_EL2_S1PTW (1 << 7) +#define ESR_EL2_WNR (1 << 6) +#define ESR_EL2_FSC (0x3f) +#define ESR_EL2_FSC_TYPE (0x3c) + +#define ESR_EL2_CV_SHIFT (24) +#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_COND_SHIFT (20) +#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) + + +#define FSC_FAULT (0x04) +#define FSC_PERM (0x0c) + +/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ +#define HPFAR_MASK (~0xFUL) + +#define ESR_EL2_EC_UNKNOWN (0x00) +#define ESR_EL2_EC_WFI (0x01) +#define ESR_EL2_EC_CP15_32 (0x03) +#define ESR_EL2_EC_CP15_64 (0x04) +#define ESR_EL2_EC_CP14_MR (0x05) +#define ESR_EL2_EC_CP14_LS (0x06) +#define ESR_EL2_EC_FP_ASIMD (0x07) +#define ESR_EL2_EC_CP10_ID (0x08) +#define ESR_EL2_EC_CP14_64 (0x0C) +#define ESR_EL2_EC_ILL_ISS (0x0E) +#define ESR_EL2_EC_SVC32 (0x11) +#define ESR_EL2_EC_HVC32 (0x12) +#define ESR_EL2_EC_SMC32 (0x13) +#define ESR_EL2_EC_SVC64 (0x15) +#define ESR_EL2_EC_HVC64 (0x16) +#define ESR_EL2_EC_SMC64 (0x17) +#define ESR_EL2_EC_SYS64 (0x18) +#define ESR_EL2_EC_IABT (0x20) +#define ESR_EL2_EC_IABT_HYP (0x21) +#define ESR_EL2_EC_PC_ALIGN (0x22) +#define ESR_EL2_EC_DABT (0x24) +#define ESR_EL2_EC_DABT_HYP (0x25) +#define ESR_EL2_EC_SP_ALIGN (0x26) +#define ESR_EL2_EC_FP_EXC32 (0x28) +#define ESR_EL2_EC_FP_EXC64 (0x2C) +#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_BREAKPT (0x30) +#define ESR_EL2_EC_BREAKPT_HYP (0x31) +#define ESR_EL2_EC_SOFTSTP (0x32) +#define ESR_EL2_EC_SOFTSTP_HYP (0x33) +#define ESR_EL2_EC_WATCHPT (0x34) +#define ESR_EL2_EC_WATCHPT_HYP (0x35) +#define ESR_EL2_EC_BKPT32 (0x38) +#define ESR_EL2_EC_VECTOR32 (0x3A) +#define ESR_EL2_EC_BRK64 (0x3C) + +#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 + +#endif /* __ARM64_KVM_ARM_H__ */ From 44848d746b7f29803f7ac9579d594f368a1cad11 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 11:16:40 +0000 Subject: [PATCH 007/287] arm64: KVM: system register definitions for 64bit guests Define the saved/restored registers for 64bit guests. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit fd9fc9f73cc2070d2637a7ee082800a817fd45f3) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 68 ++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_asm.h diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h new file mode 100644 index 000000000000..591ac219964a --- /dev/null +++ b/arch/arm64/include/asm/kvm_asm.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* + * 0 is reserved as an invalid value. + * Order *must* be kept in sync with the hyp switch code. + */ +#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ +#define CSSELR_EL1 2 /* Cache Size Selection Register */ +#define SCTLR_EL1 3 /* System Control Register */ +#define ACTLR_EL1 4 /* Auxilliary Control Register */ +#define CPACR_EL1 5 /* Coprocessor Access Control */ +#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ +#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ +#define TCR_EL1 8 /* Translation Control Register */ +#define ESR_EL1 9 /* Exception Syndrome Register */ +#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ +#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ +#define FAR_EL1 12 /* Fault Address Register */ +#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ +#define VBAR_EL1 14 /* Vector Base Address Register */ +#define CONTEXTIDR_EL1 15 /* Context ID Register */ +#define TPIDR_EL0 16 /* Thread ID, User R/W */ +#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ +#define TPIDR_EL1 18 /* Thread ID, Privileged */ +#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ +#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +#define NR_SYS_REGS 21 + +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_TRAP 1 + +#ifndef __ASSEMBLY__ +struct kvm; +struct kvm_vcpu; + +extern char __kvm_hyp_init[]; +extern char __kvm_hyp_init_end[]; + +extern char __kvm_hyp_vector[]; + +extern char __kvm_hyp_code_start[]; +extern char __kvm_hyp_code_end[]; + +extern void __kvm_flush_vm_context(void); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +#endif + +#endif /* __ARM_KVM_ASM_H__ */ From b04c4cdfff4b701f01f1dae2ada92b580cc281d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 13:27:52 +0000 Subject: [PATCH 008/287] arm64: KVM: Basic ESR_EL2 helpers and vcpu register access Implements helpers for dealing with the EL2 syndrome register as well as accessing the vcpu registers. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 83a4979483c8e597b69d4403794f87fea51fa549) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 158 +++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_emulate.h diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..6c1725e93b0b --- /dev/null +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/kvm_emulate.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_EMULATE_H__ +#define __ARM64_KVM_EMULATE_H__ + +#include +#include +#include +#include +#include + +void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; +} + +static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; +} + +static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; +} + +static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +{ + return false; /* 32bit? Bahhh... */ +} + +static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +{ + return true; /* No conditionals on arm64 */ +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + *vcpu_pc(vcpu) += 4; +} + +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ +} + +static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} + +/* Get vcpu SPSR for current mode */ +static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + + return mode != PSR_MODE_EL0t; +} + +static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.esr_el2; +} + +static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.far_el2; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; +} + +static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); +} + +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); +} + +static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); +} + +static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); +} + +static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); +} + +static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +{ + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); +} + +static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; +} + +#endif /* __ARM64_KVM_EMULATE_H__ */ From fbd17d89d0b567e7c5c12955648cf728f0c04169 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Dec 2012 12:27:42 +0000 Subject: [PATCH 009/287] arm64: KVM: fault injection into a guest Implement the injection of a fault (undefined, data abort or prefetch abort) into a 64bit guest. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit aa8eff9bfbd531e0fcc8e68052f4ac545cd004c5) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/inject_fault.c | 126 ++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 arch/arm64/kvm/inject_fault.c diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c new file mode 100644 index 000000000000..54f656271266 --- /dev/null +++ b/arch/arm64/kvm/inject_fault.c @@ -0,0 +1,126 @@ +/* + * Fault injection for 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ + PSR_I_BIT | PSR_D_BIT) +#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_aarch32; + u32 esr = 0; + + is_aarch32 = vcpu_mode_is_32bit(vcpu); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + vcpu_sys_reg(vcpu, FAR_EL1) = addr; + + /* + * Build an {i,d}abort, depending on the level and the + * instruction set. Report an external synchronous abort. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + /* + * Here, the guest runs in AArch64 mode when in EL1. If we get + * an AArch32 fault, it means we managed to trap an EL0 fault. + */ + if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + else + esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + + if (!is_iabt) + esr |= ESR_EL1_EC_DABT_EL0; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; +} + +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + /* + * Build an unknown exception, depending on the instruction + * set. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr; +} + +/** + * kvm_inject_dabt - inject a data abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, false, addr); +} + +/** + * kvm_inject_pabt - inject a prefetch abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, true, addr); +} + +/** + * kvm_inject_undefined - inject an undefined instruction into the guest + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + inject_undef64(vcpu); +} From bef26a85389996c6f9c79f09d2ba09da0cd46351 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 15:35:24 +0000 Subject: [PATCH 010/287] arm64: KVM: architecture specific MMU backend Define the arm64 specific MMU backend: - HYP/kernel VA offset - S2 4/64kB definitions - S2 page table populating and flushing - icache cleaning Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 37c437532b0126d1df5685080db9cecf3d918175) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_mmu.h | 135 +++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_mmu.h diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h new file mode 100644 index 000000000000..efe609c6a3c9 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_MMU_H__ +#define __ARM64_KVM_MMU_H__ + +#include +#include + +/* + * As we only have the TTBR0_EL2 register, we cannot express + * "negative" addresses. This makes it impossible to directly share + * mappings with the kernel. + * + * Instead, give the HYP mode its own VA region at a fixed offset from + * the kernel by just masking the top bits (which are all ones for a + * kernel address). + */ +#define HYP_PAGE_OFFSET_SHIFT VA_BITS +#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) + +/* + * Our virtual mapping for the idmap-ed MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the last + * possible page, where no kernel mapping will ever exist. + */ +#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) + +#ifdef __ASSEMBLY__ + +/* + * Convert a kernel VA into a HYP VA. + * reg: VA to be converted. + */ +.macro kern_hyp_va reg + and \reg, \reg, #HYP_PAGE_OFFSET_MASK +.endm + +#else + +#include +#include + +#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) + +/* + * Align KVM with the kernel's view of physical memory. Should be + * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + */ +#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) + +/* Make sure we get the right size, and thus the right alignment */ +#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT)) +#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + +int create_hyp_mappings(void *from, void *to); +int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); + +int kvm_alloc_stage2_pgd(struct kvm *kvm); +void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size); + +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); + +phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); +int kvm_mmu_init(void); +void kvm_clear_hyp_idmap(void); + +#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) + +static inline bool kvm_is_write_fault(unsigned long esr) +{ + unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; + + if (esr_ec == ESR_EL2_EC_IABT) + return false; + + if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) + return false; + + return true; +} + +static inline void kvm_clean_dcache_area(void *addr, size_t size) {} +static inline void kvm_clean_pgd(pgd_t *pgd) {} +static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} +static inline void kvm_clean_pte(pte_t *pte) {} +static inline void kvm_clean_pte_entry(pte_t *pte) {} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= PTE_S2_RDWR; +} + +struct kvm; + +static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +{ + if (!icache_is_aliasing()) { /* PIPT */ + unsigned long hva = gfn_to_hva(kvm, gfn); + flush_icache_range(hva, hva + PAGE_SIZE); + } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +#endif /* __ASSEMBLY__ */ +#endif /* __ARM64_KVM_MMU_H__ */ From becdc5b63f089f0664fc1381fa168b7a7dce09dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:28 +0000 Subject: [PATCH 011/287] arm64: KVM: user space interface Provide the kvm.h file that defines the user space visible interface. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 54f81d0eb93896da73d1636bca84cf90f52cabdf) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 arch/arm64/include/uapi/asm/kvm.h diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..4e64570a20c9 --- /dev/null +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/uapi/asm/kvm.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#define KVM_SPSR_EL1 0 +#define KVM_NR_SPSR 1 + +#ifndef __ASSEMBLY__ +#include +#include + +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +struct kvm_regs { + struct user_pt_regs regs; /* sp = sp_el0 */ + + __u64 sp_el1; + __u64 elr_el1; + + __u64 spsr[KVM_NR_SPSR]; + + struct user_fpsimd_state fp_regs; +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_AEM_V8 0 +#define KVM_ARM_TARGET_FOUNDATION_V8 1 +#define KVM_ARM_TARGET_CORTEX_A57 2 + +#define KVM_ARM_NUM_TARGETS 3 + +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + +#endif + +#endif /* __ARM_KVM_H__ */ From 068d803462e03f87953424ea6f3910578f3f293e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:15:34 +0000 Subject: [PATCH 012/287] arm64: KVM: system register handling Provide 64bit system register handling, modeled after the cp15 handling for ARM. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 7c8c5e6a9101ea57a1c2c9faff0917e79251a21e) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_coproc.h | 51 ++ arch/arm64/include/uapi/asm/kvm.h | 29 + arch/arm64/kvm/sys_regs.c | 883 ++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.h | 138 +++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 1102 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_coproc.h create mode 100644 arch/arm64/kvm/sys_regs.c create mode 100644 arch/arm64/kvm/sys_regs.h diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..9b4477acb554 --- /dev/null +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/asm/kvm_coproc.h + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_COPROC_H__ +#define __ARM64_KVM_COPROC_H__ + +#include + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +struct kvm_sys_reg_table { + const struct sys_reg_desc *table; + size_t num; +}; + +struct kvm_sys_reg_target_table { + struct kvm_sys_reg_table table64; +}; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table); + +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#define kvm_coproc_table_init kvm_sys_reg_table_init +void kvm_sys_reg_table_init(void); + +struct kvm_one_reg; +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 4e64570a20c9..ebac919dc0ca 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -92,6 +92,35 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32)) + +/* Some registers need more space to represent values. */ +#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00 +#define KVM_REG_ARM_DEMUX_ID_SHIFT 8 +#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT) +#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF +#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0 + +/* AArch64 system registers */ +#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c new file mode 100644 index 000000000000..52fff0ae3442 --- /dev/null +++ b/arch/arm64/kvm/sys_regs.c @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/coproc.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sys_regs.h" + +/* + * All of this file is extremly similar to the ARM coproc.c, but the + * types are different. My gut feeling is that it should be pretty + * easy to merge, but that would be an ABI breakage -- again. VFP + * would also need to be abstracted. + */ + +/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ +static u32 cache_levels; + +/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ +#define CSSELR_MAX 12 + +/* Which cache CCSIDR represents depends on CSSELR value. */ +static u32 get_ccsidr(u32 csselr) +{ + u32 ccsidr; + + /* Make sure noone else changes CSSELR during this! */ + local_irq_disable(); + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + local_irq_enable(); + + return ccsidr; +} + +static void do_dc_cisw(u32 val) +{ + asm volatile("dc cisw, %x0" : : "r" (val)); + dsb(); +} + +static void do_dc_csw(u32 val) +{ + asm volatile("dc csw, %x0" : : "r" (val)); + dsb(); +} + +/* See note at ARM ARM B1.14.4 */ +static bool access_dcsw(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + int cpu; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + cpu = get_cpu(); + + cpumask_setall(&vcpu->arch.require_dcache_flush); + cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); + + /* If we were already preempted, take the long way around */ + if (cpu != vcpu->arch.last_pcpu) { + flush_cache_all(); + goto done; + } + + val = *vcpu_reg(vcpu, p->Rt); + + switch (p->CRm) { + case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ + case 14: /* DCCISW */ + do_dc_cisw(val); + break; + + case 10: /* DCCSW */ + do_dc_csw(val); + break; + } + +done: + put_cpu(); + + return true; +} + +/* + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + */ +static bool pm_fake(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + else + return read_zero(vcpu, p); +} + +static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 amair; + + asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); + vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; +} + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + /* + * Simply map the vcpu_id into the Aff0 field of the MPIDR. + */ + vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); +} + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc sys_reg_descs[] = { + /* DC ISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), + access_dcsw }, + /* DC CSW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), + access_dcsw }, + /* DC CISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), + access_dcsw }, + + /* MPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), + NULL, reset_mpidr, MPIDR_EL1 }, + /* SCTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + /* CPACR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), + NULL, reset_val, CPACR_EL1, 0 }, + /* TTBR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, TTBR0_EL1 }, + /* TTBR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, TTBR1_EL1 }, + /* TCR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), + NULL, reset_val, TCR_EL1, 0 }, + + /* AFSR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), + NULL, reset_unknown, AFSR0_EL1 }, + /* AFSR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), + NULL, reset_unknown, AFSR1_EL1 }, + /* ESR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, ESR_EL1 }, + /* FAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, FAR_EL1 }, + + /* PMINTENSET_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), + pm_fake }, + /* PMINTENCLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), + pm_fake }, + + /* MAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, MAIR_EL1 }, + /* AMAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), + NULL, reset_amair_el1, AMAIR_EL1 }, + + /* VBAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), + NULL, reset_val, VBAR_EL1, 0 }, + /* CONTEXTIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), + NULL, reset_val, CONTEXTIDR_EL1, 0 }, + /* TPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), + NULL, reset_unknown, TPIDR_EL1 }, + + /* CNTKCTL_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), + NULL, reset_val, CNTKCTL_EL1, 0}, + + /* CSSELR_EL1 */ + { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, CSSELR_EL1 }, + + /* PMCR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), + pm_fake }, + /* PMCNTENSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), + pm_fake }, + /* PMCNTENCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), + pm_fake }, + /* PMOVSCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), + pm_fake }, + /* PMSWINC_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), + pm_fake }, + /* PMSELR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), + pm_fake }, + /* PMCEID0_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), + pm_fake }, + /* PMCEID1_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), + pm_fake }, + /* PMCCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), + pm_fake }, + /* PMXEVTYPER_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), + pm_fake }, + /* PMXEVCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), + pm_fake }, + /* PMUSERENR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), + pm_fake }, + /* PMOVSSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), + pm_fake }, + + /* TPIDR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), + NULL, reset_unknown, TPIDR_EL0 }, + /* TPIDRRO_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), + NULL, reset_unknown, TPIDRRO_EL0 }, +}; + +/* Target specific emulation tables */ +static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table) +{ + target_tables[target] = table; +} + +/* Get specific register table for this target. */ +static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num) +{ + struct kvm_sys_reg_target_table *table; + + table = target_tables[target]; + *num = table->table64.num; + return table->table64.table; +} + +static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, + const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const struct sys_reg_desc *r = &table[i]; + + if (params->Op0 != r->Op0) + continue; + if (params->Op1 != r->Op1) + continue; + if (params->CRn != r->CRn) + continue; + if (params->CRm != r->CRm) + continue; + if (params->Op2 != r->Op2) + continue; + + return r; + } + return NULL; +} + +static int emulate_sys_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + /* If access function fails, it should complain. */ + } else { + kvm_err("Unsupported guest sys_reg access at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + } + kvm_inject_undefined(vcpu); + return 1; +} + +static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *table, size_t num) +{ + unsigned long i; + + for (i = 0; i < num; i++) + if (table[i].reset) + table[i].reset(vcpu, &table[i]); +} + +/** + * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_hsr(vcpu); + + params.Op0 = (esr >> 20) & 3; + params.Op1 = (esr >> 14) & 0x7; + params.CRn = (esr >> 10) & 0xf; + params.CRm = (esr >> 1) & 0xf; + params.Op2 = (esr >> 17) & 0x7; + params.Rt = (esr >> 5) & 0x1f; + params.is_write = !(esr & 1); + + return emulate_sys_reg(vcpu, ¶ms); +} + +/****************************************************************************** + * Userspace API + *****************************************************************************/ + +static bool index_to_params(u64 id, struct sys_reg_params *params) +{ + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U64: + /* Any unused index bits means it's not valid. */ + if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK + | KVM_REG_ARM_COPROC_MASK + | KVM_REG_ARM64_SYSREG_OP0_MASK + | KVM_REG_ARM64_SYSREG_OP1_MASK + | KVM_REG_ARM64_SYSREG_CRN_MASK + | KVM_REG_ARM64_SYSREG_CRM_MASK + | KVM_REG_ARM64_SYSREG_OP2_MASK)) + return false; + params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK) + >> KVM_REG_ARM64_SYSREG_OP0_SHIFT); + params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK) + >> KVM_REG_ARM64_SYSREG_OP1_SHIFT); + params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK) + >> KVM_REG_ARM64_SYSREG_CRN_SHIFT); + params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK) + >> KVM_REG_ARM64_SYSREG_CRM_SHIFT); + params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK) + >> KVM_REG_ARM64_SYSREG_OP2_SHIFT); + return true; + default: + return false; + } +} + +/* Decode an index value, and find the sys_reg_desc entry. */ +static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, + u64 id) +{ + size_t num; + const struct sys_reg_desc *table, *r; + struct sys_reg_params params; + + /* We only do sys_reg for now. */ + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) + return NULL; + + if (!index_to_params(id, ¶ms)) + return NULL; + + table = get_target_table(vcpu->arch.target, &num); + r = find_reg(¶ms, table, num); + if (!r) + r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + /* Not saved in the sys_reg array? */ + if (r && !r->reg) + r = NULL; + + return r; +} + +/* + * These are the invariant sys_reg registers: we let the guest see the + * host versions of these, so they're part of the guest state. + * + * A future CPU may provide a mechanism to present different values to + * the guest, or a future kvm may trap them. + */ + +#define FUNCTION_INVARIANT(reg) \ + static void get_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ + { \ + u64 val; \ + \ + asm volatile("mrs %0, " __stringify(reg) "\n" \ + : "=r" (val)); \ + ((struct sys_reg_desc *)r)->val = val; \ + } + +FUNCTION_INVARIANT(midr_el1) +FUNCTION_INVARIANT(ctr_el0) +FUNCTION_INVARIANT(revidr_el1) +FUNCTION_INVARIANT(id_pfr0_el1) +FUNCTION_INVARIANT(id_pfr1_el1) +FUNCTION_INVARIANT(id_dfr0_el1) +FUNCTION_INVARIANT(id_afr0_el1) +FUNCTION_INVARIANT(id_mmfr0_el1) +FUNCTION_INVARIANT(id_mmfr1_el1) +FUNCTION_INVARIANT(id_mmfr2_el1) +FUNCTION_INVARIANT(id_mmfr3_el1) +FUNCTION_INVARIANT(id_isar0_el1) +FUNCTION_INVARIANT(id_isar1_el1) +FUNCTION_INVARIANT(id_isar2_el1) +FUNCTION_INVARIANT(id_isar3_el1) +FUNCTION_INVARIANT(id_isar4_el1) +FUNCTION_INVARIANT(id_isar5_el1) +FUNCTION_INVARIANT(clidr_el1) +FUNCTION_INVARIANT(aidr_el1) + +/* ->val is filled in by kvm_sys_reg_table_init() */ +static struct sys_reg_desc invariant_sys_regs[] = { + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, get_midr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), + NULL, get_revidr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), + NULL, get_id_pfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), + NULL, get_id_pfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), + NULL, get_id_dfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), + NULL, get_id_afr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), + NULL, get_id_mmfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), + NULL, get_id_mmfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), + NULL, get_id_mmfr2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), + NULL, get_id_mmfr3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + NULL, get_id_isar0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), + NULL, get_id_isar1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + NULL, get_id_isar2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), + NULL, get_id_isar3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), + NULL, get_id_isar4_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), + NULL, get_id_isar5_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_clidr_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111), + NULL, get_aidr_el1 }, + { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_ctr_el0 }, +}; + +static int reg_from_user(void *val, const void __user *uaddr, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int reg_to_user(void __user *uaddr, const void *val, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int get_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + return reg_to_user(uaddr, &r->val, id); +} + +static int set_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + int err; + u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* This is what we mean by invariant: you can't change it. */ + if (r->val != val) + return -EINVAL; + + return 0; +} + +static bool is_valid_cache(u32 val) +{ + u32 level, ctype; + + if (val >= CSSELR_MAX) + return -ENOENT; + + /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ + level = (val >> 1); + ctype = (cache_levels >> (level * 3)) & 7; + + switch (ctype) { + case 0: /* No cache */ + return false; + case 1: /* Instruction cache only */ + return (val & 1); + case 2: /* Data cache only */ + case 4: /* Unified cache */ + return !(val & 1); + case 3: /* Separate instruction and data caches */ + return true; + default: /* Reserved: we can't know instruction or data. */ + return false; + } +} + +static int demux_c15_get(u64 id, void __user *uaddr) +{ + u32 val; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + return put_user(get_ccsidr(val), uval); + default: + return -ENOENT; + } +} + +static int demux_c15_set(u64 id, void __user *uaddr) +{ + u32 val, newval; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + if (get_user(newval, uval)) + return -EFAULT; + + /* This is also invariant: you can't change it. */ + if (newval != get_ccsidr(val)) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_get(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return get_invariant_sys_reg(reg->id, uaddr); + + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); +} + +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_set(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return set_invariant_sys_reg(reg->id, uaddr); + + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); +} + +static unsigned int num_demux_regs(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < CSSELR_MAX; i++) + if (is_valid_cache(i)) + count++; + + return count; +} + +static int write_demux_regids(u64 __user *uindices) +{ + u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + unsigned int i; + + val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; + for (i = 0; i < CSSELR_MAX; i++) { + if (!is_valid_cache(i)) + continue; + if (put_user(val | i, uindices)) + return -EFAULT; + uindices++; + } + return 0; +} + +static u64 sys_reg_to_index(const struct sys_reg_desc *reg) +{ + return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | + KVM_REG_ARM64_SYSREG | + (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) | + (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) | + (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) | + (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) | + (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT)); +} + +static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) +{ + if (!*uind) + return true; + + if (put_user(sys_reg_to_index(reg), *uind)) + return false; + + (*uind)++; + return true; +} + +/* Assumed ordered tables, see kvm_sys_reg_table_init. */ +static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) +{ + const struct sys_reg_desc *i1, *i2, *end1, *end2; + unsigned int total = 0; + size_t num; + + /* We check for duplicates here, to allow arch-specific overrides. */ + i1 = get_target_table(vcpu->arch.target, &num); + end1 = i1 + num; + i2 = sys_reg_descs; + end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); + + BUG_ON(i1 == end1 || i2 == end2); + + /* Walk carefully, as both tables may refer to the same register. */ + while (i1 || i2) { + int cmp = cmp_sys_reg(i1, i2); + /* target-specific overrides generic entry. */ + if (cmp <= 0) { + /* Ignore registers we trap but don't save. */ + if (i1->reg) { + if (!copy_reg_to_user(i1, &uind)) + return -EFAULT; + total++; + } + } else { + /* Ignore registers we trap but don't save. */ + if (i2->reg) { + if (!copy_reg_to_user(i2, &uind)) + return -EFAULT; + total++; + } + } + + if (cmp <= 0 && ++i1 == end1) + i1 = NULL; + if (cmp >= 0 && ++i2 == end2) + i2 = NULL; + } + return total; +} + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) +{ + return ARRAY_SIZE(invariant_sys_regs) + + num_demux_regs() + + walk_sys_regs(vcpu, (u64 __user *)NULL); +} + +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + int err; + + /* Then give them all the invariant registers' indices. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { + if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) + return -EFAULT; + uindices++; + } + + err = walk_sys_regs(vcpu, uindices); + if (err < 0) + return err; + uindices += err; + + return write_demux_regids(uindices); +} + +void kvm_sys_reg_table_init(void) +{ + unsigned int i; + struct sys_reg_desc clidr; + + /* Make sure tables are unique and in order. */ + for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) + BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + + /* We abuse the reset function to overwrite the table itself. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) + invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); + + /* + * CLIDR format is awkward, so clean it up. See ARM B4.1.20: + * + * If software reads the Cache Type fields from Ctype1 + * upwards, once it has seen a value of 0b000, no caches + * exist at further-out levels of the hierarchy. So, for + * example, if Ctype3 is the first Cache Type field with a + * value of 0b000, the values of Ctype4 to Ctype7 must be + * ignored. + */ + get_clidr_el1(NULL, &clidr); /* Ugly... */ + cache_levels = clidr.val; + for (i = 0; i < 7; i++) + if (((cache_levels >> (i*3)) & 7) == 0) + break; + /* Clear all higher bits. */ + cache_levels &= (1 << (i*3))-1; +} + +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) +{ + size_t num; + const struct sys_reg_desc *table; + + /* Catch someone adding a register without putting in reset entry. */ + memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + + /* Generic chip reset first (so target could override). */ + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + table = get_target_table(vcpu->arch.target, &num); + reset_sys_reg_descs(vcpu, table, num); + + for (num = 1; num < NR_SYS_REGS; num++) + if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset vcpu_sys_reg(%zi)", num); +} diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h new file mode 100644 index 000000000000..d50d3722998e --- /dev/null +++ b/arch/arm64/kvm/sys_regs.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/coproc.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ +#define __ARM64_KVM_SYS_REGS_LOCAL_H__ + +struct sys_reg_params { + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + u8 Rt; + bool is_write; +}; + +struct sys_reg_desc { + /* MRS/MSR instruction which accesses it. */ + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + + /* Trapped access from guest, if non-NULL. */ + bool (*access)(struct kvm_vcpu *, + const struct sys_reg_params *, + const struct sys_reg_desc *); + + /* Initialization for vcpu. */ + void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *); + + /* Index into sys_reg[], or 0 if we don't need to save it. */ + int reg; + + /* Value (usually reset value) */ + u64 val; +}; + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* Look, we even formatted it for you to paste into the table! */ + kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); +} + +static inline bool ignore_write(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + return true; +} + +static inline bool read_zero(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + *vcpu_reg(vcpu, p->Rt) = 0; + return true; +} + +static inline bool write_to_read_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg write to read-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +static inline bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg read to write-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +/* Reset functions */ +static inline void reset_unknown(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; +} + +static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = r->val; +} + +static inline int cmp_sys_reg(const struct sys_reg_desc *i1, + const struct sys_reg_desc *i2) +{ + BUG_ON(i1 == i2); + if (!i1) + return 1; + else if (!i2) + return -1; + if (i1->Op0 != i2->Op0) + return i1->Op0 - i2->Op0; + if (i1->Op1 != i2->Op1) + return i1->Op1 - i2->Op1; + if (i1->CRn != i2->CRn) + return i1->CRn - i2->CRn; + if (i1->CRm != i2->CRm) + return i1->CRm - i2->CRm; + return i1->Op2 - i2->Op2; +} + + +#define Op0(_x) .Op0 = _x +#define Op1(_x) .Op1 = _x +#define CRn(_x) .CRn = _x +#define CRm(_x) .CRm = _x +#define Op2(_x) .Op2 = _x + +#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d88c8ee00c8b..97277d333e82 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -783,6 +783,7 @@ struct kvm_dirty_tlb { #define KVM_REG_IA64 0x3000000000000000ULL #define KVM_REG_ARM 0x4000000000000000ULL #define KVM_REG_S390 0x5000000000000000ULL +#define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 From 3107348ebf0b00722a27b7a4a32c1f4c34fdb1e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 17:30:48 +0000 Subject: [PATCH 013/287] arm64: KVM: CPU specific system registers handling Add the support code for CPU specific system registers. Not much here yet. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b990a9d3152bddca62cc1f8bf80518430b98737b) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs_generic_v8.c | 85 ++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 arch/arm64/kvm/sys_regs_generic_v8.c diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c new file mode 100644 index 000000000000..d4e803907312 --- /dev/null +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/coproc_a15.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sys_regs.h" + +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + return true; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr; + + asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); + vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; +} + +/* + * Implementation specific sys-reg registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc genericv8_sys_regs[] = { + /* ACTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr, reset_actlr, ACTLR_EL1 }, +}; + +static struct kvm_sys_reg_target_table genericv8_target_table = { + .table64 = { + .table = genericv8_sys_regs, + .num = ARRAY_SIZE(genericv8_sys_regs), + }, +}; + +static int __init sys_reg_genericv8_init(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) + BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], + &genericv8_sys_regs[i]) >= 0); + + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, + &genericv8_target_table); + return 0; +} +late_initcall(sys_reg_genericv8_init); From cac664305a439463255eb7954ecd375b39f4299f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:23:59 +0000 Subject: [PATCH 014/287] arm64: KVM: virtual CPU reset Provide the reset code for a virtual CPU booted in 64bit mode. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit f4672752c321ea36ce099cebdd7a082a8f327505) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/reset.c | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 arch/arm64/kvm/reset.c diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c new file mode 100644 index 000000000000..f6536a06231a --- /dev/null +++ b/arch/arm64/kvm/reset.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/reset.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* + * ARMv8 Reset Values + */ +static const struct kvm_regs default_regs_reset = { + .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | + PSR_F_BIT | PSR_D_BIT), +}; + +int kvm_arch_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + default: + r = 0; + } + + return r; +} + +/** + * kvm_reset_vcpu - sets core registers and sys_regs to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on + * the virtual CPU struct to their architectually defined reset + * values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + const struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + default: + cpu_reset = &default_regs_reset; + break; + } + + /* Reset core registers */ + memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + + /* Reset system registers */ + kvm_reset_sys_regs(vcpu); + + return 0; +} From 913d79134dee46fc407ad2f3819bb5dce2f47e38 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:28 +0000 Subject: [PATCH 015/287] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions Provide the architecture dependent structures for VM and vcpu abstractions. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 4f8d6632ec71372a3b8dbb4775662c2c9025d173) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 186 ++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_host.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h new file mode 100644 index 000000000000..4a2622f5e81f --- /dev/null +++ b/arch/arm64/include/asm/kvm_host.h @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/asm/kvm_host.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_HOST_H__ +#define __ARM64_KVM_HOST_H__ + +#include +#include +#include + +#define KVM_MAX_VCPUS 4 +#define KVM_USER_MEM_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#include +#include + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +int kvm_arch_dev_ioctl_check_extension(long ext); + +struct kvm_arch { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* 1-level 2nd stage table and lock */ + spinlock_t pgd_lock; + pgd_t *pgd; + + /* VTTBR value associated with above pgd and vmid */ + u64 vttbr; + + /* Interrupt controller */ + struct vgic_dist vgic; + + /* Timer */ + struct arch_timer_kvm timer; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_fault_info { + u32 esr_el2; /* Hyp Syndrom Register */ + u64 far_el2; /* Hyp Fault Address Register */ + u64 hpfar_el2; /* Hyp IPA Fault Address Register */ +}; + +struct kvm_cpu_context { + struct kvm_regs gp_regs; + u64 sys_regs[NR_SYS_REGS]; +}; + +typedef struct kvm_cpu_context kvm_cpu_context_t; + +struct kvm_vcpu_arch { + struct kvm_cpu_context ctxt; + + /* HYP configuration */ + u64 hcr_el2; + + /* Exception Information */ + struct kvm_vcpu_fault_info fault; + + /* Pointer to host CPU context */ + kvm_cpu_context_t *host_cpu_context; + + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + /* dcache set/way operation pending */ + int last_pcpu; + cpumask_t require_dcache_flush; + + /* Don't run the guest */ + bool pause; + + /* IO related fields */ + struct kvm_decode mmio_decode; + + /* Interrupt related fields */ + u64 irq_lines; /* IRQ and FIQ levels */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + + /* Target CPU and feature flags */ + u32 target; + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* Detect first run of a vcpu */ + bool has_run_once; +}; + +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +struct kvm; +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + +/* We do not have shadow page tables, hence the empty hooks */ +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +struct kvm_vcpu *kvm_arm_get_running_vcpu(void); +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); + +u64 kvm_call_hyp(void *hypfn, ...); + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + +#endif /* __ARM64_KVM_HOST_H__ */ From 56a8fbf4b8d368562273336cf8f43068aaed91a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:50 +0000 Subject: [PATCH 016/287] arm64: KVM: MMIO access backend Define the necessary structures to perform an MMIO access. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit d7246bf3571a82834984a42db52261525bc11159) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_mmio.h | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_mmio.h diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h new file mode 100644 index 000000000000..fc2f689c0694 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_MMIO_H__ +#define __ARM64_KVM_MMIO_H__ + +#include +#include +#include + +/* + * This is annoying. The mmio code requires this, even if we don't + * need any decoding. To be fixed. + */ +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +/* + * The in-kernel MMIO emulation code wants to use a copy of run->mmio, + * which is an anonymous type. Use our own type instead. + */ +struct kvm_exit_mmio { + phys_addr_t phys_addr; + u8 data[8]; + u32 len; + bool is_write; +}; + +static inline void kvm_prepare_mmio(struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + run->mmio.phys_addr = mmio->phys_addr; + run->mmio.len = mmio->len; + run->mmio.is_write = mmio->is_write; + memcpy(run->mmio.data, mmio->data, mmio->len); + run->exit_reason = KVM_EXIT_MMIO; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + +#endif /* __ARM64_KVM_MMIO_H__ */ From f98733fb49495d20251f55dc1ddff88f5761a797 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:37:02 +0000 Subject: [PATCH 017/287] arm64: KVM: guest one-reg interface Let userspace play with the guest registers. Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 2f4a07c5f9fe4a5cdb9867e1e2fcab3165846ea7) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/guest.c | 259 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 arch/arm64/kvm/guest.c diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c new file mode 100644 index 000000000000..3d7518a7ebaa --- /dev/null +++ b/arch/arm64/kvm/guest.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/guest.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* + * Because the kvm_regs structure is a mix of 32, 64 and + * 128bit fields, we index it as if it was a 32bit + * array. Hence below, nr_regs is the number of entries, and + * off the index in the "array". + */ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + u32 off; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + __uint128_t tmp; + void *valp = &tmp; + u64 off; + int err = 0; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) { + err = -EFAULT; + goto out; + } + + if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { + u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case PSR_MODE_EL0t: + case PSR_MODE_EL1t: + case PSR_MODE_EL1h: + break; + default: + err = -EINVAL; + goto out; + } + } + + memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); +out: + return err; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(__u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend system regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_get_reg(vcpu, reg); +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_set_reg(vcpu, reg); +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + /* Currently handled by the generic backend */ + return KVM_ARM_TARGET_CORTEX_A57; + default: + return -EINVAL; + } +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + vcpu->arch.target = phys_target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (init->features[i / 32] & (1 << (i % 32))) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} From f756a17fc0ab844b88c1282cf5df2ffb03185825 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Dec 2012 17:07:52 +0000 Subject: [PATCH 018/287] arm64: KVM: hypervisor initialization code Provide EL2 with page tables and stack, and set the vectors to point to the full blown world-switch code. Signed-off-by: Marc Zyngier (cherry picked from commit 092bd143cbb481b4ce1d55247a2987eaaf61f967) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 13 ++++ arch/arm64/kvm/hyp-init.S | 107 ++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 arch/arm64/kvm/hyp-init.S diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4a2622f5e81f..2500eb6a4d2a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -183,4 +183,17 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + /* + * Call initialization code, and switch to the full blown + * HYP code. + */ + kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S new file mode 100644 index 000000000000..ba84e6705e20 --- /dev/null +++ b/arch/arm64/kvm/hyp-init.S @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +ENTRY(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP boot pgd + * x1: HYP pgd + * x2: HYP stack + * x3: HYP vectors + */ +__do_hyp_init: + + msr ttbr0_el2, x0 + + mrs x4, tcr_el1 + ldr x5, =TCR_EL2_MASK + and x4, x4, x5 + ldr x5, =TCR_EL2_FLAGS + orr x4, x4, x5 + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + msr vtcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + mov x4, #SCTLR_EL2_FLAGS + msr sctlr_el2, x4 + isb + + /* MMU is now enabled. Get ready for the trampoline dance */ + ldr x4, =TRAMPOLINE_VA + adr x5, target + bfi x4, x5, #0, #PAGE_SHIFT + br x4 + +target: /* We're now in the trampoline code, switch page tables */ + msr ttbr0_el2, x1 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Set the stack and new vectors */ + kern_hyp_va x2 + mov sp, x2 + kern_hyp_va x3 + msr vbar_el2, x3 + + /* Hello, World! */ + eret +ENDPROC(__kvm_hyp_init) + + .ltorg + + .popsection From 066bddb132227937d1f68dc5ec4554786e393345 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:40:18 +0000 Subject: [PATCH 019/287] arm64: KVM: HYP mode world switch implementation The HYP mode world switch in all its glory. Implements save/restore of host/guest registers, EL2 trapping, IPA resolution, and additional services (tlb invalidation). Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 55c7401d92e16360e0987afe39355f1eb6300f31) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/asm-offsets.c | 33 ++ arch/arm64/kvm/hyp.S | 617 ++++++++++++++++++++++++++++++++ 2 files changed, 650 insertions(+) create mode 100644 arch/arm64/kvm/hyp.S diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea3..49c162c03b69 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -104,5 +104,38 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif return 0; } diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S new file mode 100644 index 000000000000..0b18c2e1e043 --- /dev/null +++ b/arch/arm64/kvm/hyp.S @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + .align PAGE_SHIFT + +__kvm_hyp_code_start: + .globl __kvm_hyp_code_start + +.macro save_common_regs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_XREG_OFFSET(19) + stp x19, x20, [x3] + stp x21, x22, [x3, #16] + stp x23, x24, [x3, #32] + stp x25, x26, [x3, #48] + stp x27, x28, [x3, #64] + stp x29, lr, [x3, #80] + + mrs x19, sp_el0 + mrs x20, elr_el2 // EL1 PC + mrs x21, spsr_el2 // EL1 pstate + + stp x19, x20, [x3, #96] + str x21, [x3, #112] + + mrs x22, sp_el1 + mrs x23, elr_el1 + mrs x24, spsr_el1 + + str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] +.endm + +.macro restore_common_regs + // x2: base address for cpu context + // x3: tmp register + + ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] + + msr sp_el1, x22 + msr elr_el1, x23 + msr spsr_el1, x24 + + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 + ldp x19, x20, [x3] + ldr x21, [x3, #16] + + msr sp_el0, x19 + msr elr_el2, x20 // EL1 PC + msr spsr_el2, x21 // EL1 pstate + + add x3, x2, #CPU_XREG_OFFSET(19) + ldp x19, x20, [x3] + ldp x21, x22, [x3, #16] + ldp x23, x24, [x3, #32] + ldp x25, x26, [x3, #48] + ldp x27, x28, [x3, #64] + ldp x29, lr, [x3, #80] +.endm + +.macro save_host_regs + save_common_regs +.endm + +.macro restore_host_regs + restore_common_regs +.endm + +.macro save_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_save x3, 4 +.endm + +.macro restore_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_restore x3, 4 +.endm + +.macro save_guest_regs + // x0 is the vcpu address + // x1 is the return code, do not corrupt! + // x2 is the cpu context + // x3 is a tmp register + // Guest's x0-x3 are on the stack + + // Compute base to save registers + add x3, x2, #CPU_XREG_OFFSET(4) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + str x18, [x3, #112] + + pop x6, x7 // x2, x3 + pop x4, x5 // x0, x1 + + add x3, x2, #CPU_XREG_OFFSET(0) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + save_common_regs +.endm + +.macro restore_guest_regs + // x0 is the vcpu address. + // x2 is the cpu context + // x3 is a tmp register + + // Prepare x0-x3 for later restore + add x3, x2, #CPU_XREG_OFFSET(0) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + push x4, x5 // Push x0-x3 on the stack + push x6, x7 + + // x4-x18 + ldp x4, x5, [x3, #32] + ldp x6, x7, [x3, #48] + ldp x8, x9, [x3, #64] + ldp x10, x11, [x3, #80] + ldp x12, x13, [x3, #96] + ldp x14, x15, [x3, #112] + ldp x16, x17, [x3, #128] + ldr x18, [x3, #144] + + // x19-x29, lr, sp*, elr*, spsr* + restore_common_regs + + // Last bits of the 64bit state + pop x2, x3 + pop x0, x1 + + // Do not touch any register after this! +.endm + +/* + * Macros to perform system register save/restore. + * + * Ordering here is absolutely critical, and must be kept consistent + * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, + * and in kvm_asm.h. + * + * In other words, don't touch any of these unless you know what + * you are doing. + */ +.macro save_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + mrs x4, vmpidr_el2 + mrs x5, csselr_el1 + mrs x6, sctlr_el1 + mrs x7, actlr_el1 + mrs x8, cpacr_el1 + mrs x9, ttbr0_el1 + mrs x10, ttbr1_el1 + mrs x11, tcr_el1 + mrs x12, esr_el1 + mrs x13, afsr0_el1 + mrs x14, afsr1_el1 + mrs x15, far_el1 + mrs x16, mair_el1 + mrs x17, vbar_el1 + mrs x18, contextidr_el1 + mrs x19, tpidr_el0 + mrs x20, tpidrro_el0 + mrs x21, tpidr_el1 + mrs x22, amair_el1 + mrs x23, cntkctl_el1 + + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + stp x18, x19, [x3, #112] + stp x20, x21, [x3, #128] + stp x22, x23, [x3, #144] +.endm + +.macro restore_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + ldp x8, x9, [x3, #32] + ldp x10, x11, [x3, #48] + ldp x12, x13, [x3, #64] + ldp x14, x15, [x3, #80] + ldp x16, x17, [x3, #96] + ldp x18, x19, [x3, #112] + ldp x20, x21, [x3, #128] + ldp x22, x23, [x3, #144] + + msr vmpidr_el2, x4 + msr csselr_el1, x5 + msr sctlr_el1, x6 + msr actlr_el1, x7 + msr cpacr_el1, x8 + msr ttbr0_el1, x9 + msr ttbr1_el1, x10 + msr tcr_el1, x11 + msr esr_el1, x12 + msr afsr0_el1, x13 + msr afsr1_el1, x14 + msr far_el1, x15 + msr mair_el1, x16 + msr vbar_el1, x17 + msr contextidr_el1, x18 + msr tpidr_el0, x19 + msr tpidrro_el0, x20 + msr tpidr_el1, x21 + msr amair_el1, x22 + msr cntkctl_el1, x23 +.endm + +.macro activate_traps + ldr x2, [x0, #VCPU_IRQ_LINES] + ldr x1, [x0, #VCPU_HCR_EL2] + orr x2, x2, x1 + msr hcr_el2, x2 + + ldr x2, =(CPTR_EL2_TTA) + msr cptr_el2, x2 + + ldr x2, =(1 << 15) // Trap CP15 Cr=15 + msr hstr_el2, x2 + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + msr mdcr_el2, x2 +.endm + +.macro deactivate_traps + mov x2, #HCR_RW + msr hcr_el2, x2 + msr cptr_el2, xzr + msr hstr_el2, xzr + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + msr mdcr_el2, x2 +.endm + +.macro activate_vm + ldr x1, [x0, #VCPU_KVM] + kern_hyp_va x1 + ldr x2, [x1, #KVM_VTTBR] + msr vttbr_el2, x2 +.endm + +.macro deactivate_vm + msr vttbr_el2, xzr +.endm + +__save_sysregs: + save_sysregs + ret + +__restore_sysregs: + restore_sysregs + ret + +__save_fpsimd: + save_fpsimd + ret + +__restore_fpsimd: + restore_fpsimd + ret + +/* + * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); + * + * This is the world switch. The first half of the function + * deals with entering the guest, and anything from __kvm_vcpu_return + * to the end of the function deals with reentering the host. + * On the enter path, only x0 (vcpu pointer) must be preserved until + * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception + * code) must both be preserved until the epilogue. + * In both cases, x2 points to the CPU context we're saving/restoring from/to. + */ +ENTRY(__kvm_vcpu_run) + kern_hyp_va x0 + msr tpidr_el2, x0 // Save the vcpu register + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + save_host_regs + bl __save_fpsimd + bl __save_sysregs + + activate_traps + activate_vm + + // Guest context + add x2, x0, #VCPU_CONTEXT + + bl __restore_sysregs + bl __restore_fpsimd + restore_guest_regs + + // That's it, no more messing around. + eret + +__kvm_vcpu_return: + // Assume x0 is the vcpu pointer, x1 the return code + // Guest's x0-x3 are on the stack + + // Guest context + add x2, x0, #VCPU_CONTEXT + + save_guest_regs + bl __save_fpsimd + bl __save_sysregs + + deactivate_traps + deactivate_vm + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + bl __restore_fpsimd + restore_host_regs + + mov x0, x1 + ret +END(__kvm_vcpu_run) + +// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +ENTRY(__kvm_tlb_flush_vmid_ipa) + kern_hyp_va x0 + ldr x2, [x0, #KVM_VTTBR] + msr vttbr_el2, x2 + isb + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + tlbi ipas2e1is, x1 + dsb sy + tlbi vmalle1is + dsb sy + isb + + msr vttbr_el2, xzr + ret +ENDPROC(__kvm_tlb_flush_vmid_ipa) + +ENTRY(__kvm_flush_vm_context) + tlbi alle1is + ic ialluis + dsb sy + ret +ENDPROC(__kvm_flush_vm_context) + +__kvm_hyp_panic: + // Guess the context by looking at VTTBR: + // If zero, then we're already a host. + // Otherwise restore a minimal host context before panicing. + mrs x0, vttbr_el2 + cbz x0, 1f + + mrs x0, tpidr_el2 + + deactivate_traps + deactivate_vm + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + +1: adr x0, __hyp_panic_str + adr x1, 2f + ldp x2, x3, [x1] + sub x0, x0, x2 + add x0, x0, x3 + mrs x1, spsr_el2 + mrs x2, elr_el2 + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + mrs x6, par_el1 + mrs x7, tpidr_el2 + + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret + + .align 3 +2: .quad HYP_PAGE_OFFSET + .quad PAGE_OFFSET +ENDPROC(__kvm_hyp_panic) + +__hyp_panic_str: + .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + + .align 2 + +ENTRY(kvm_call_hyp) + hvc #0 + ret +ENDPROC(kvm_call_hyp) + +.macro invalid_vector label, target + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid, __kvm_hyp_panic + invalid_vector el2t_irq_invalid, __kvm_hyp_panic + invalid_vector el2t_fiq_invalid, __kvm_hyp_panic + invalid_vector el2t_error_invalid, __kvm_hyp_panic + invalid_vector el2h_sync_invalid, __kvm_hyp_panic + invalid_vector el2h_irq_invalid, __kvm_hyp_panic + invalid_vector el2h_fiq_invalid, __kvm_hyp_panic + invalid_vector el2h_error_invalid, __kvm_hyp_panic + invalid_vector el1_sync_invalid, __kvm_hyp_panic + invalid_vector el1_irq_invalid, __kvm_hyp_panic + invalid_vector el1_fiq_invalid, __kvm_hyp_panic + invalid_vector el1_error_invalid, __kvm_hyp_panic + +el1_sync: // Guest trapped into EL2 + push x0, x1 + push x2, x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_EL2_EC_SHIFT + + cmp x2, #ESR_EL2_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + pop x2, x3 + pop x0, x1 + + push lr, xzr + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + pop lr, xzr + eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + cmp x2, #ESR_EL2_EC_DABT + mov x0, #ESR_EL2_EC_IABT + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ + and x2, x1, #ESR_EL2_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str x1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __kvm_vcpu_return + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: pop x2, x3 + pop x0, x1 + + eret + +el1_irq: + push x0, x1 + push x2, x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __kvm_vcpu_return + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) + +__kvm_hyp_code_end: + .globl __kvm_hyp_code_end + + .popsection From b0353aa3722552c9058737452c9bec86d73ccdad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:40:41 +0000 Subject: [PATCH 020/287] arm64: KVM: Exit handling Handle the exit of a VM, decoding the exit reason from HYP mode and calling the corresponding handler. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit c4b1afd022e93eada6ee4b209be37101cd4b3494) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 119 +++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 arch/arm64/kvm/handle_exit.c diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c new file mode 100644 index 000000000000..c65d1154f969 --- /dev/null +++ b/arch/arm64/kvm/handle_exit.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/handle_exit.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* + * Guest called HVC instruction: + * Let it know we don't want that by injecting an undefined exception. + */ + kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1), + *vcpu_pc(vcpu)); + kvm_debug(" HSR: %8x", kvm_vcpu_get_hsr(vcpu)); + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* We don't support SMC; don't do that. */ + kvm_debug("smc: at %08lx", *vcpu_pc(vcpu)); + kvm_inject_undefined(vcpu); + return 1; +} + +/** + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * @vcpu: the vcpu pointer + * + * Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_vcpu_block(vcpu); + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_HVC64] = handle_hvc, + [ESR_EL2_EC_SMC64] = handle_smc, + [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, + [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, + [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unkown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_TRAP: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} From 4f0c6d89a863d5a86d2dea50a9e114559e73d97f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 17:54:54 +0000 Subject: [PATCH 021/287] arm64: KVM: Plug the VGIC Add support for the in-kernel GIC emulation. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 1f17f3b6044d8a81a74dc6c962b3b38a7336106b) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 88 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 0b18c2e1e043..8dc27a367d77 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -306,6 +306,90 @@ __kvm_hyp_code_start: msr vttbr_el2, xzr .endm +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] + + str w4, [x3, #VGIC_CPU_HCR] + str w5, [x3, #VGIC_CPU_VMCR] + str w6, [x3, #VGIC_CPU_MISR] + str w7, [x3, #VGIC_CPU_EISR] + str w8, [x3, #(VGIC_CPU_EISR + 4)] + str w9, [x3, #VGIC_CPU_ELRSR] + str w10, [x3, #(VGIC_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x2], #4 + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_CPU_HCR] + ldr w5, [x3, #VGIC_CPU_VMCR] + ldr w6, [x3, #VGIC_CPU_APR] + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x3], #4 + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + __save_sysregs: save_sysregs ret @@ -348,6 +432,8 @@ ENTRY(__kvm_vcpu_run) activate_traps activate_vm + restore_vgic_state + // Guest context add x2, x0, #VCPU_CONTEXT @@ -369,6 +455,8 @@ __kvm_vcpu_return: bl __save_fpsimd bl __save_sysregs + save_vgic_state + deactivate_traps deactivate_vm From 615fd459aa42eb0450667fb191c1931a1da749b3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 18:31:28 +0100 Subject: [PATCH 022/287] ARM: KVM: timer: allow DT matching for ARMv8 cores ARMv8 cores have the exact same timer as ARMv7 cores. Make sure the KVM timer code can match it in the device tree. Signed-off-by: Marc Zyngier (cherry picked from commit f61701e0a24a09aa4a44baf24e57dcc5e706afa8) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 2d00b2925780..6f485eaf643b 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -195,6 +195,7 @@ static struct notifier_block kvm_timer_cpu_nb = { static const struct of_device_id arch_timer_of_match[] = { { .compatible = "arm,armv7-timer", }, + { .compatible = "arm,armv8-timer", }, {}, }; From 5b12bf6aa91a284eb32fdb5f02ebc770149dbf50 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 17:52:03 +0000 Subject: [PATCH 023/287] arm64: KVM: Plug the arch timer Add support for the in-kernel timer emulation. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 003300de6c3e51934fb52eb2677f6f4fb4996cbd) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 56 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/reset.c | 12 +++++++++ 2 files changed, 68 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8dc27a367d77..8b510835b440 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -390,6 +390,60 @@ __kvm_hyp_code_start: 2: .endm +.macro save_timer_state + // x0: vcpu pointer + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + mrs x3, cntv_ctl_el0 + and x3, x3, #3 + str w3, [x0, #VCPU_TIMER_CNTV_CTL] + bic x3, x3, #1 // Clear Enable + msr cntv_ctl_el0, x3 + + isb + + mrs x3, cntv_cval_el0 + str x3, [x0, #VCPU_TIMER_CNTV_CVAL] + +1: + // Allow physical timer/counter access for the host + mrs x2, cnthctl_el2 + orr x2, x2, #3 + msr cnthctl_el2, x2 + + // Clear cntvoff for the host + msr cntvoff_el2, xzr +.endm + +.macro restore_timer_state + // x0: vcpu pointer + // Disallow physical timer access for the guest + // Physical counter access is allowed + mrs x2, cnthctl_el2 + orr x2, x2, #1 + bic x2, x2, #2 + msr cnthctl_el2, x2 + + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + ldr x3, [x2, #KVM_TIMER_CNTVOFF] + msr cntvoff_el2, x3 + ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] + msr cntv_cval_el0, x2 + isb + + ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] + and x2, x2, #3 + msr cntv_ctl_el0, x2 +1: +.endm + __save_sysregs: save_sysregs ret @@ -433,6 +487,7 @@ ENTRY(__kvm_vcpu_run) activate_vm restore_vgic_state + restore_timer_state // Guest context add x2, x0, #VCPU_CONTEXT @@ -455,6 +510,7 @@ __kvm_vcpu_return: bl __save_fpsimd bl __save_sysregs + save_timer_state save_vgic_state deactivate_traps diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f6536a06231a..766150ac76ed 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -36,6 +38,11 @@ static const struct kvm_regs default_regs_reset = { PSR_F_BIT | PSR_D_BIT), }; +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + int kvm_arch_dev_ioctl_check_extension(long ext) { int r; @@ -58,11 +65,13 @@ int kvm_arch_dev_ioctl_check_extension(long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { + const struct kvm_irq_level *cpu_vtimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { default: cpu_reset = &default_regs_reset; + cpu_vtimer_irq = &default_vtimer_irq; break; } @@ -72,5 +81,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset system registers */ kvm_reset_sys_regs(vcpu); + /* Reset timer */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } From 5732aca89a79f4f9446145c8d921963c81b4157f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 12 Dec 2012 18:52:05 +0000 Subject: [PATCH 024/287] arm64: KVM: PSCI implementation Wire the PSCI backend into the exit handling code. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit dcd2e40c1e1cce302498d16d095b0f8a30326f74) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_psci.h | 23 +++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 16 ++++++++++++++++ arch/arm64/kvm/handle_exit.c | 16 +++++++--------- 4 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_psci.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2500eb6a4d2a..2fdeb326c3ee 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h new file mode 100644 index 000000000000..e301a4816355 --- /dev/null +++ b/arch/arm64/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_PSCI_H__ +#define __ARM64_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ebac919dc0ca..fb60f9037057 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -69,6 +69,8 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -141,6 +143,20 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index c65d1154f969..4766b7f3515e 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -24,26 +24,24 @@ #include #include #include +#include typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* - * Guest called HVC instruction: - * Let it know we don't want that by injecting an undefined exception. - */ - kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1), - *vcpu_pc(vcpu)); - kvm_debug(" HSR: %8x", kvm_vcpu_get_hsr(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug("smc: at %08lx", *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } From ea4ebae1594250431731dcdacd6da33b865fd3b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:41:44 +0000 Subject: [PATCH 025/287] arm64: KVM: Build system integration Only the Makefile is plugged in. The Kconfig stuff is in a separate patch to allow for an easier merge process. Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 6211753fdfd05af9e08f54c8d0ba3ee516034878) Signed-off-by: Christoffer Dall --- arch/arm64/Makefile | 1 + arch/arm64/kvm/Makefile | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 arch/arm64/kvm/Makefile diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b6ccf8a36e2d..7ab6b358cc35 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_KVM) += arch/arm64/kvm/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile new file mode 100644 index 000000000000..dca110556683 --- /dev/null +++ b/arch/arm64/kvm/Makefile @@ -0,0 +1,23 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +CFLAGS_arm.o := -I. +CFLAGS_mmu.o := -I. + +KVM=../../../virt/kvm +ARM=../../../arch/arm/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += kvm.o + +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o + +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o + +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o From 8ee55043e2fc5e6c90209f478c70b600dce523a9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:17:50 +0000 Subject: [PATCH 026/287] arm64: KVM: define 32bit specific registers Define the 32bit specific registers (SPSRs, cp15...). Most CPU registers are directly mapped to a 64bit register (r0->x0...). Only the SPSRs have separate registers. cp15 registers are also mapped into their 64bit counterpart in most cases. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 40033a614ea3db196d57c477ca328f44eb1e4df0) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 38 ++++++++++++++++++++++++++++++- arch/arm64/include/asm/kvm_host.h | 5 +++- arch/arm64/include/uapi/asm/kvm.h | 7 +++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 591ac219964a..c92de4163eba 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,7 +42,43 @@ #define TPIDR_EL1 18 /* Thread ID, Privileged */ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define NR_SYS_REGS 21 +/* 32bit specific registers. Keep them at the end of the range */ +#define DACR32_EL2 21 /* Domain Access Control Register */ +#define IFSR32_EL2 22 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */ +#define TEECR32_EL1 25 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 27 + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ +#define NR_CP15_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2fdeb326c3ee..3f5830b3ca3f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -84,7 +84,10 @@ struct kvm_vcpu_fault_info { struct kvm_cpu_context { struct kvm_regs gp_regs; - u64 sys_regs[NR_SYS_REGS]; + union { + u64 sys_regs[NR_SYS_REGS]; + u32 cp15[NR_CP15_REGS]; + }; }; typedef struct kvm_cpu_context kvm_cpu_context_t; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index fb60f9037057..5b1110c49df5 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -23,7 +23,12 @@ #define __ARM_KVM_H__ #define KVM_SPSR_EL1 0 -#define KVM_NR_SPSR 1 +#define KVM_SPSR_SVC KVM_SPSR_EL1 +#define KVM_SPSR_ABT 1 +#define KVM_SPSR_UND 2 +#define KVM_SPSR_IRQ 3 +#define KVM_SPSR_FIQ 4 +#define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ #include From 934f190b989d879c8ef59e26c9d6eb2d10632f0a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:40:29 +0000 Subject: [PATCH 027/287] arm64: KVM: 32bit GP register access Allow access to the 32bit register file through the usual API. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b547631fc64e249a3c507e6ce854642507fa7c1c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 15 ++- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/regmap.c | 168 +++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kvm/regmap.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6c1725e93b0b..20a1a3931d8d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -28,6 +28,9 @@ #include #include +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { - return false; /* 32bit? Bahhh... */ + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); } static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) @@ -64,16 +67,23 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) { + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_reg32(vcpu, reg_num); + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } /* Get vcpu SPSR for current mode */ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_spsr32(vcpu); + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; } @@ -81,6 +91,9 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + if (vcpu_mode_is_32bit(vcpu)) + return mode > COMPAT_PSR_MODE_USR; + return mode != PSR_MODE_EL0t; } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index dca110556683..a2169ec8d93b 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c new file mode 100644 index 000000000000..bbc6ae32e4af --- /dev/null +++ b/arch/arm64/kvm/regmap.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/emulate.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#define VCPU_NR_MODES 6 +#define REG_OFFSET(_reg) \ + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) + +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { + /* USR Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + REG_OFFSET(pc) + }, + + /* FIQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(compat_r8_fiq), /* r8 */ + REG_OFFSET(compat_r9_fiq), /* r9 */ + REG_OFFSET(compat_r10_fiq), /* r10 */ + REG_OFFSET(compat_r11_fiq), /* r11 */ + REG_OFFSET(compat_r12_fiq), /* r12 */ + REG_OFFSET(compat_sp_fiq), /* r13 */ + REG_OFFSET(compat_lr_fiq), /* r14 */ + REG_OFFSET(pc) + }, + + /* IRQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_irq), /* r13 */ + REG_OFFSET(compat_lr_irq), /* r14 */ + REG_OFFSET(pc) + }, + + /* SVC Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_svc), /* r13 */ + REG_OFFSET(compat_lr_svc), /* r14 */ + REG_OFFSET(pc) + }, + + /* ABT Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_abt), /* r13 */ + REG_OFFSET(compat_lr_abt), /* r14 */ + REG_OFFSET(pc) + }, + + /* UND Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_und), /* r13 */ + REG_OFFSET(compat_lr_und), /* r14 */ + REG_OFFSET(pc) + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + + switch (mode) { + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ + break; + + case COMPAT_PSR_MODE_ABT: + mode = 4; + break; + + case COMPAT_PSR_MODE_UND: + mode = 5; + break; + + case COMPAT_PSR_MODE_SYS: + mode = 0; /* SYS maps to USR */ + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +{ + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_SVC: + mode = KVM_SPSR_SVC; + break; + case COMPAT_PSR_MODE_ABT: + mode = KVM_SPSR_ABT; + break; + case COMPAT_PSR_MODE_UND: + mode = KVM_SPSR_UND; + break; + case COMPAT_PSR_MODE_IRQ: + mode = KVM_SPSR_IRQ; + break; + case COMPAT_PSR_MODE_FIQ: + mode = KVM_SPSR_FIQ; + break; + default: + BUG(); + } + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; +} From 4129306976df31cf80f13a85d6886e2f1245662b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:54:04 +0000 Subject: [PATCH 028/287] arm64: KVM: 32bit conditional execution emulation As conditional instructions can trap on AArch32, add the thinest possible emulation layer to keep 32bit guests happy. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 27b190bd9fbfee34536cb858f0b5924d294aac38) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 13 ++- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/emulate.c | 158 +++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kvm/emulate.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 20a1a3931d8d..eec073875218 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -31,6 +31,9 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -57,12 +60,18 @@ static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) { - return true; /* No conditionals on arm64 */ + if (vcpu_mode_is_32bit(vcpu)) + return kvm_condition_valid32(vcpu); + + return true; } static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - *vcpu_pc(vcpu) += 4; + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a2169ec8d93b..72a9fd583ad3 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c new file mode 100644 index 000000000000..124418d17049 --- /dev/null +++ b/arch/arm64/kvm/emulate.c @@ -0,0 +1,158 @@ +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr & ESR_EL2_CV) + return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + + return -1; +} + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); + + BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK)); + + if (!(cpsr & COMPAT_PSR_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~COMPAT_PSR_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} From 33056d384f68642d62ba6dc6fe0a17ef92473135 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:32:33 +0000 Subject: [PATCH 029/287] arm64: KVM: 32bit handling of coprocessor traps Provide the necessary infrastructure to trap coprocessor accesses that occur when running 32bit guests. Also wire SMC and HVC trapped in 32bit mode while were at it. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 62a89c44954f09072bf07a714c8f68bda14ab87e) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_coproc.h | 5 + arch/arm64/kvm/handle_exit.c | 7 ++ arch/arm64/kvm/sys_regs.c | 181 ++++++++++++++++++++++++++-- 3 files changed, 186 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 9b4477acb554..9a59301cd014 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -32,11 +32,16 @@ struct kvm_sys_reg_table { struct kvm_sys_reg_target_table { struct kvm_sys_reg_table table64; + struct kvm_sys_reg_table table32; }; void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); #define kvm_coproc_table_init kvm_sys_reg_table_init diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 4766b7f3515e..9beaca033437 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_HVC32] = handle_hvc, + [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, [ESR_EL2_EC_SMC64] = handle_smc, [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 52fff0ae3442..94923609753b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -38,6 +38,10 @@ * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. + * + * For AArch32, we only take care of what is being trapped. Anything + * that has to do with init and userspace access has to go via the + * 64bit interface. */ /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ @@ -166,6 +170,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), access_dcsw }, + /* TEECR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEECR32_EL1, 0 }, + /* TEEHBR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEEHBR32_EL1, 0 }, + /* DBGVCR32_EL2 */ + { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), + NULL, reset_val, DBGVCR32_EL2, 0 }, + /* MPIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), NULL, reset_mpidr, MPIDR_EL1 }, @@ -276,6 +290,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* TPIDRRO_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), NULL, reset_unknown, TPIDRRO_EL0 }, + + /* DACR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, DACR32_EL2 }, + /* IFSR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, IFSR32_EL2 }, + /* FPEXC32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), + NULL, reset_val, FPEXC32_EL2, 0x70 }, +}; + +/* Trapped cp15 registers */ +static const struct sys_reg_desc cp15_regs[] = { + /* + * DC{C,I,CI}SW operations: + */ + { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, }; /* Target specific emulation tables */ @@ -288,13 +335,20 @@ void kvm_register_target_sys_reg_table(unsigned int target, } /* Get specific register table for this target. */ -static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num) +static const struct sys_reg_desc *get_target_table(unsigned target, + bool mode_is_64, + size_t *num) { struct kvm_sys_reg_target_table *table; table = target_tables[target]; - *num = table->table64.num; - return table->table64.table; + if (mode_is_64) { + *num = table->table64.num; + return table->table64.table; + } else { + *num = table->table32.num; + return table->table32.table; + } } static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, @@ -322,13 +376,126 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, return NULL; } +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +static void emulate_cp15(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, false, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return; + } + /* If access function fails, it should complain. */ + } + + kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); +} + +/** + * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt2 = (hsr >> 10) & 0xf; + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + + params.Op0 = 0; + params.Op1 = (hsr >> 16) & 0xf; + params.Op2 = 0; + params.CRn = 0; + + /* + * Massive hack here. Store Rt2 in the top 32bits so we only + * have one register to deal with. As we use the same trap + * backends between AArch32 and AArch64, we get away with it. + */ + if (params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val &= 0xffffffff; + val |= *vcpu_reg(vcpu, Rt2) << 32; + *vcpu_reg(vcpu, params.Rt) = val; + } + + emulate_cp15(vcpu, ¶ms); + + /* Do the opposite hack for the read side */ + if (!params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val >>= 32; + *vcpu_reg(vcpu, Rt2) = val; + } + + return 1; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + params.CRn = (hsr >> 10) & 0xf; + params.Op0 = 0; + params.Op1 = (hsr >> 14) & 0x7; + params.Op2 = (hsr >> 17) & 0x7; + + emulate_cp15(vcpu, ¶ms); + return 1; +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *params) { size_t num; const struct sys_reg_desc *table, *r; - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); /* Search target-specific then generic table. */ r = find_reg(params, table, num); @@ -438,7 +605,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if (!index_to_params(id, ¶ms)) return NULL; - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); @@ -762,7 +929,7 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) size_t num; /* We check for duplicates here, to allow arch-specific overrides. */ - i1 = get_target_table(vcpu->arch.target, &num); + i1 = get_target_table(vcpu->arch.target, true, &num); end1 = i1 + num; i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); @@ -874,7 +1041,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) /* Generic chip reset first (so target could override). */ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); reset_sys_reg_descs(vcpu, table, num); for (num = 1; num < NR_SYS_REGS; num++) From a6df8b5dd61c84f691c41663a1d2bb0e05342053 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:50:18 +0000 Subject: [PATCH 030/287] arm64: KVM: CPU specific 32bit coprocessor access Enable handling of CPU specific 32bit coprocessor access. Not much here either. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 06c7654d2fb8bac7b1af4340ad59434a5d89b86a) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs_generic_v8.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index d4e803907312..4268ab9356b1 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -59,11 +59,21 @@ static const struct sys_reg_desc genericv8_sys_regs[] = { access_actlr, reset_actlr, ACTLR_EL1 }, }; +static const struct sys_reg_desc genericv8_cp15_regs[] = { + /* ACTLR */ + { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr }, +}; + static struct kvm_sys_reg_target_table genericv8_target_table = { .table64 = { .table = genericv8_sys_regs, .num = ARRAY_SIZE(genericv8_sys_regs), }, + .table32 = { + .table = genericv8_cp15_regs, + .num = ARRAY_SIZE(genericv8_cp15_regs), + }, }; static int __init sys_reg_genericv8_init(void) From ad0ed2f67ccf23f49c201cb05c542d46e04fb9c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:52:10 +0000 Subject: [PATCH 031/287] arm64: KVM: 32bit specific register world switch Allow registers specific to 32bit guests to be saved/restored during the world switch. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b4afad06c19e3489767532f86ff453a1d1e28b8c) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8b510835b440..ff985e3d8b72 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -266,6 +266,74 @@ __kvm_hyp_code_start: msr cntkctl_el1, x23 .endm +.macro skip_32bit_state tmp, target + // Skip 32bit state if not needed + mrs \tmp, hcr_el2 + tbnz \tmp, #HCR_RW_SHIFT, \target +.endm + +.macro skip_tee_state tmp, target + // Skip ThumbEE state if not needed + mrs \tmp, id_pfr0_el1 + tbz \tmp, #12, \target +.endm + +.macro save_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + mrs x4, spsr_abt + mrs x5, spsr_und + mrs x6, spsr_irq + mrs x7, spsr_fiq + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + mrs x4, dacr32_el2 + mrs x5, ifsr32_el2 + mrs x6, fpexc32_el2 + mrs x7, dbgvcr32_el2 + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + mrs x4, teecr32_el1 + mrs x5, teehbr32_el1 + stp x4, x5, [x3] +1: +.endm + +.macro restore_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr spsr_abt, x4 + msr spsr_und, x5 + msr spsr_irq, x6 + msr spsr_fiq, x7 + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr dacr32_el2, x4 + msr ifsr32_el2, x5 + msr fpexc32_el2, x6 + msr dbgvcr32_el2, x7 + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + ldp x4, x5, [x3] + msr teecr32_el1, x4 + msr teehbr32_el1, x5 +1: +.endm + .macro activate_traps ldr x2, [x0, #VCPU_IRQ_LINES] ldr x1, [x0, #VCPU_HCR_EL2] @@ -494,6 +562,7 @@ ENTRY(__kvm_vcpu_run) bl __restore_sysregs bl __restore_fpsimd + restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. @@ -509,6 +578,7 @@ __kvm_vcpu_return: save_guest_regs bl __save_fpsimd bl __save_sysregs + save_guest_32bit_state save_timer_state save_vgic_state From 31960c92ba1763c3c5db1d09fb0f4547a39c8d4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 11:29:35 +0000 Subject: [PATCH 032/287] arm64: KVM: 32bit guest fault injection Add fault injection capability for 32bit guests. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit e82e030556e42e823e174e0c3bd97988d1a09d1f) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/inject_fault.c | 79 ++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 54f656271266..81a02a8762b0 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -1,5 +1,5 @@ /* - * Fault injection for 64bit guests. + * Fault injection for both 32 and 64bit guests. * * Copyright (C) 2012,2013 - ARM Ltd * Author: Marc Zyngier @@ -29,6 +29,74 @@ PSR_I_BIT | PSR_D_BIT) #define EL1_EXCEPT_SYNC_OFFSET 0x200 +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = (is_thumb) ? 4 : 0; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -98,6 +166,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, false, addr); + inject_abt64(vcpu, false, addr); } @@ -111,6 +182,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, true, addr); + inject_abt64(vcpu, true, addr); } @@ -122,5 +196,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_undef32(vcpu); + inject_undef64(vcpu); } From 80e531580f7bbb453119afd17bc752d9763676b5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:46:46 +0000 Subject: [PATCH 033/287] arm64: KVM: enable initialization of a 32bit vcpu Wire the init of a 32bit vcpu by allowing 32bit modes in pstate, and providing sensible defaults out of reset state. This feature is of course conditioned by the presence of 32bit capability on the physical CPU, and is checked by the KVM_CAP_ARM_EL1_32BIT capability. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 0d854a60b1d7d39a37b25dd28f63cfa0df637b91) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/guest.c | 6 ++++++ arch/arm64/kvm/reset.c | 26 +++++++++++++++++++++++++- include/uapi/linux/kvm.h | 1 + 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3f5830b3ca3f..644d73956864 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5b1110c49df5..5031f4263937 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -75,6 +75,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ struct kvm_vcpu_init { __u32 target; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3d7518a7ebaa..2c3ff67a8ecb 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -99,6 +99,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; switch (mode) { + case COMPAT_PSR_MODE_USR: + case COMPAT_PSR_MODE_FIQ: + case COMPAT_PSR_MODE_IRQ: + case COMPAT_PSR_MODE_SVC: + case COMPAT_PSR_MODE_ABT: + case COMPAT_PSR_MODE_UND: case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 766150ac76ed..70a7816535cd 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -38,16 +38,32 @@ static const struct kvm_regs default_regs_reset = { PSR_F_BIT | PSR_D_BIT), }; +static const struct kvm_regs default_regs_reset32 = { + .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | + COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), +}; + static const struct kvm_irq_level default_vtimer_irq = { .irq = 27, .level = 1, }; +static bool cpu_has_32bit_el1(void) +{ + u64 pfr0; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + return !!(pfr0 & 0x20); +} + int kvm_arch_dev_ioctl_check_extension(long ext) { int r; switch (ext) { + case KVM_CAP_ARM_EL1_32BIT: + r = cpu_has_32bit_el1(); + break; default: r = 0; } @@ -70,7 +86,15 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { default: - cpu_reset = &default_regs_reset; + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (!cpu_has_32bit_el1()) + return -EINVAL; + cpu_reset = &default_regs_reset32; + vcpu->arch.hcr_el2 &= ~HCR_RW; + } else { + cpu_reset = &default_regs_reset; + } + cpu_vtimer_irq = &default_vtimer_irq; break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 97277d333e82..acccd08be6c7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_MPIC 90 #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_ARM_EL1_32BIT 93 #ifdef KVM_CAP_IRQ_ROUTING From d3845bf4d5f6b104ec346dabf58817aa9d66d740 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Apr 2013 17:46:31 +0100 Subject: [PATCH 034/287] arm64: KVM: userspace API documentation Unsurprisingly, the arm64 userspace API is extremely similar to the 32bit one, the only significant difference being the ONE_REG register mapping. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 379e04c79e8a9ded8a202f1e266f0c5830185bea) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 58 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5f91eda91647..9bfadeb8be31 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -280,7 +280,7 @@ kvm_run' (see below). 4.11 KVM_GET_REGS Capability: basic -Architectures: all except ARM +Architectures: all except ARM, arm64 Type: vcpu ioctl Parameters: struct kvm_regs (out) Returns: 0 on success, -1 on error @@ -301,7 +301,7 @@ struct kvm_regs { 4.12 KVM_SET_REGS Capability: basic -Architectures: all except ARM +Architectures: all except ARM, arm64 Type: vcpu ioctl Parameters: struct kvm_regs (in) Returns: 0 on success, -1 on error @@ -587,7 +587,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, ARM +Architectures: x86, ia64, ARM, arm64 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -595,14 +595,14 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is +only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is created. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, arm +Architectures: x86, ia64, arm, arm64 Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error @@ -612,9 +612,10 @@ On some architectures it is required that an interrupt controller model has been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level to be set to 1 and then back to 0. -ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip -(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for -specific cpus. The irq field is interpreted like this: +ARM/arm64 can signal an interrupt either at the CPU level, or at the +in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to +use PPIs designated for specific cpus. The irq field is interpreted +like this:  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | field: | irq_type | vcpu_index | irq_id | @@ -1831,6 +1832,22 @@ ARM 32-bit VFP control registers have the following id bit patterns: ARM 64-bit FP registers have the following id bit patterns: 0x4030 0000 0012 0 + +arm64 registers are mapped using the lower 32 bits. The upper 16 of +that is the register group type, or coprocessor number: + +arm64 core/FP-SIMD registers have the following id bit patterns. Note +that the size of the access is variable, as the kvm_regs structure +contains elements ranging from 32 to 128 bits. The index is a 32bit +value in the kvm_regs structure seen as a 32bit array. + 0x60x0 0000 0010 + +arm64 CCSIDR registers are demultiplexed by CSSELR value: + 0x6020 0000 0011 00 + +arm64 system registers have the following id bit patterns: + 0x6030 0000 0013 + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2264,7 +2281,7 @@ current state. "addr" is ignored. 4.77 KVM_ARM_VCPU_INIT Capability: basic -Architectures: arm +Architectures: arm, arm64 Type: vcpu ioctl Parameters: struct struct kvm_vcpu_init (in) Returns: 0 on success; -1 on error @@ -2283,12 +2300,14 @@ should be created before this ioctl is invoked. Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. Depends on KVM_CAP_ARM_PSCI. + - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. + Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 4.78 KVM_GET_REG_LIST Capability: basic -Architectures: arm +Architectures: arm, arm64 Type: vcpu ioctl Parameters: struct kvm_reg_list (in/out) Returns: 0 on success; -1 on error @@ -2308,7 +2327,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 4.80 KVM_ARM_SET_DEVICE_ADDR Capability: KVM_CAP_ARM_SET_DEVICE_ADDR -Architectures: arm +Architectures: arm, arm64 Type: vm ioctl Parameters: struct kvm_arm_device_address (in) Returns: 0 on success, -1 on error @@ -2329,18 +2348,19 @@ can access emulated or directly exposed devices, which the host kernel needs to know about. The id field is an architecture specific identifier for a specific device. -ARM divides the id field into two parts, a device id and an address type id -specific to the individual device. +ARM/arm64 divides the id field into two parts, a device id and an +address type id specific to the individual device.  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | field: | 0x00000000 | device id | addr type id | -ARM currently only require this when using the in-kernel GIC support for the -hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When -setting the base address for the guest's mapping of the VGIC virtual CPU -and distributor interface, the ioctl must be called after calling -KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling -this ioctl twice for any of the base addresses will return -EEXIST. +ARM/arm64 currently only require this when using the in-kernel GIC +support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 +as the device id. When setting the base address for the guest's +mapping of the VGIC virtual CPU and distributor interface, the ioctl +must be called after calling KVM_CREATE_IRQCHIP, but before calling +KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the +base addresses will return -EEXIST. 4.82 KVM_PPC_RTAS_DEFINE_TOKEN From 1a12f6e7bcaaf0ec1a6a01fe5fb9341eb2f74ffe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Apr 2013 17:49:40 +0100 Subject: [PATCH 035/287] arm64: KVM: MAINTAINERS update Elect myself as the KVM/arm64 maintainer. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 6394a3ec02ab39147aab9ea56d0dabafd3dcae60) Signed-off-by: Christoffer Dall --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 48c748080c96..823010fce98a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4719,6 +4719,15 @@ F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* F: arch/arm/kvm/ +KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: kvmarm@lists.cs.columbia.edu +S: Maintained +F: arch/arm64/include/uapi/asm/kvm* +F: arch/arm64/include/asm/kvm* +F: arch/arm64/kvm/ + KEXEC M: Eric Biederman W: http://kernel.org/pub/linux/utils/kernel/kexec/ From 092f9fbf3845c1c6e0bb14f3eae1be583c071a7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 May 2013 14:31:03 +0100 Subject: [PATCH 036/287] arm64: KVM: document kernel object mappings in HYP HYP mode has access to some of the kernel pages. Document the memory mapping and the offset between kernel VA and HYP VA. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit aa4a73a0a23a65a2f531d01f1865d1e61c6acb55) Signed-off-by: Christoffer Dall --- Documentation/arm64/memory.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index c6941f815f15..d50fa618371b 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -102,3 +102,10 @@ Translation table lookup with 64KB pages: | | +--------------------------> [41:29] L2 index (only 38:29 used) | +-------------------------------> [47:42] L1 index (not used) +-------------------------------------------------> [63] TTBR0/1 + +When using KVM, the hypervisor maps kernel pages in EL2, at a fixed +offset from the kernel VA (top 24bits of the kernel VA set to zero): + +Start End Size Use +----------------------------------------------------------------------- +0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP From 146844e0e0dc7bcb2a080a2fdfd792674bff105b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 30 Apr 2013 12:02:15 +0530 Subject: [PATCH 037/287] ARM: KVM: Allow host virt timer irq to be different from guest timer virt irq The arch_timer irq numbers (or PPI numbers) are implementation dependent, so the host virtual timer irq number can be different from guest virtual timer irq number. This patch ensures that host virtual timer irq number is read from DTB and guest virtual timer irq is determined based on vcpu target type. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 5ae7f87a56fab10b8f9b135a8377c144397293ca) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 12 ++++++++++++ include/kvm/arm_arch_timer.h | 4 ++++ virt/kvm/arm/arch_timer.c | 29 ++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..b7840e7aa452 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,6 +27,8 @@ #include #include +#include + /****************************************************************************** * Cortex-A15 Reset Values */ @@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level a15_vtimer_irq = { + .irq = 27, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *cpu_reset; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: @@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return -EINVAL; cpu_reset = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &a15_vtimer_irq; break; default: return -ENODEV; @@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 68cb9e1dfb81..6d9aeddc09bf 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -61,6 +61,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); @@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm) return 0; } +static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6f485eaf643b..c2e1ef4604e8 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -30,9 +30,7 @@ static struct timecounter *timecounter; static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; +static unsigned int host_vtimer_irq; static cycle_t kvm_phys_timer_read(void) { @@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); + timer->irq->irq, + timer->irq->level); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) timer_arm(timer, ns); } +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * The vcpu timer irq number cannot be determined in + * kvm_timer_vcpu_init() because it is called much before + * kvm_vcpu_set_target(). To handle this, we determine + * vcpu timer irq number when the vcpu is reset. + */ + timer->irq = irq; +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; } static void kvm_timer_init_interrupt(void *info) { - enable_percpu_irq(timer_irq.irq, 0); + enable_percpu_irq(host_vtimer_irq, 0); } @@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); + disable_percpu_irq(host_vtimer_irq); break; } @@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void) goto out; } - timer_irq.irq = ppi; + host_vtimer_irq = ppi; err = register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { From 2da084abe92d8aa40c8972c461114528c336ab54 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Wed, 1 May 2013 17:49:28 +0100 Subject: [PATCH 038/287] ARM: KVM: Don't handle PSCI calls via SMC Currently, kvmtool unconditionally declares that HVC should be used to call PSCI, so the function numbers in the DT tell the guest nothing about the function ID namespace or calling convention for SMC. We already assume that the guest will examine and honour the DT, since there is no way it could possibly guess the KVM-specific PSCI function IDs otherwise. So let's not encourage guests to violate what's specified in the DT by using SMC to make the call. [ Modified to apply to top of kvm/arm tree - Christoffer ] Signed-off-by: Dave P Martin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 24a7f675752e06729589d40a5256970998a21502) Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 3 --- arch/arm/kvm/psci.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3d74a0be47db..df4c82d47ad7 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a3667..86a693a02ba3 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * kvm_psci_call - handle PSCI call if r0 value is in range * @vcpu: Pointer to the VCPU struct * - * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * Handle PSCI calls from guests through traps from HVC instructions. * The calling convention is similar to SMC calls to the secure world where * the function number is placed in r0 and this function returns true if the * function number specified in r0 is withing the PSCI range, and false From fd741ad3dd186a0df45994059de3a08517a856bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:35 +0100 Subject: [PATCH 039/287] ARM: KVM: remove dead prototype for __kvm_tlb_flush_vmid __kvm_tlb_flush_vmid has been renamed to __kvm_tlb_flush_vmid_ipa, and the old prototype should have been removed when the code was modified. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 368074d908b785588778f00b4384376cd636f4a1) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 4bb08e3e52bc..a2f43ddcc300 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -74,8 +74,6 @@ extern char __kvm_hyp_vector[]; extern char __kvm_hyp_code_start[]; extern char __kvm_hyp_code_end[]; -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); From 3be39e7f5aaca996a006717658c517650f345476 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:37 +0100 Subject: [PATCH 040/287] ARM: KVM: use phys_addr_t instead of unsigned long long for HYP PGDs HYP PGDs are passed around as phys_addr_t, except just before calling into the hypervisor init code, where they are cast to a rather weird unsigned long long. Just keep them around as phys_addr_t, which is what makes the most sense. Reported-by: Catalin Marinas Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit dac288f7b38a7439502b77dabcdf8a9a5c4ae721) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 4 ++-- arch/arm/kvm/arm.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index ff5aaf10e6ec..1f3cee2e210e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -190,8 +190,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, - unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ef1703b9587b..741f66a2edbd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - unsigned long long boot_pgd_ptr; - unsigned long long pgd_ptr; + phys_addr_t boot_pgd_ptr; + phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); - pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + pgd_ptr = kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; From a7232858be9e29371ef8ed2a39658443344b7765 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:38 +0100 Subject: [PATCH 041/287] ARM: KVM: don't special case PC when doing an MMIO Admitedly, reading a MMIO register to load PC is very weird. Writing PC to a MMIO register is probably even worse. But the architecture doesn't forbid any of these, and injecting a Prefetch Abort is the wrong thing to do anyway. Remove this check altogether, and let the adventurous guest wander into LaLaLand if they feel compelled to do so. Reported-by: Catalin Marinas Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 8734f16fb2aa4ff0bb57ad6532661a38bc8ff957) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 ----- arch/arm/kvm/mmio.c | 6 ------ 2 files changed, 11 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 82b4babead2c..a464e8d7b6c5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) return cpsr_mode > USR_MODE;; } -static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) -{ - return reg == 15; -} - static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hsr; diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 72a12f2171b2..b8e06b7a2833 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - if (kvm_vcpu_reg_is_pc(vcpu, rt)) { - /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - mmio->is_write = is_write; mmio->phys_addr = fault_ipa; mmio->len = len; From 99f2cf12576cffa1a0168b0cc05216de54ca12f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:39 +0100 Subject: [PATCH 042/287] ARM: KVM: get rid of S2_PGD_SIZE S2_PGD_SIZE defines the number of pages used by a stage-2 PGD and is unused, except for a VM_BUG_ON check that missuses the define. As the check is very unlikely to ever triggered except in circumstances where KVM is the least of our worries, just kill both the define and the VM_BUG_ON check. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4db845c3d8e2f8a219e8ac48834dd4fe085e5d63) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 - arch/arm/kvm/mmu.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 124623e5ef14..64e96960de29 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -135,7 +135,6 @@ #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) -#define S2_PGD_SIZE (1 << S2_PGD_ORDER) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 84ba67b982c0..ca6bea4859b4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - /* stage-2 pgd must be aligned to its size */ - VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; From d144c1d2cd86381c80187f5ccee5eb27dad6b21b Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 6 Jun 2013 18:02:54 -0700 Subject: [PATCH 043/287] arm/kvm: Cleanup KVM_ARM_MAX_VCPUS logic Commit d21a1c83c7595e387545632e44cd7797b76e19cc (ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally) changed the Kconfig logic for KVM_ARM_MAX_VCPUS to work around a build error arising from the use of KVM_ARM_MAX_VCPUS when CONFIG_KVM=n. The resulting Kconfig logic is a bit awkward and leaves a KVM_ARM_MAX_VCPUS always defined in the kernel config file. This change reverts the Kconfig logic back and adds a simple preprocessor conditional in kvm_host.h to handle when CONFIG_KVM_ARM_MAX_VCPUS is undefined. Signed-off-by: Geoff Levand Signed-off-by: Christoffer Dall (cherry picked from commit f2dda9d829818b055510187059cdfa4ece10c82d) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm/kvm/Kconfig | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 1f3cee2e210e..7d22517d8071 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -25,7 +25,12 @@ #include #include +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 370e1a8af6ac..49dd64e579c2 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST - default 4 if KVM_ARM_HOST - default 0 + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 help Static number of max supported virtual CPUs per VM. From 8847470177f0d453fe41bc8de72b89a41a264830 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jun 2013 22:33:22 +0200 Subject: [PATCH 044/287] ARM: kvm: don't include drivers/virtio/Kconfig The virtio configuration has recently moved and is now visible everywhere. Including the file again from KVM as we used to need earlier now causes dependency problems: warning: (CAIF_VIRTIO && VIRTIO_PCI && VIRTIO_MMIO && REMOTEPROC && RPMSG) selects VIRTIO which has unmet direct dependencies (VIRTUALIZATION) Cc: Christoffer Dall Signed-off-by: Arnd Bergmann Signed-off-by: Christoffer Dall (cherry picked from commit 8bd4ffd6b3a98f00267051dc095076ea2ff06ea8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e579c2..ebf5015508b5 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -67,6 +67,4 @@ config KVM_ARM_TIMER ---help--- Adds support for the Architected Timers in virtual machines -source drivers/virtio/Kconfig - endif # VIRTUALIZATION From 54993349e584212be7d554a2188f6fef3efeb701 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 4 Jul 2013 13:34:32 +0100 Subject: [PATCH 045/287] arm64: KVM: Kconfig integration Finally plug KVM/arm64 into the config system, making it possible to enable KVM support on AArch64 CPUs. Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit c3eb5b14449a0949e9764d39374a2ea63faae14f) Signed-off-by: Christoffer Dall --- arch/arm64/Kconfig | 2 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/Kconfig | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 arch/arm64/kvm/Kconfig diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 43068cf44c2d..de9eeb43f622 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -275,6 +275,8 @@ source "drivers/Kconfig" source "fs/Kconfig" +source "arch/arm64/kvm/Kconfig" + source "arch/arm64/Kconfig.debug" source "security/Kconfig" diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 49c162c03b69..666e231d410b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig new file mode 100644 index 000000000000..21e90820bd23 --- /dev/null +++ b/arch/arm64/kvm/Kconfig @@ -0,0 +1,51 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select MMU_NOTIFIER + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + select KVM_ARM_VGIC + select KVM_ARM_TIMER + ---help--- + Support hosting virtualized guest machines. + + If unsure, say N. + +config KVM_ARM_HOST + bool + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_VGIC + bool + depends on KVM_ARM_HOST && OF + select HAVE_KVM_IRQCHIP + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER + bool + depends on KVM_ARM_VGIC + ---help--- + Adds support for the Architected Timers in virtual machines. + +endif # VIRTUALIZATION From a043deb0317044bdeda876d2170de0b24c2bfba8 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 4 Jul 2013 13:40:29 +0900 Subject: [PATCH 046/287] KVM: Introduce kvm_arch_memslots_updated() This is called right after the memslots is updated, i.e. when the result of update_memslots() gets installed in install_new_memslots(). Since the memslots needs to be updated twice when we delete or move a memslot, kvm_arch_commit_memory_region() does not correspond to this exactly. In the following patch, x86 will use this new API to check if the mmio generation has reached its maximum value, in which case mmio sptes need to be flushed out. Signed-off-by: Takuya Yoshikawa Acked-by: Alexander Graf Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini (cherry picked from commit e59dbe09f8e6fb8f6ee19dc79d1a2f14299e4cd2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++++ arch/ia64/kvm/kvm-ia64.c | 4 ++++ arch/mips/kvm/kvm_mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 ++++- 8 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 741f66a2edbd..9c697db2787e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5b2dc0d10c8f..bdfd8789b376 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index dd203e59e6fd..a7b044536de4 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6316ee336e88..ae63ae4a1a5f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -420,6 +420,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return kvmppc_core_create_memslot(slot, npages); } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 698fb826e149..e515b2d4a947 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -981,6 +981,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e8ba99c34180..894003d79d22 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6976,6 +6976,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return -ENOMEM; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8db53cfaccdb..15018c572ac5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -497,6 +497,7 @@ int __kvm_set_memory_region(struct kvm *kvm, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b547a1ceecbc..7ca9939ee9a7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -731,7 +731,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, update_memslots(slots, new, kvm->memslots->generation); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - return old_memslots; + + kvm_arch_memslots_updated(kvm); + + return old_memslots; } /* From 3b180da00d212c6968ab821dcf85f3776929a44d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 6 Aug 2013 13:50:54 -0700 Subject: [PATCH 047/287] ARM: KVM: Fix unaligned unmap_range leak The unmap_range function did not properly cover the case when the start address was not aligned to PMD_SIZE or PUD_SIZE and an entire pte table or pmd table was cleared, causing us to leak memory when incrementing the addr. The fix is to always move onto the next page table entry boundary instead of adding the full size of the VA range covered by the corresponding table level entry. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit d3840b26614d8ce3db53c98061d9fcb1b9ccb0dd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index ca6bea4859b4..80a83ec4a9ae 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -132,37 +132,37 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, pmd_t *pmd; pte_t *pte; unsigned long long addr = start, end = start + size; - u64 range; + u64 next; while (addr < end) { pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { - addr += PUD_SIZE; + addr = pud_addr_end(addr, end); continue; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - addr += PMD_SIZE; + addr = pmd_addr_end(addr, end); continue; } pte = pte_offset_kernel(pmd, addr); clear_pte_entry(kvm, pte, addr); - range = PAGE_SIZE; + next = addr + PAGE_SIZE; /* If we emptied the pte, walk back up the ladder */ if (pte_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); - range = PMD_SIZE; + next = pmd_addr_end(addr, end); if (pmd_empty(pmd)) { clear_pud_entry(kvm, pud, addr); - range = PUD_SIZE; + next = pud_addr_end(addr, end); } } - addr += range; + addr = next; } } From ce7fc7403b536adb6b0871f3e4d07a4da08ad63b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Aug 2013 13:05:48 +0100 Subject: [PATCH 048/287] arm64: KVM: fix 2-level page tables unmapping When using 64kB pages, we only have two levels of page tables, meaning that PGD, PUD and PMD are fused. In this case, trying to refcount PUDs and PMDs independently is a a complete disaster, as they are the same. We manage to get it right for the allocation (stage2_set_pte uses {pmd,pud}_none), but the unmapping path clears both pud and pmd refcounts, which fails spectacularly with 2-level page tables. The fix is to avoid calling clear_pud_entry when both the pmd and pud pages are empty. For this, and instead of introducing another pud_empty function, consolidate both pte_empty and pmd_empty into page_empty (the code is actually identical) and use that to also test the validity of the pud. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 979acd5e18c3e5cb7e3308c699d79553af5af8c6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80a83ec4a9ae..0988d9e04dd4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -85,6 +85,12 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } +static bool page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { pmd_t *pmd_table = pmd_offset(pud, 0); @@ -103,12 +109,6 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) put_page(virt_to_page(pmd)); } -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) { if (pte_present(*pte)) { @@ -118,12 +118,6 @@ static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) } } -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - static void unmap_range(struct kvm *kvm, pgd_t *pgdp, unsigned long long start, u64 size) { @@ -153,10 +147,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, next = addr + PAGE_SIZE; /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { + if (page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); - if (pmd_empty(pmd)) { + if (page_empty(pmd) && !page_empty(pud)) { clear_pud_entry(kvm, pud, addr); next = pud_addr_end(addr, end); } From ebd362b7533b7866928c9bca7727c48558aedbd2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Jun 2013 11:02:34 +0100 Subject: [PATCH 049/287] arm64: KVM: perform save/restore of PAR_EL1 Not saving PAR_EL1 is an unfortunate oversight. If the guest performs an AT* operation and gets scheduled out before reading the result of the translation from PAREL1, it could become corrupted by another guest or the host. Saving this register is made slightly more complicated as KVM also uses it on the permission fault handling path, leading to an ugly "stash and restore" sequence. Fortunately, this is already a slow path so we don't really care. Also, Linux doesn't do any AT* operation, so Linux guests are not impacted by this bug. Signed-off-by: Marc Zyngier (cherry picked from commit 1bbd80549810637b7381ab0649ba7c7d62f1342a) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 17 ++++++++++------- arch/arm64/kvm/hyp.S | 10 ++++++++++ arch/arm64/kvm/sys_regs.c | 3 +++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index c92de4163eba..b25763bc0ec4 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,14 +42,15 @@ #define TPIDR_EL1 18 /* Thread ID, Privileged */ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +#define PAR_EL1 21 /* Physical Address Register */ /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 21 /* Domain Access Control Register */ -#define IFSR32_EL2 22 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */ -#define TEECR32_EL1 25 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 27 +#define DACR32_EL2 22 /* Domain Access Control Register */ +#define IFSR32_EL2 23 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ +#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 28 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -69,6 +70,8 @@ #define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ #define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ #define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ #define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ #define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ #define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index ff985e3d8b72..218802f68b20 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -214,6 +214,7 @@ __kvm_hyp_code_start: mrs x21, tpidr_el1 mrs x22, amair_el1 mrs x23, cntkctl_el1 + mrs x24, par_el1 stp x4, x5, [x3] stp x6, x7, [x3, #16] @@ -225,6 +226,7 @@ __kvm_hyp_code_start: stp x18, x19, [x3, #112] stp x20, x21, [x3, #128] stp x22, x23, [x3, #144] + str x24, [x3, #160] .endm .macro restore_sysregs @@ -243,6 +245,7 @@ __kvm_hyp_code_start: ldp x18, x19, [x3, #112] ldp x20, x21, [x3, #128] ldp x22, x23, [x3, #144] + ldr x24, [x3, #160] msr vmpidr_el2, x4 msr csselr_el1, x5 @@ -264,6 +267,7 @@ __kvm_hyp_code_start: msr tpidr_el1, x21 msr amair_el1, x22 msr cntkctl_el1, x23 + msr par_el1, x24 .endm .macro skip_32bit_state tmp, target @@ -753,6 +757,10 @@ el1_trap: */ tbnz x1, #7, 1f // S1PTW is set + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + push x3, xzr + /* * Permission fault, HPFAR_EL2 is invalid. * Resolve the IPA the hard way using the guest VA. @@ -766,6 +774,8 @@ el1_trap: /* Read result */ mrs x3, par_el1 + pop x0, xzr // Restore PAR_EL1 from the stack + msr par_el1, x0 tbnz x3, #0, 3f // Bail out if we failed the translation ubfx x3, x3, #12, #36 // Extract IPA lsl x3, x3, #4 // and present it like HPFAR diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 94923609753b..02e9d09e1d80 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* FAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), NULL, reset_unknown, FAR_EL1 }, + /* PAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), + NULL, reset_unknown, PAR_EL1 }, /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), From e336bcc2613d7332083819ac8f148313e9471f59 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 11 Jun 2013 18:05:25 +0100 Subject: [PATCH 050/287] arm64: KVM: add missing dsb before invalidating Stage-2 TLBs When performing a Stage-2 TLB invalidation, it is necessary to make sure the write to the page tables is observable by all CPUs. For this purpose, add dsb instructions to __kvm_tlb_flush_vmid_ipa and __kvm_flush_vm_context before doing the TLB invalidation itself. Signed-off-by: Marc Zyngier (cherry picked from commit f142e5eeb724cfbedd203b32b3b542d78dbe2545) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 218802f68b20..1ac0bbbdddb2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -604,6 +604,8 @@ END(__kvm_vcpu_run) // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); ENTRY(__kvm_tlb_flush_vmid_ipa) + dsb ishst + kern_hyp_va x0 ldr x2, [x0, #KVM_VTTBR] msr vttbr_el2, x2 @@ -625,6 +627,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) ENDPROC(__kvm_tlb_flush_vmid_ipa) ENTRY(__kvm_flush_vm_context) + dsb ishst tlbi alle1is ic ialluis dsb sy From fbcac5446f1c5be6e54e282e1dbed74c58671a72 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 22 Jul 2013 04:40:38 +0100 Subject: [PATCH 051/287] arm64: KVM: use 'int' instead of 'u32' for variable 'target' in kvm_host.h. 'target' will be set to '-1' in kvm_arch_vcpu_init(), and it need check 'target' whether less than zero or not in kvm_vcpu_initialized(). So need define target as 'int' instead of 'u32', just like ARM has done. The related warning: arch/arm64/kvm/../../../arch/arm/kvm/arm.c:497:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] Signed-off-by: Chen Gang [Marc: reformated the Subject line to fit the series] Signed-off-by: Marc Zyngier (cherry picked from commit 6c8c0c4dc0e98ee2191211d66e9f876e95787073) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 644d73956864..0859a4ddd1e7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -129,7 +129,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; /* Target CPU and feature flags */ - u32 target; + int target; DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); /* Detect first run of a vcpu */ From 54118be422ac3ff0af613676d47f8d46cc9f8801 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 29 Jul 2013 20:46:04 -0700 Subject: [PATCH 052/287] KVM: ARM: Squash len warning The 'len' variable was declared an unsigned and then checked for less than 0, which results in warnings on some compilers. Since len is assigned an int, make it an int. Signed-off-by: Christoffer Dall (cherry picked from commit 2184a60de26b94bc5a88de3e5a960ef9ff54ba5a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index b8e06b7a2833..0c25d9487d53 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -63,7 +63,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_exit_mmio *mmio) { - unsigned long rt, len; + unsigned long rt; + int len; bool is_write, sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { From d829a739332e33ffc9753c607325f461e9c994d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 13 May 2013 12:08:06 +0100 Subject: [PATCH 053/287] ARM: kvm: use inner-shareable barriers after TLB flushing When flushing the TLB at PL2 in response to remapping at stage-2 or VMID rollover, we have a dsb instruction to ensure completion of the command before continuing. Since we only care about other processors for TLB invalidation, use the inner-shareable variant of the dsb instruction instead. Acked-by: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit e3ab547f57bd626201d4b715b696c80ad1ef4ba2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/init.S | 2 +- arch/arm/kvm/interrupts.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index f048338135f7..1b9844d369cc 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -142,7 +142,7 @@ target: @ We're now in the trampoline code, switch page tables @ Invalidate the old TLBs mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb + dsb ish eret diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 16cd4ba5d7fd..f85052facffc 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -55,7 +55,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) mcrr p15, 6, r2, r3, c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) - dsb + dsb ish isb mov r2, #0 mov r3, #0 @@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context) mcr p15, 4, r0, c8, c3, 4 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ mcr p15, 0, r0, c7, c1, 0 - dsb + dsb ish isb @ Not necessary if followed by eret bx lr From 678b5999d8ded1bff60ef8e3bfb95c93d63c5ebc Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 6 Aug 2013 05:34:16 +0100 Subject: [PATCH 054/287] ARM: 7808/1: KVM: mm: Get rid of L_PTE_USER ref from PAGE_S2_DEVICE THe L_PTE_USER actually has nothing to do with stage 2 mappings and the L_PTE_S2_RDWR value sets the readable bit, which was what L_PTE_USER was used for before proper handling of stage 2 memory defines. Changelog: [v3]: Drop call to kvm_set_s2pte_writable in mmu.c [v2]: Change default mappings to be r/w instead of r/o, as per Marc Zyngier's suggestion. Cc: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Russell King (cherry picked from commit 8947c09d05da9f0436f423518f449beaa5ea1bdc) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/pgtable.h | 2 +- arch/arm/kvm/mmu.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 9bcd262a9008..8afc60c55e82 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -97,7 +97,7 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 0988d9e04dd4..b0de86b56c13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -489,7 +489,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); - kvm_set_s2pte_writable(&pte); ret = mmu_topup_memory_cache(&cache, 2, 2); if (ret) From effab3928a14e5574f32801eb665ce7c6a98ef14 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Sat, 24 Aug 2013 22:14:07 +0200 Subject: [PATCH 055/287] kvm: use anon_inode_getfd() with O_CLOEXEC flag KVM uses anon_inode_get() to allocate file descriptors as part of some of its ioctls. But those ioctls are lacking a flag argument allowing userspace to choose options for the newly opened file descriptor. In such case it's advised to use O_CLOEXEC by default so that userspace is allowed to choose, without race, if the file descriptor is going to be inherited across exec(). This patch set O_CLOEXEC flag on all file descriptors created with anon_inode_getfd() to not leak file descriptors across exec(). Signed-off-by: Yann Droneaud Link: http://lkml.kernel.org/r/cover.1377372576.git.ydroneaud@opteya.com Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov (cherry picked from commit 24009b0549de563006705b9af8694fc8fc9a5aa1) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7ca9939ee9a7..b5afee319073 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1896,7 +1896,7 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } /* @@ -2305,7 +2305,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { ops->destroy(dev); return ret; @@ -2589,7 +2589,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } #endif - r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); + r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); if (r < 0) kvm_put_kvm(kvm); From 4ace5f4542de34fb99068fa2285947d6bbfee26a Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 25 Jul 2013 03:04:38 +0200 Subject: [PATCH 056/287] kvm: optimize away THP checks in kvm_is_mmio_pfn() The checks on PG_reserved in the page structure on head and tail pages aren't necessary because split_huge_page wouldn't transfer the PG_reserved bit from head to tail anyway. This was a forward-thinking check done in the case PageReserved was set by a driver-owned page mapped in userland with something like remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not possible right now). It was meant to be very safe, but it's overkill as it's unlikely split_huge_page could ever run without the driver noticing and tearing down the hugepage itself. And if a driver in the future will really want to map a reserved hugepage in userland using an huge pmd it should simply take care of marking all subpages reserved too to keep KVM safe. This of course would require such a hypothetical driver to tear down the huge pmd itself and splitting the hugepage itself, instead of relaying on split_huge_page, but that sounds very reasonable, especially considering split_huge_page wouldn't currently transfer the reserved bit anyway. Signed-off-by: Andrea Arcangeli Signed-off-by: Gleb Natapov (cherry picked from commit 11feeb498086a3a5907b8148bdf1786a9b18fc55) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b5afee319073..6c9130bd16c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,28 +102,8 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) { - int reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); - reserved = PageReserved(head); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_trans_head takes care of that) - * but the hugepage may have been splitted - * from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. - */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); return true; } From 1752d0c7e51f1a5e8d1f021d6b4eef1845c6da06 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Aug 2013 11:08:22 +0100 Subject: [PATCH 057/287] ARM: KVM: vgic: simplify vgic_get_target_reg vgic_get_target_reg is quite complicated, for no good reason. Actually, it is fairly easy to write it in a much more efficient way by using the target CPU array instead of the bitmap. Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 986af8e0789a41ac4844e6eefed4a33e86524918) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 17c5ac7d10ed..a2d478aec046 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, static u32 vgic_get_target_reg(struct kvm *kvm, int irq) { struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; + int i; u32 val = 0; irq -= VGIC_NR_PRIVATE_IRQS; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); return val; } From 635c887f2931a200a52694170122e2b62d1b6218 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Aug 2013 11:08:23 +0100 Subject: [PATCH 058/287] ARM: KVM: vgic: fix GICD_ICFGRn access All the code in handle_mmio_cfg_reg() assumes the offset has been shifted right to accomodate for the 2:1 bit compression, but this is only done when getting the register address. Shift the offset early so the code works mostly unchanged. Reported-by: Zhaobo (Bob, ERC) Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 6545eae3d7a1b6dc2edb8ede9107998aee1207ef) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a2d478aec046..902789ff4abb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -541,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); + u32 *reg; + + offset >>= 1; + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset); + if (offset & 2) val = *reg >> 16; else From a144ec826c81facd70fd157aef6f7b7030687f68 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 29 Aug 2013 11:08:24 +0100 Subject: [PATCH 059/287] ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs For bytemaps each IRQ field is 1 byte wide, so we pack 4 irq fields in one word and since there are 32 private (per cpu) irqs, we have 8 private u32 fields on the vgic_bytemap struct. We shift the offset from the base of the register group right by 2, giving us the word index instead of the field index. But then there are 8 private words, not 4, which is also why we subtract 8 words from the offset of the shared words. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 8d98915b6bda499e47d19166101d0bbcfd409c80) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 902789ff4abb..685fc72fc751 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { offset >>= 2; BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) + if (offset < 8) return x->percpu[cpuid] + offset; else return x->shared + offset - 8; From de5324b84480246ad6064cd3469d362995c2e929 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 29 Aug 2013 11:08:25 +0100 Subject: [PATCH 060/287] ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256 The Versatile Express TC2 board, which we use as our main emulated platform in QEMU, defines 160+32 == 192 interrupts, so limiting the number of interrupts to 128 is not quite going to cut it for real board emulation. Note that this didn't use to be a problem because QEMU was buggy and only defined 128 interrupts until recently. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 9b2d2e0df8a49414b1e5bc89148c9984dd87782a) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 343744e4809c..7e2d15837b02 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,7 +26,7 @@ #include #include -#define VGIC_NR_IRQS 128 +#define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) From 07557caabbdc70c2644e6f195fd07ca8be7d16b2 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 8 Aug 2013 20:35:07 -0700 Subject: [PATCH 061/287] ARM: KVM: Fix kvm_set_pte assignment THe kvm_set_pte function was actually assigning the entire struct to the structure member, which should work because the structure only has that one member, but it is still not very nice. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 0963e5d0f22f9d197dbf206d8b5b2a150722cf5e) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 472ac7091003..9b28c41f4ba9 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void); static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { - pte_val(*pte) = new_pte; + *pte = new_pte; /* * flush_pmd_entry just takes a void pointer and cleans the necessary * cache entries, so we can reuse the function for ptes. From 14cffe44b8aad9a839fcbcf35f9d2bbd90f572c0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 8 Aug 2013 20:34:22 -0700 Subject: [PATCH 062/287] ARM: KVM: Simplify tracepoint text The tracepoint for kvm_guest_fault was extremely long, make it a slightly bit shorter. Cc: Sergei Shtylyov Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 6e72cc5700fe6b8776d537b736dab64b21ae0f1f) Signed-off-by: Christoffer Dall --- arch/arm/kvm/trace.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index a8e73ed5ad5b..b1d640f78623 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " - "ipa %#16llx, hsr %#08lx", - __entry->vcpu_pc, __entry->hxfar, - __entry->ipa, __entry->hsr) + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) ); TRACE_EVENT(kvm_irq_line, From 411b0c990125d54d48314d8ecd6e800bc9660509 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 19 Aug 2013 14:16:57 -0700 Subject: [PATCH 063/287] ARM: KVM: Work around older compiler bug Compilers before 4.6 do not behave well with unnamed fields in structure initializers and therefore produces build errors: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676 By refering to the unnamed union using braces, both older and newer compilers produce the same result. Acked-by: Marc Zyngier Reported-by: Russell King Tested-by: Russell King Signed-off-by: Christoffer Dall (cherry picked from commit 6833d83891140aedab7841589b7c7dbd7b600235) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b7840e7aa452..71e08baee209 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = { }; static const struct kvm_irq_level a15_vtimer_irq = { - .irq = 27, + { .irq = 27 }, .level = 1, }; From 215ed7558daf77d296ee388f64df34b0d479ea07 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 14 Aug 2013 12:33:48 -0700 Subject: [PATCH 064/287] ARM: KVM: Add newlines to panic strings The panic strings are hard to read and on narrow terminals some characters are simply truncated off the panic message. Make is slightly prettier with a newline in the Hyp panic strings. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1fe40f6d39d23f39e643607a3e1883bfc74f1244) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f85052facffc..ddc15539bad2 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -492,10 +492,10 @@ __kvm_hyp_code_end: .section ".rodata" und_die_str: - .ascii "unexpected undefined exception in Hyp mode at: %#08x" + .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" pabt_die_str: - .ascii "unexpected prefetch abort in Hyp mode at: %#08x" + .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" dabt_die_str: - .ascii "unexpected data abort in Hyp mode at: %#08x" + .ascii "unexpected data abort in Hyp mode at: %#08x\n" svc_die_str: - .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" From 02ef4f0c0d0930d9d44e9929caaee12043447010 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2013 13:52:33 +0200 Subject: [PATCH 065/287] KVM: mmu: allow page tables to be in read-only slots Page tables in a read-only memory slot will currently cause a triple fault because the page walker uses gfn_to_hva and it fails on such a slot. OVMF uses such a page table; however, real hardware seems to be fine with that as long as the accessed/dirty bits are set. Save whether the slot is readonly, and later check it when updating the accessed and dirty bits. Reviewed-by: Xiao Guangrong Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit ba6a3541545542721ce821d1e7e5ce35752e6fdf) Signed-off-by: Christoffer Dall --- arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 14 +++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index da20860b457a..e1af2394a23f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -69,6 +69,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -204,7 +221,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15018c572ac5..e2befc2b1647 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -519,6 +519,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6c9130bd16c8..fcbcf5312f93 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_hva); /* - * The hva returned by this function is only allowed to be read. - * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + * If writable is set to false, the hva returned by this function is only + * allowed to be read. */ -static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (writable) + *writable = !memslot_is_readonly(slot); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } @@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; r = kvm_read_hva(data, (void __user *)addr + offset, len); @@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); From ed363014a225b257a32ad69b1ef0da615aecb50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 Sep 2013 22:32:23 +0200 Subject: [PATCH 066/287] kvm: free resources after canceling async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we cancel 'async_pf_execute()', we should behave as if the work was never scheduled in 'kvm_setup_async_pf()'. Fixes a bug when we can't unload module because the vm wasn't destroyed. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 28b441e24088081c1e213139d1303b451a34a4f4) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index ea475cd03511..8a39dda7a325 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) /* work was canceled */ + if (!work->done) { /* work was canceled */ + mmdrop(work->mm); + kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); + } } spin_lock(&vcpu->async_pf.lock); From 78169fda11a0b68310885d8991399bc3a51e5b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 Sep 2013 22:32:24 +0200 Subject: [PATCH 067/287] kvm: remove .done from struct kvm_async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '.done' is used to mark the completion of 'async_pf_execute()', but 'cancel_work_sync()' returns true when the work was canceled, so we use it instead. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 98fda169290b3b28c0f2db2b8f02290c13da50ef) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/async_pf.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e2befc2b1647..dbbd78215204 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -176,7 +176,6 @@ struct kvm_async_pf { unsigned long addr; struct kvm_arch_async_pf arch; struct page *page; - bool done; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8a39dda7a325..b197950ac4d5 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); apf->page = page; - apf->done = true; spin_unlock(&vcpu->async_pf.lock); /* @@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) struct kvm_async_pf *work = list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); - cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) { /* work was canceled */ + if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); @@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, return 0; work->page = NULL; - work->done = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); From c3832c083abf356e0df1b581df22ee8a21034841 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 11 Sep 2013 15:27:41 -0700 Subject: [PATCH 068/287] ARM: kvm: rename cpu_reset to avoid name clash cpu_reset is already #defined in as processor.reset, so it expands here and causes problems. Cc: Signed-off-by: Olof Johansson Signed-off-by: Christoffer Dall (cherry picked from commit ac570e0493815e0b41681c89cb50d66421429d27) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 71e08baee209..c02ba4af599f 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -58,14 +58,14 @@ static const struct kvm_irq_level a15_vtimer_irq = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: if (vcpu->vcpu_id > a15_max_cpu_idx) return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &a15_vtimer_irq; break; @@ -74,7 +74,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); From ae0e4b34f8d96028faefddb83994cfd2c8cb8681 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:57:17 +0200 Subject: [PATCH 069/287] KVM: cleanup (physical) CPU hotplug Remove the useless argument, and do not do anything if there are no VMs running at the time of the hotplug. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 4fa92fb25ae5a2d79d872ab54df511c831b1f363) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fcbcf5312f93..7103b989ba54 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2681,10 +2681,11 @@ static void hardware_enable_nolock(void *junk) } } -static void hardware_enable(void *junk) +static void hardware_enable(void) { raw_spin_lock(&kvm_lock); - hardware_enable_nolock(junk); + if (kvm_usage_count) + hardware_enable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2698,10 +2699,11 @@ static void hardware_disable_nolock(void *junk) kvm_arch_hardware_disable(NULL); } -static void hardware_disable(void *junk) +static void hardware_disable(void) { raw_spin_lock(&kvm_lock); - hardware_disable_nolock(junk); + if (kvm_usage_count) + hardware_disable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2748,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, { int cpu = (long)v; - if (!kvm_usage_count) - return NOTIFY_OK; - val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - hardware_disable(NULL); + hardware_disable(); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - hardware_enable(NULL); + hardware_enable(); break; } return NOTIFY_OK; From 753f251708b5322430cf4902264d432a5daf06e4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:58:35 +0200 Subject: [PATCH 070/287] KVM: protect kvm_usage_count with its own spinlock The VM list need not be protected by a raw spinlock. Separate the two so that kvm_lock can be made non-raw. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 4a937f96f3a29c58b7edd349d2e4dfac371efdf2) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/locking.txt | 6 +++++- virt/kvm/kvm_main.c | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 41b7ac9884b5..b1f5de22d090 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -135,7 +135,11 @@ Name: kvm_lock Type: raw_spinlock Arch: any Protects: - vm_list - - hardware virtualization enable/disable + +Name: kvm_count_lock +Type: raw_spinlock_t +Arch: any +Protects: - hardware virtualization enable/disable Comment: 'raw' because hardware enabling/disabling must be atomic /wrt migration. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7103b989ba54..6bc0481bf8e5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -71,6 +71,7 @@ MODULE_LICENSE("GPL"); */ DEFINE_RAW_SPINLOCK(kvm_lock); +static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; @@ -2683,10 +2684,10 @@ static void hardware_enable_nolock(void *junk) static void hardware_enable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_enable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_nolock(void *junk) @@ -2701,10 +2702,10 @@ static void hardware_disable_nolock(void *junk) static void hardware_disable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_all_nolock(void) @@ -2718,16 +2719,16 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); hardware_disable_all_nolock(); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static int hardware_enable_all(void) { int r = 0; - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); kvm_usage_count++; if (kvm_usage_count == 1) { @@ -2740,7 +2741,7 @@ static int hardware_enable_all(void) } } - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); return r; } @@ -3050,7 +3051,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - WARN_ON(raw_spin_is_locked(&kvm_lock)); + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); hardware_enable_nolock(NULL); } } From 9c904866d4424d18852872878a57a899e48f3838 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2013 13:53:07 +0200 Subject: [PATCH 071/287] KVM: Convert kvm_lock back to non-raw spinlock In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"), the kvm_lock was made a raw lock. However, the kvm mmu_shrink() function tries to grab the (non-raw) mmu_lock within the scope of the raw locked kvm_lock being held. This leads to the following: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0 Preemption disabled at:[] mmu_shrink+0x5c/0x1b0 [kvm] Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt Call Trace: [] __might_sleep+0xfd/0x160 [] rt_spin_lock+0x24/0x50 [] mmu_shrink+0xec/0x1b0 [kvm] [] shrink_slab+0x17d/0x3a0 [] ? mem_cgroup_iter+0x130/0x260 [] balance_pgdat+0x54a/0x730 [] ? set_pgdat_percpu_threshold+0xa7/0xd0 [] kswapd+0x18f/0x490 [] ? get_parent_ip+0x11/0x50 [] ? __init_waitqueue_head+0x50/0x50 [] ? balance_pgdat+0x730/0x730 [] kthread+0xdb/0xe0 [] ? finish_task_switch+0x52/0x100 [] kernel_thread_helper+0x4/0x10 [] ? __init_kthread_worker+0x After the previous patch, kvm_lock need not be a raw spinlock anymore, so change it back. Reported-by: Paul Gortmaker Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 2f303b74a62fb74983c0a66e2df353be963c527c) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/locking.txt | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 8 ++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 18 +++++++++--------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index b1f5de22d090..ba035c33d01c 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update(). ------------ Name: kvm_lock -Type: raw_spinlock +Type: spinlock_t Arch: any Protects: - vm_list diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 004cc87b781c..3c1877bbfe6a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4220,7 +4220,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) if (nr_to_scan == 0) goto out; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4256,7 +4256,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); out: return percpu_counter_read_positive(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 894003d79d22..96765c116c26 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5104,7 +5104,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5114,7 +5114,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5261,12 +5261,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dbbd78215204..97e39fc02020 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -133,7 +133,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6bc0481bf8e5..8b47fd241a61 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -70,7 +70,7 @@ MODULE_LICENSE("GPL"); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_RAW_SPINLOCK(kvm_lock); +DEFINE_SPINLOCK(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return kvm; @@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_del(&kvm->vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); @@ -2974,10 +2974,10 @@ static int vm_stat_get(void *_offset, u64 *val) struct kvm *kvm; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) *val += *(u32 *)((void *)kvm + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -2991,12 +2991,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) int i; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) *val += *(u32 *)((void *)vcpu + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } From 5c1d6aafed853c88a645ac52134e217389c2cc8a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Wed, 11 Sep 2013 18:34:22 +0530 Subject: [PATCH 072/287] KVM: ARM: Fix typo in comments of inject_abt() Very minor typo in comments of inject_abt() when we update fault status register for injecting prefetch abort. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit b373e492f3a3469c615c2ae218d2f723900bf981) Signed-off-by: Christoffer Dall --- arch/arm/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da51..d6c005283678 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ From f166457e087b577af2ee72ec843d43ef6653e0f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:05 +0530 Subject: [PATCH 073/287] ARM: KVM: Implement kvm_vcpu_preferred_target() function This patch implements kvm_vcpu_preferred_target() function for KVM ARM which will help us implement KVM_ARM_PREFERRED_TARGET ioctl for user space. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 4a6fee805d5e278e4733bf933cb5b184b7a8be1f) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/kvm/guest.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 7d22517d8071..76f3c1978442 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -154,6 +154,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d03612181..ec98209fda71 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -222,6 +222,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; From 9778336fe11f088c3ddbd12714ab88f5760a1de4 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:06 +0530 Subject: [PATCH 074/287] ARM64: KVM: Implement kvm_vcpu_preferred_target() function This patch implements kvm_vcpu_preferred_target() function for KVM ARM64 which will help us implement KVM_ARM_PREFERRED_TARGET ioctl for user space. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 473bdc0e6565ebb22455657a40daa21b6b4ee16b) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/guest.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0859a4ddd1e7..4cc8c7078f39 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -151,6 +151,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2c3ff67a8ecb..3f0731e53274 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; From df50e5da2c1c65f1db1e9186112b57a8dd0b41ae Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: [PATCH 075/287] ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 42c4e0c77ac91505ab94284b14025e3a0865c0a5) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 13 +++++++++++++ include/uapi/linux/kvm.h | 1 + 2 files changed, 14 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..cc5adb9349ef 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index acccd08be6c7..cba62019348d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1011,6 +1011,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) From 89c774beb24924963aa146784a149e47efb29d82 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 1 Oct 2013 19:58:36 +0300 Subject: [PATCH 076/287] Fix NULL dereference in gfn_to_hva_prot() gfn_to_memslot() can return NULL or invalid slot. We need to check slot validity before accessing it. Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov (cherry picked from commit a2ac07fe292ea41296049dfdbfeed203e2467ee7) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8b47fd241a61..c5bc5aef11f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1065,10 +1065,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - if (writable) + unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); + + if (!kvm_is_error_hva(hva) && writable) *writable = !memslot_is_readonly(slot); - return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); + return hva; } static int kvm_read_hva(void *data, void __user *hva, int len) From 708854902d50f40783a81a629c4131383249236d Mon Sep 17 00:00:00 2001 From: Andre Richter Date: Wed, 2 Oct 2013 12:23:26 +0200 Subject: [PATCH 077/287] virt/kvm/iommu.c: Add leading zeros to device's BDF notation in debug messages When KVM (de)assigns PCI(e) devices to VMs, a debug message is printed including the BDF notation of the respective device. Currently, the BDF notation does not have the commonly used leading zeros. This produces messages like "assign device 0:1:8.0", which look strange at first sight. The patch fixes this by exchanging the printk(KERN_DEBUG ...) with dev_info() and also inserts "kvm" into the debug message, so that it is obvious where the message comes from. Also reduces LoC. Acked-by: Alex Williamson Signed-off-by: Andre Richter Signed-off-by: Gleb Natapov (cherry picked from commit 29242cb5c63b1f8e12e8055ba1a6c3e0004fa86d) Signed-off-by: Christoffer Dall --- virt/kvm/iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 72a130bc448a..a3b14109049b 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm, pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm assign device\n"); return 0; out_unmap: @@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm, pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm deassign device\n"); return 0; } From c1b378c34e3f2856c0ce060a8dad0877c6d6bfb9 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:27 +0100 Subject: [PATCH 078/287] KVM: ARM: Fix calculation of virtual CPU ID KVM does not have a notion of multiple clusters for CPUs, just a linear array of CPUs. When using a system with cores in more than one cluster, the current method for calculating the virtual MPIDR will leak the (physical) cluster information into the virtual MPIDR. One effect of this is that Linux under KVM fails to boot multiple CPUs that aren't in the 0th cluster. This patch does away with exposing the real MPIDR fields in favour of simply using the virtual CPU number (but preserving the U bit, as before). Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1158fca401e09665c440a9fe4fd4f131ee85c13b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc_a15.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd6..bbd4b888dbf3 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -27,14 +27,11 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). + * Compute guest MPIDR. No need to mess around with different clusters + * but we read the 'U' bit from the underlying hardware directly. */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); + vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) + | vcpu->vcpu_id; } #include "coproc.h" From e82866a69bd6e3ed1a1405ba19f1d5e66e5129a2 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:26 +0100 Subject: [PATCH 079/287] KVM: ARM: fix the size of TTBCR_{T0SZ,T1SZ} masks The T{0,1}SZ fields of TTBCR are 3 bits wide when using the long descriptor format. Likewise, the T0SZ field of the HTCR is 3-bits. KVM currently defines TTBCR_T{0,1}SZ as 3, not 7. The T0SZ mask is used to calculate the value for the HTCR, both to pick out TTBCR.T0SZ and mask off the equivalent field in the HTCR during read-modify-write. The incorrect mask size causes the (UNKNOWN) reset value of HTCR.T0SZ to leak in to the calculated HTCR value. Linux will hang when initializing KVM if HTCR's reset value has bit 2 set (sometimes the case on A7/TC2) Fixing T0SZ allows A7 cores to boot and T1SZ is also fixed for completeness. Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 5e497046f005528464f9600a4ee04f49df713596) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 64e96960de29..d556f03bca17 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -95,12 +95,12 @@ #define TTBCR_IRGN1 (3 << 24) #define TTBCR_EPD1 (1 << 23) #define TTBCR_A1 (1 << 22) -#define TTBCR_T1SZ (3 << 16) +#define TTBCR_T1SZ (7 << 16) #define TTBCR_SH0 (3 << 12) #define TTBCR_ORGN0 (3 << 10) #define TTBCR_IRGN0 (3 << 8) #define TTBCR_EPD0 (1 << 7) -#define TTBCR_T0SZ 3 +#define TTBCR_T0SZ (7 << 0) #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) /* Hyp System Trap Register */ From 5e1ddf60b6830d405ebb026e3d9570e9236a6600 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:28 +0100 Subject: [PATCH 080/287] KVM: ARM: Add support for Cortex-A7 This patch adds support for running Cortex-A7 guests on Cortex-A7 hosts. As Cortex-A7 is architecturally compatible with A15, this patch is largely just generalising existing code. Areas where 'implementation defined' behaviour is identical for A7 and A15 is moved to allow it to be used by both cores. The check to ensure that coprocessor register tables are sorted correctly is also moved in to 'common' code to avoid each new cpu doing its own check (and possibly forgetting to do so!) Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e8c2d99f8277d68d28a9f99d16289712bc2aee7f) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 2 +- arch/arm/include/uapi/asm/kvm.h | 3 +- arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/coproc.c | 114 ++++++++++++++++++++++++++++++++ arch/arm/kvm/coproc_a15.c | 114 +------------------------------- arch/arm/kvm/coproc_a7.c | 54 +++++++++++++++ arch/arm/kvm/guest.c | 4 +- arch/arm/kvm/reset.c | 15 +++-- 8 files changed, 184 insertions(+), 124 deletions(-) create mode 100644 arch/arm/kvm/coproc_a7.c diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index a2f43ddcc300..661da11f76f4 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -39,7 +39,7 @@ #define c6_IFAR 17 /* Instruction Fault Address Register */ #define c7_PAR 18 /* Physical Address Register */ #define c7_PAR_high 19 /* PAR top 32 bits */ -#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ #define c10_PRRR 21 /* Primary Region Remap Register */ #define c10_NMRR 22 /* Normal Memory Remap Register */ #define c12_VBAR 23 /* Vector Base Address Register */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c1ee007523d7..c498b60c0505 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -63,7 +63,8 @@ struct kvm_regs { /* Supported Processor Types */ #define KVM_ARM_TARGET_CORTEX_A15 0 -#define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_TARGET_CORTEX_A7 1 +#define KVM_ARM_NUM_TARGETS 2 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d99bee4950e5..789bca9e64a7 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4dd..a629f2c1d0f9 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. No need to mess around with different clusters + * but we read the 'U' bit from the underlying hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) + | vcpu->vcpu_id; +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, @@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, + + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + /* TTBR0/TTBR1: swapped by interrupt.S. */ { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, @@ -194,6 +292,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, + /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + /* * Dummy performance monitor implementation. */ @@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = { /* CNTKCTL: swapped by interrupt.S. */ { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index bbd4b888dbf3..bb0cac1410cc 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,98 +17,12 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include -#include -#include -#include -#include #include +#include #include -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR. No need to mess around with different clusters - * but we read the 'U' bit from the underlying hardware directly. - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) - | vcpu->vcpu_id; -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -118,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; static struct kvm_coproc_target_table a15_target_table = { @@ -151,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 000000000000..1df767331588 --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell + * Christoffer Dall + * Jonathan Austin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include +#include +#include + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + NULL, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index ec98209fda71..20f8d97904af 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; switch (part_number) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: return KVM_ARM_TARGET_CORTEX_A15; default: @@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index c02ba4af599f..d153e64d1255 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -30,16 +30,16 @@ #include /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; +static const int cortexa_max_cpu_idx = 3; -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level a15_vtimer_irq = { +static const struct kvm_irq_level cortexa_vtimer_irq = { { .irq = 27 }, .level = 1, }; @@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) + if (vcpu->vcpu_id > cortexa_max_cpu_idx) return -EINVAL; - reset_regs = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &a15_vtimer_irq; + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; From c7a9a5f3f02ff35d7ee820e98c1b9d46ab425808 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:28 -0700 Subject: [PATCH 081/287] KVM: Move gfn_to_index to x86 specific code The gfn_to_index function relies on huge page defines which either may not make sense on systems that don't support huge pages or are defined in an unconvenient way for other architectures. Since this is x86-specific, move the function to arch/x86/include/asm/kvm_host.h. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit 6d9d41e57440e32a3400f37aa05ef7a1a09ced64) Signed-off-by: Christoffer Dall --- arch/x86/include/asm/kvm_host.h | 7 +++++++ include/linux/kvm_host.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3741c653767c..53db582487c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,6 +79,13 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 97e39fc02020..b5267c4f5392 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,13 +827,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { From 4b0745341e63f3878fcc1240d44d1ddcab068ce1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:29 -0700 Subject: [PATCH 082/287] KVM: ARM: Get rid of KVM_HPAGE defines The KVM_HPAGE_DEFINES are a little artificial on ARM, since the huge page size is statically defined at compile time and there is only a single huge page size. Now when the main kvm code relying on these defines has been moved to the x86 specific part of the world, we can get rid of these. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit dc6f6763dfeaf2dfec906bb78875dcea162accd9) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 76f3c1978442..8a6f6db14ee4 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -38,11 +38,6 @@ #define KVM_VCPU_MAX_FEATURES 1 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - #include struct kvm_vcpu; From 1feff9299a9b7b1f137166f062772cd2044c7cbb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:30 -0700 Subject: [PATCH 083/287] KVM: arm64: Get rid of KVM_HPAGE defines Now when the main kvm code relying on these defines has been moved to the x86 specific part of the world, we can get rid of these. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit ef0cfe71c2b1710cd4ae747537e36c56f9a26ccf) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4cc8c7078f39..5d85a02d1231 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -36,11 +36,6 @@ #define KVM_VCPU_MAX_FEATURES 2 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); From 0a2e6af7befb4c0336f2c789d172bbb5438aec72 Mon Sep 17 00:00:00 2001 From: chai wen Date: Mon, 14 Oct 2013 22:22:33 +0800 Subject: [PATCH 084/287] KVM: Drop FOLL_GET in GUP when doing async page fault Page pinning is not mandatory in kvm async page fault processing since after async page fault event is delivered to a guest it accesses page once again and does its own GUP. Drop the FOLL_GET flag in GUP in async_pf code, and do some simplifying in check/clear processing. Suggested-by: Gleb Natapov Signed-off-by: Gu zheng Signed-off-by: chai wen Signed-off-by: Gleb Natapov (cherry picked from commit f2e106692d5189303997ad7b96de8d8123aa5613) Signed-off-by: Christoffer Dall --- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 10 ++++------ virt/kvm/async_pf.c | 17 +++++------------ 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96765c116c26..e4985fc604fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7118,7 +7118,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7228,7 +7228,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5267c4f5392..384fb0a74924 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -175,7 +175,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index b197950ac4d5..8631d9c14320 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) static void async_pf_execute(struct work_struct *work) { - struct page *page = NULL; struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct mm_struct *mm = apf->mm; @@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work) use_mm(mm); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->page = page; spin_unlock(&vcpu->async_pf.lock); /* @@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, page, gva); + trace_kvm_async_pf_completed(addr, gva); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); @@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } spin_unlock(&vcpu->async_pf.lock); @@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->link); spin_unlock(&vcpu->async_pf.lock); - if (work->page) - kvm_arch_async_page_ready(vcpu, work); + kvm_arch_async_page_ready(vcpu, work); kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } } @@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, if (!work) return 0; - work->page = NULL; + work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); @@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) if (!work) return -ENOMEM; - work->page = KVM_ERR_PTR_BAD_PAGE; + work->wakeup_all = true; INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); From 841372680792857d33c4c49d21be8a3cbde0490b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:00 +0530 Subject: [PATCH 085/287] kvm: Add struct kvm arg to memslot APIs We will use that in the later patch to find the kvm ops handler Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf (cherry picked from commit 5587027ce9d59a57aecaa190be1c8e560aaff45d) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 5 +++-- arch/ia64/kvm/kvm-ia64.c | 5 +++-- arch/mips/kvm/kvm_mips.c | 5 +++-- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 9 +++++---- arch/s390/kvm/kvm-s390.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++-- virt/kvm/kvm_main.c | 12 ++++++------ 10 files changed, 35 insertions(+), 26 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc5adb9349ef..e312e4a53f8d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bdfd8789b376..985bf80c622e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b044536de4..73b34827826c 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe03d77..e2dd05c81bc6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1a1b51189773..0a91f47e264b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ae63ae4a1a5f..750835a4ef70 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -409,15 +409,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } void kvm_arch_memslots_updated(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e515b2d4a947..54612d0e79dd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -971,12 +971,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4985fc604fe..68b139fe0dbd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6897,7 +6897,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -6918,7 +6918,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 384fb0a74924..4de0a8fedf3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -493,9 +493,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c5bc5aef11f5..c777a6e582f0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -541,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) /* * Free any memory in @free but not in @dont. */ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - kvm_arch_free_memslot(free, dont); + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; } @@ -558,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(memslot, NULL); + kvm_free_physmem_slot(kvm, memslot, NULL); kfree(kvm->memslots); } @@ -822,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(&new, npages)) + if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; } @@ -898,7 +898,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); return 0; @@ -906,7 +906,7 @@ int __kvm_set_memory_region(struct kvm *kvm, out_slots: kfree(slots); out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; } From 46a037fd36656b8283c282319876dd8e9c4f1087 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Oct 2013 18:38:13 +0100 Subject: [PATCH 086/287] ARM: KVM: Yield CPU when vcpu executes a WFE On an (even slightly) oversubscribed system, spinlocks are quickly becoming a bottleneck, as some vcpus are spinning, waiting for a lock to be released, while the vcpu holding the lock may not be running at all. This creates contention, and the observed slowdown is 40x for hackbench. No, this isn't a typo. The solution is to trap blocking WFEs and tell KVM that we're now spinning. This ensures that other vpus will get a scheduling boost, allowing the lock to be released more quickly. Also, using CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance when the VM is severely overcommited. Quick test to estimate the performance: hackbench 1 process 1000 2xA15 host (baseline): 1.843s 2xA15 guest w/o patch: 2.083s 4xA15 guest w/o patch: 80.212s 8xA15 guest w/o patch: Could not be bothered to find out 2xA15 guest w/ patch: 2.102s 4xA15 guest w/ patch: 3.205s 8xA15 guest w/ patch: 6.887s So we go from a 40x degradation to 1.5x in the 2x overcommit case, which is vaguely more acceptable. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 58d5ec8f8ee318b26b29207874fbaee626973952) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 4 +++- arch/arm/kvm/Kconfig | 1 + arch/arm/kvm/handle_exit.c | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d556f03bca17..fe395b7b1ce2 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -67,7 +67,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP) + HCR_TWE | HCR_SWIO | HCR_TIDCP) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* System Control Register (SCTLR) bits */ @@ -208,6 +208,8 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_WFI_IS_WFE (1U << 0) + #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) #define HSR_DABT_S1PTW (1U << 7) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index ebf5015508b5..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index df4c82d47ad7..c4c496f7619c 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -84,7 +84,11 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } From 1391c263d202d6ce8b74f361cd3607c69ae26e77 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 15 Oct 2013 18:10:42 -0700 Subject: [PATCH 087/287] KVM: ARM: Update comments for kvm_handle_wfi Update comments to reflect what is really going on and add the TWE bit to the comments in kvm_arm.h. Also renames the function to kvm_handle_wfx like is done on arm64 for consistency and uber-correctness. Signed-off-by: Christoffer Dall (cherry picked from commit 86ed81aa2e1ce05a4e7f0819f0dfc34e8d8fb910) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index fe395b7b1ce2..1d3153c7eb41 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -57,6 +57,7 @@ * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWI: Trap WFI + * TWE: Trap WFE * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain * FB: Force broadcast of all maintainance operations diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index c4c496f7619c..a92079011a83 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -73,15 +73,17 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer * @run: the kvm_run structure pointer * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) @@ -93,7 +95,7 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) } static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, [HSR_EC_CP14_MR] = kvm_handle_cp14_access, From cd709bb9fb35e687107761d5feed331d4c2c48e5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 1 Nov 2012 17:14:45 +0100 Subject: [PATCH 088/287] KVM: ARM: Support hugetlbfs backed huge pages Support huge pages in KVM/ARM and KVM/ARM64. The pud_huge checking on the unmap path may feel a bit silly as the pud_huge check is always defined to false, but the compiler should be smart about this. Note: This deals only with VMAs marked as huge which are allocated by users through hugetlbfs only. Transparent huge pages can only be detected by looking at the underlying pages (or the page tables themselves) and this patch so far simply maps these on a page-by-page level in the Stage-2 page tables. Cc: Catalin Marinas Cc: Russell King Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit ad361f093c1e31d0b43946210a32ab4ff5c49850) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 17 ++- arch/arm/include/asm/pgtable-3level.h | 2 + arch/arm/kvm/mmu.c | 169 +++++++++++++++++++------ arch/arm64/include/asm/kvm_mmu.h | 12 +- arch/arm64/include/asm/pgtable-hwdef.h | 2 + 5 files changed, 158 insertions(+), 44 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 9b28c41f4ba9..77de4a41cc50 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) +{ + *pmd = new_pmd; + flush_pmd_entry(pmd); +} + static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { *pte = new_pte; @@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= L_PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= L_PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { /* * If we are going to insert an instruction page and the icache is @@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) * need any kind of flushing (DDI 0406C.b - Page B3-1392). */ if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + __cpuc_coherent_user_range(hva, hva + size); } else if (!icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe398b95..ad52938bb264 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -113,6 +113,8 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Hyp-mode PL2 PTE definitions for LPAE. */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b0de86b56c13..745d8b1630cc 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define kvm_pmd_huge(_x) (pmd_huge(_x)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -93,19 +96,29 @@ static bool page_empty(void *ptr) static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); + } put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + } put_page(virt_to_page(pmd)); } @@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, continue; } + if (pud_huge(*pud)) { + /* + * If we are dealing with a huge pud, just clear it and + * move on. + */ + clear_pud_entry(kvm, pud, addr); + addr = pud_addr_end(addr, end); + continue; + } + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { addr = pmd_addr_end(addr, end); continue; } - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; + if (!kvm_pmd_huge(*pmd)) { + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(kvm, pte, addr); + next = addr + PAGE_SIZE; + } - /* If we emptied the pte, walk back up the ladder */ - if (page_empty(pte)) { + /* + * If the pmd entry is to be cleared, walk back up the ladder + */ + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { @@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); } - pmd = pmd_offset(pud, addr); + return pmd_offset(pud, addr); +} - /* Create 2nd stage page table mapping - Level 2 */ +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; + + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ @@ -508,15 +577,18 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -524,6 +596,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -541,26 +622,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); - - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - kvm_set_s2pte_writable(&new_pte); - kvm_set_pfn_dirty(pfn); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, PAGE_S2); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -629,7 +722,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) memslot = gfn_to_memslot(vcpu->kvm, gfn); - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index efe609c6a3c9..680f74e67497 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -91,6 +91,7 @@ int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) +#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) static inline bool kvm_is_write_fault(unsigned long esr) { @@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { if (!icache_is_aliasing()) { /* PIPT */ - unsigned long hva = gfn_to_hva(kvm, gfn); - flush_icache_range(hva, hva + PAGE_SIZE); + flush_icache_range(hva, hva + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2e9d83673ef6..b1d2e26c3c88 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -85,6 +85,8 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ From ad8b3ca795b8534c292e8916ffdfc36b4bd09940 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 15:32:01 -0700 Subject: [PATCH 089/287] KVM: ARM: Transparent huge page (THP) support Support transparent huge pages in KVM/ARM and KVM/ARM64. The transparent_hugepage_adjust is not very pretty, but this is also how it's solved on x86 and seems to be simply an artifact on how THPs behave. This should eventually be shared across architectures if possible, but that can always be changed down the road. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 9b5fdb9781f74fb15827e465bfb5aa63211953c8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 745d8b1630cc..371958370de4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; -#define kvm_pmd_huge(_x) (pmd_huge(_x)) +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -576,12 +576,53 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, return ret; } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long fault_status) { int ret; - bool write_fault, writable, hugetlb = false; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); @@ -602,6 +643,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_vm_hugetlb_page(vma)) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to VMAs not aligned to the PMD mapping + * granularity cannot be mapped using block descriptors even + * if the pages belong to a THP for the process, because the + * stage-2 block descriptor will cover more than a single THP + * and we loose atomicity for unmapping, updates, and splits + * of the THP or other pages in the stage-2 block range. + */ + if (vma->vm_start & ~PMD_MASK) + force_pte = true; } up_read(¤t->mm->mmap_sem); @@ -629,6 +681,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); From c0acda6fd7c83ca3cc8c5892247ece8c9651c9f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:04 +0100 Subject: [PATCH 090/287] ARM: KVM: Fix MPIDR computing to support virtual clusters In order to be able to support more than 4 A7 or A15 CPUs, we need to fix the MPIDR computing to reflect the fact that both A15 and A7 can only exist in clusters of at most 4 CPUs. Fix the MPIDR computing to allow virtual clusters to be exposed to the guest. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 2d1d841bd44e24b58a3d3cc4fa793670aaa38fbf) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index a629f2c1d0f9..631e6bd0e05f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -74,11 +74,13 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* - * Compute guest MPIDR. No need to mess around with different clusters - * but we read the 'U' bit from the underlying hardware directly. + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) - | vcpu->vcpu_id; + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); } /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ From 8c02aa50010946b98e5077e5e05b5d99e6286ab1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:05 +0100 Subject: [PATCH 091/287] ARM: KVM: fix L2CTLR to be per-cluster The L2CTLR register contains the number of CPUs in this cluster. Make sure the register content is actually relevant to the vcpu that is being configured by computing the number of cores that are part of its cluster. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 9cbb6d969cb6561de45d917b8bb9281cb374bb35) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 631e6bd0e05f..78c0885d6501 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -124,6 +124,10 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); l2ctlr &= ~(3 << 24); ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); l2ctlr |= (ncores & 3) << 24; vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; From c93a79267ff11590dd7835c6622bd9f29ef43e73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:06 +0100 Subject: [PATCH 092/287] ARM: KVM: drop limitation to 4 CPU VMs Now that the KVM/arm code knows about affinity, remove the hard limit of 4 vcpus per VM. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7999b4d18211bcfb40e3574cf75e94518e9fa2c6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index d153e64d1255..f558c073c023 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -33,8 +33,6 @@ * Cortex-A15 and Cortex-A7 Reset Values */ -static const int cortexa_max_cpu_idx = 3; - static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; @@ -64,8 +62,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > cortexa_max_cpu_idx) - return -EINVAL; reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &cortexa_vtimer_irq; From 3d6b7ab3028ceacadd0a29b4757a788e24987d10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:03 +0100 Subject: [PATCH 093/287] arm/arm64: KVM: PSCI: use MPIDR to identify a target CPU The KVM PSCI code blindly assumes that vcpu_id and MPIDR are the same thing. This is true when vcpus are organized as a flat topology, but is wrong when trying to emulate any other topology (such as A15 clusters). Change the KVM PSCI CPU_ON code to look at the MPIDR instead of the vcpu_id to pick a target CPU. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 79c648806f9034abf54332b78043bb242189d953) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/psci.c | 17 +++++++++++++---- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index a464e8d7b6c5..708e4d8a647f 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -157,4 +157,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cp15[c0_MPIDR]; +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 86a693a02ba3..311263124acf 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } + + if (!vcpu) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - vcpu = kvm_get_vcpu(kvm, cpu_id); - wq = kvm_arch_vcpu_wq(vcpu); if (!waitqueue_active(wq)) return KVM_PSCI_RET_INVAL; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index eec073875218..6df93cdc652b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -177,4 +177,9 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu_sys_reg(vcpu, MPIDR_EL1); +} + #endif /* __ARM64_KVM_EMULATE_H__ */ From b695de83412eb8ff7c77b215fa24bca5b1871618 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 24 Oct 2013 09:56:39 +0800 Subject: [PATCH 094/287] KVM: Mapping IOMMU pages after updating memslot In kvm_iommu_map_pages(), we need to know the page size via call kvm_host_page_size(). And it will check whether the target slot is valid before return the right page size. Currently, we will map the iommu pages when creating a new slot. But we call kvm_iommu_map_pages() during preparing the new slot. At that time, the new slot is not visible by domain(still in preparing). So we cannot get the right page size from kvm_host_page_size() and this will break the IOMMU super page logic. The solution is to map the iommu pages after we insert the new slot into domain. Signed-off-by: Yang Zhang Tested-by: Patrick Lu Signed-off-by: Paolo Bonzini (cherry picked from commit e0230e1327fb862c9b6cde24ae62d55f9db62c9b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c777a6e582f0..340c97cf55b6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -873,21 +873,6 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. - */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - if (r) - goto out_slots; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; @@ -901,6 +886,20 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); + /* + * IOMMU mapping: New slots need to be mapped. Old slots need to be + * un-mapped and re-mapped if their base changes. Since base change + * unmapping is handled above with slot deletion, mapping alone is + * needed here. Anything else the iommu might care about for existing + * slots (size changes, userspace addr changes and read-only flag + * changes) is disallowed above, so any other attribute changes getting + * here can be skipped. + */ + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + r = kvm_iommu_map_pages(kvm, &new); + return r; + } + return 0; out_slots: From c0cdef185a6b5c8f3c5e6b923e95774c36764011 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Aug 2013 11:41:13 +0100 Subject: [PATCH 095/287] arm64: KVM: Yield CPU when vcpu executes a WFE On an (even slightly) oversubscribed system, spinlocks are quickly becoming a bottleneck, as some vcpus are spinning, waiting for a lock to be released, while the vcpu holding the lock may not be running at all. The solution is to trap blocking WFEs and tell KVM that we're now spinning. This ensures that other vpus will get a scheduling boost, allowing the lock to be released more quickly. Also, using CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance when the VM is severely overcommited. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit d241aac798eb042e605f78c31a4122e583b2cd13) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 8 ++++++-- arch/arm64/kvm/Kconfig | 1 + arch/arm64/kvm/handle_exit.c | 18 +++++++++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a5f28e2720c7..c98ef4771c73 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -63,6 +63,7 @@ * TAC: Trap ACTLR * TSC: Trap SMC * TSW: Trap cache operations by set/way + * TWE: Trap WFE * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain @@ -72,8 +73,9 @@ * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate */ -#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ - HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ + HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) @@ -242,4 +244,6 @@ #define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 +#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 21e90820bd23..4480ab339a00 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST select KVM_ARM_VGIC diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 9beaca033437..8da56067c304 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event + * instruction executed by a guest + * * @vcpu: the vcpu pointer * - * Simply call kvm_vcpu_block(), which will halt execution of + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of * world-switches and schedule other host processes until there is an * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } static exit_handle_fn arm_exit_handlers[] = { - [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_WFI] = kvm_handle_wfx, [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, From e845f9d367f2ac8197a142fd29d1baa8fcc4dd75 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Oct 2013 12:12:13 +0100 Subject: [PATCH 096/287] KVM: use a more sensible error number when debugfs directory creation fails I don't know if this was due to cut and paste, or somebody was really using a D20 to pick the error code for kvm_init_debugfs as suggested by Linus (EFAULT is 14, so the possibility cannot be entirely ruled out). In any case, this patch fixes it. Reported-by: Tim Gardner Signed-off-by: Paolo Bonzini (cherry picked from commit 0c8eb04a6241da28deb108181213b791c378123b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 340c97cf55b6..d6b7d797cb16 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3010,7 +3010,7 @@ static const struct file_operations *stat_fops[] = { static int kvm_init_debug(void) { - int r = -EFAULT; + int r = -EEXIST; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); From b4fe3057a0c543ed42aa2a69a6ab9a5312571c34 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: [PATCH 097/287] kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini (cherry picked from commit 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 6 +-- arch/x86/kvm/cpuid.c | 57 ++++++++++++++++++++--- arch/x86/kvm/cpuid.h | 5 +- arch/x86/kvm/x86.c | 9 ++-- include/uapi/linux/kvm.h | 2 + 6 files changed, 139 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9bfadeb8be31..d196ebe8956e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) struct kvm_cpuid_entry2 { __u32 function; @@ -2661,6 +2661,77 @@ and usually define the validity of a groups of registers. (e.g. one bit }; +4.81 KVM_GET_EMULATED_CPUID + +Capability: KVM_CAP_EXT_EMUL_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 flags; + struct kvm_cpuid_entry2 entries[0]; +}; + +The member 'flags' is used for passing flags from userspace. + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are emulated by +kvm.Userspace can use the information returned by this ioctl to query +which features are emulated by kvm instead of being present natively. + +Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 +structure with the 'nent' field indicating the number of entries in +the variable-size array 'entries'. If the number of entries is too low +to describe the cpu capabilities, an error (E2BIG) is returned. If the +number is too high, the 'nent' field is adjusted and an error (ENOMEM) +is returned. If the number is just right, the 'nent' field is adjusted +to the number of valid entries in the 'entries' array, which is then +filled. + +The entries returned are the set CPUID bits of the respective features +which kvm emulates, as returned by the CPUID instruction, with unknown +or unsupported feature bits cleared. + +Features like x2apic, for example, may not be present in the host cpu +but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be +emulated efficiently and thus not included here. + +The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + + 6. Capabilities that can be enabled ----------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..89d288237b9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit) #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -480,6 +486,15 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (entries[i].padding[0] || + entries[i].padding[1] || + entries[i].padding[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) @@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 68b139fe0dbd..5276618579d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2510,6 +2510,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2619,15 +2620,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cba62019348d..3d365d191145 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -667,6 +668,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING From a7dc5f55357702cc04773cd9e92a853767209d7d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: [PATCH 098/287] kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini (cherry picked from commit ec53500fae421e07c5d035918ca454a429732ef4) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/vfio.txt | 22 +++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/Makefile | 2 +- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 + virt/kvm/Kconfig | 3 + virt/kvm/kvm_main.c | 5 + virt/kvm/vfio.c | 220 +++++++++++++++++++++ 8 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Documentation/virtual/kvm/devices/vfio.txt create mode 100644 virt/kvm/vfio.c diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4de0a8fedf3f..214fc2d84366 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3d365d191145..d5b8501cebfd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -844,6 +844,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6b7d797cb16..652d682b1d55 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_XICS: ops = &kvm_xics_ops; break; +#endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; #endif default: return -ENODEV; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..597c258245ea --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,220 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; From 7f17a13bdd7902c3e4d55c273820e3099c1e1d33 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Oct 2013 21:43:01 +0200 Subject: [PATCH 099/287] kvm_host: typo fix fix up typo in comment. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini (cherry picked from commit 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 214fc2d84366..6d24a2671d29 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -775,7 +775,7 @@ static inline void kvm_guest_enter(void) /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspase from rcu point of view. In + * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. From d49c7a4473eebfda645f469c7a2e77846c8008e2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 5 Nov 2013 16:04:18 +0200 Subject: [PATCH 100/287] KVM: remove vm mmap method It was used in conjunction with KVM_SET_MEMORY_REGION ioctl which was removed by b74a07beed0 in 2010, QEMU stopped using it in 2008, so it is time to remove the code finally. Signed-off-by: Gleb Natapov (cherry picked from commit 80f5b5e700fa9c58480eafce0d47367bafb70006) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 652d682b1d55..dd9ce144cf99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2524,44 +2524,12 @@ static long kvm_vm_compat_ioctl(struct file *filp, } #endif -static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page[1]; - unsigned long addr; - int npages; - gfn_t gfn = vmf->pgoff; - struct kvm *kvm = vma->vm_file->private_data; - - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return VM_FAULT_SIGBUS; - - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, - NULL); - if (unlikely(npages != 1)) - return VM_FAULT_SIGBUS; - - vmf->page = page[0]; - return 0; -} - -static const struct vm_operations_struct kvm_vm_vm_ops = { - .fault = kvm_vm_fault, -}; - -static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_vm_vm_ops; - return 0; -} - static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = kvm_vm_compat_ioctl, #endif - .mmap = kvm_vm_mmap, .llseek = noop_llseek, }; From 60979ebbbbe9d4c56e87cffcaa92933d96637d7b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:45 +0000 Subject: [PATCH 101/287] arm64: KVM: initialize HYP mode following the kernel endianness Force SCTLR_EL2.EE to 1 if the kernel is compiled as BE. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 18ea3dbc9e5c8a53a361b17c4a5676ea6f4bcb72) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp-init.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ba84e6705e20..2b0244d65c16 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -74,7 +74,10 @@ __do_hyp_init: msr mair_el2, x4 isb - mov x4, #SCTLR_EL2_FLAGS + mrs x4, sctlr_el2 + and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 + ldr x5, =SCTLR_EL2_FLAGS + orr x4, x4, x5 msr sctlr_el2, x4 isb From 128a021aa83b73d07153f8fee5dd22e6933f62e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:46 +0000 Subject: [PATCH 102/287] arm64: KVM: vgic: byteswap GICv2 access on world switch if BE Ensure that accesses to the GICH_* registers are byteswapped when the kernel is compiled as big-endian. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit c5b2c0f5203b3bc678a8967daedf7114029975ae) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 1ac0bbbdddb2..3b47c36e10ff 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -403,6 +403,14 @@ __kvm_hyp_code_start: ldr w9, [x2, #GICH_ELRSR0] ldr w10, [x2, #GICH_ELRSR1] ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) str w4, [x3, #VGIC_CPU_HCR] str w5, [x3, #VGIC_CPU_VMCR] @@ -421,6 +429,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) str w5, [x3], #4 sub w4, w4, #1 cbnz w4, 1b @@ -446,6 +455,9 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_HCR] ldr w5, [x3, #VGIC_CPU_VMCR] ldr w6, [x3, #VGIC_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) str w4, [x2, #GICH_HCR] str w5, [x2, #GICH_VMCR] @@ -456,6 +468,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) str w5, [x2], #4 sub w4, w4, #1 cbnz w4, 1b From dd8858820ec48b90c5ab5c6631f50a613deb55bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 12 Feb 2013 12:40:22 +0000 Subject: [PATCH 103/287] arm/arm64: KVM: MMIO support for BE guest Do the necessary byteswap when host and guest have different views of the universe. Actually, the only case we need to take care of is when the guest is BE. All the other cases are naturally handled. Also be careful about endianness when the data is being memcopy-ed from/to the run buffer. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 6d89d2d9b5bac9dbe40ee106ceda9307b6265234) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 41 +++++++++++++ arch/arm/kvm/mmio.c | 86 ++++++++++++++++++++++++---- arch/arm64/include/asm/kvm_emulate.h | 48 ++++++++++++++++ 3 files changed, 164 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 708e4d8a647f..b79cd8d3d6ee 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -162,4 +162,45 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu->arch.cp15[c0_MPIDR]; } +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + default: + return be32_to_cpu(data); + } + } + + return data; /* Leave LE untouched */ +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + default: + return cpu_to_be32(data); + } + } + + return data; /* Leave LE untouched */ +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 0c25d9487d53..4cb5a93182e9 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -23,6 +23,68 @@ #include "trace.h" +static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = buf[0]; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + /** * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation * @vcpu: The VCPU pointer @@ -33,28 +95,27 @@ */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long *dest; + unsigned long data; unsigned int len; int mask; if (!run->mmio.is_write) { - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - *dest = 0; - len = run->mmio.len; if (len > sizeof(unsigned long)) return -EINVAL; - memcpy(dest, run->mmio.data, len); - - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - *((u64 *)run->mmio.data)); + data = mmio_read_buf(run->mmio.data, len); if (vcpu->arch.mmio_decode.sign_extend && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); - *dest = (*dest ^ mask) - mask; + data = (data ^ mask) - mask; } + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + data); + data = vcpu_data_host_to_guest(vcpu, data, len); + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; } return 0; @@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { struct kvm_exit_mmio mmio; + unsigned long data; unsigned long rt; int ret; @@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); + trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, fault_ipa, - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); + (mmio.is_write) ? data : 0); if (mmio.is_write) - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); + mmio_write_buf(mmio.data, mmio.len, data); if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6df93cdc652b..291f87cf457f 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -182,4 +182,52 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu_sys_reg(vcpu, MPIDR_EL1); } +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); + + return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + case 4: + return be32_to_cpu(data & 0xffffffff); + default: + return be64_to_cpu(data); + } + } + + return data; /* Leave LE untouched */ +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + case 4: + return cpu_to_be32(data & 0xffffffff); + default: + return cpu_to_be64(data); + } + } + + return data; /* Leave LE untouched */ +} + #endif /* __ARM64_KVM_EMULATE_H__ */ From 53e38964402dd81c8528f3d2c6fd119c4091a390 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 14:12:15 +0000 Subject: [PATCH 104/287] arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu When booting a vcpu using PSCI, make sure we start it with the endianness of the caller. Otherwise, secondaries can be pretty unhappy to execute a BE kernel in LE mode... This conforms to PSCI spec Rev B, 5.13.3. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ce94fe93d566bf381c6ecbd45010d36c5f04d692) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/psci.c | 4 ++++ arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b79cd8d3d6ee..0fa90c962ac8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -162,6 +162,11 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu->arch.cp15[c0_MPIDR]; } +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_E_BIT; +} + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 311263124acf..0881bf169fbc 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -71,6 +71,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu_set_thumb(vcpu); } + /* Propagate caller endianness */ + if (kvm_vcpu_is_be(source_vcpu)) + kvm_vcpu_set_be(vcpu); + *vcpu_pc(vcpu) = target_pc; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 291f87cf457f..dd8ecfc3f995 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -182,6 +182,14 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu_sys_reg(vcpu, MPIDR_EL1); } +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; + else + vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); +} + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) From 52031ff6011942b7e29788b9ce5066cb45fe3e4e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Nov 2013 13:14:12 -0800 Subject: [PATCH 105/287] arm/arm64: KVM: Fix hyp mappings of vmalloc regions Using virt_to_phys on percpu mappings is horribly wrong as it may be backed by vmalloc. Introduce kvm_kaddr_to_phys which translates both types of valid kernel addresses to the corresponding physical address. At the same time resolves a typing issue where we were storing the physical address as a 32 bit unsigned long (on arm), truncating the physical address for addresses above the 4GB limit. This caused breakage on Keystone. Cc: [3.10+] Reported-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 40c2729bab48e2832b17c1fa8af9db60e776131b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 371958370de4..580906989db1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -334,6 +334,17 @@ static int __create_hyp_mappings(pgd_t *pgdp, return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -345,16 +356,27 @@ static int __create_hyp_mappings(pgd_t *pgdp, */ int create_hyp_mappings(void *from, void *to) { - unsigned long phys_addr = virt_to_phys(from); + phys_addr_t phys_addr; + unsigned long virt_addr; unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); - /* Check for a valid kernel memory mapping */ - if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) - return -EINVAL; + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); - return __create_hyp_mappings(hyp_pgd, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP); + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** From 4ae68e1ee672530df9236b07ff267f06e107bc9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Nov 2013 10:35:55 +0100 Subject: [PATCH 106/287] KVM: kvm_clear_guest_page(): fix empty_zero_page usage Using the address of 'empty_zero_page' as source address in order to clear a page is wrong. On some architectures empty_zero_page is only the pointer to the struct page of the empty_zero_page. Therefore the clear page operation would copy the contents of a couple of struct pages instead of clearing a page. For kvm only arm/arm64 are affected by this bug. To fix this use the ZERO_PAGE macro instead which will return the struct page address of the empty_zero_page on all architectures. Signed-off-by: Heiko Carstens Signed-off-by: Gleb Natapov (cherry picked from commit 8a3caa6d74597c2a083f7c87f866891a0b12540b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd9ce144cf99..e2b9a0670639 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1613,8 +1613,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { - return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, - offset, len); + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + + return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); From 4479f3b0060731a9d61b30570db282b5587c885f Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 19 Nov 2013 14:59:12 -0500 Subject: [PATCH 107/287] arm/arm64: kvm: Use virt_to_idmap instead of virt_to_phys for idmap mappings KVM initialisation fails on architectures implementing virt_to_idmap() because virt_to_phys() on such architectures won't fetch you the correct idmap page. So update the KVM ARM code to use the virt_to_idmap() to fix the issue. Since the KVM code is shared between arm and arm64, we create kvm_virt_to_phys() and handle the redirection in respective headers. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Catalin Marinas Signed-off-by: Santosh Shilimkar Signed-off-by: Christoffer Dall (cherry picked from commit 4fda342cc7f577599c53fd27b99c953c7b1da18a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/kvm/mmu.c | 8 ++++---- arch/arm64/include/asm/kvm_mmu.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 77de4a41cc50..2d122adcdb22 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 580906989db1..659db0ed1370 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -916,9 +916,9 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); - hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { /* @@ -945,7 +945,7 @@ int kvm_mmu_init(void) */ kvm_flush_dcache_to_poc(init_bounce_page, len); - phys_base = virt_to_phys(init_bounce_page); + phys_base = kvm_virt_to_phys(init_bounce_page); hyp_idmap_vector += phys_base - hyp_idmap_start; hyp_idmap_start = phys_base; hyp_idmap_end = phys_base + len; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 680f74e67497..7f1f9408ff66 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ From bc071b4cc769735a12fc9fa17cb54dd42f3fb0e6 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 18 Nov 2013 16:09:22 -0800 Subject: [PATCH 108/287] KVM: Improve create VCPU parameter (CVE-2013-4587) In multiple functions the vcpu_id is used as an offset into a bitfield. Ag malicious user could specify a vcpu_id greater than 255 in order to set or clear bits in kernel memory. This could be used to elevate priveges in the kernel. This patch verifies that the vcpu_id provided is less than 255. The api documentation already specifies that the vcpu_id must be less than max_vcpus, but this is currently not checked. Reported-by: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Andrew Honig Signed-off-by: Paolo Bonzini (cherry picked from commit 338c7dbadd2671189cec7faf64c84d01071b3f96) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b9a0670639..2dd59b957164 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1894,6 +1894,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) int r; struct kvm_vcpu *vcpu, *v; + if (id >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = kvm_arch_vcpu_create(kvm, id); if (IS_ERR(vcpu)) return PTR_ERR(vcpu); From 580ab4613364270a998a44850d49c4d8c5df5144 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:07:21 +0900 Subject: [PATCH 109/287] KVM: Use cond_resched() directly and remove useless kvm_resched() Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers to make kvm preemptible"), the remaining stuff in this function is a simple cond_resched() call with an extra need_resched() check which was there to avoid dropping VCPUs unnecessarily. Now it is meaningless. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini (cherry picked from commit c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f) Signed-off-by: Christoffer Dall --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 8 -------- 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 985bf80c622e..53f44bee9ebb 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -702,7 +702,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { - kvm_resched(vcpu); + cond_resched(); idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 550f5928b394..717e5b525f3b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1253,7 +1253,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - kvm_resched(vcpu); + cond_resched(); spin_lock(&vc->lock); now = get_tb(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5276618579d3..805c8e92cf66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5947,7 +5947,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_resched(vcpu); + cond_resched(); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6d24a2671d29..9c0f8545df73 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -569,7 +569,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); bool kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2dd59b957164..cb9a865c8e01 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1706,14 +1706,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ -void kvm_resched(struct kvm_vcpu *vcpu) -{ - if (!need_resched()) - return; - cond_resched(); -} -EXPORT_SYMBOL_GPL(kvm_resched); - bool kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; From 8215ed10b4803d159b3f10784140a79eafd94382 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 5 Aug 2013 15:04:46 +0100 Subject: [PATCH 110/287] arm: kvm: implement CPU PM notifier Upon CPU shutdown and consequent warm-reboot, the hypervisor CPU state must be re-initialized. This patch implements a CPU PM notifier that upon warm-boot calls a KVM hook to reinitialize properly the hypervisor state so that the CPU can be safely resumed. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 1fcf7ce0c60213994269fb59569ec161eb6e08d6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e312e4a53f8d..b04013608e7e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -853,6 +854,33 @@ static struct notifier_block hyp_init_cpu_nb = { .notifier_call = hyp_init_cpu_notify, }; +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + if (cmd == CPU_PM_EXIT) { + cpu_init_hyp_mode(NULL); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +#endif + /** * Inits Hyp-mode on all online CPUs */ @@ -1013,6 +1041,8 @@ int kvm_arch_init(void *opaque) goto out_err; } + hyp_cpu_pm_init(); + kvm_coproc_table_init(); return 0; out_err: From 90715c6a824fcc239e7da97c4078b85c7944df7c Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 9 Dec 2013 00:22:53 +0900 Subject: [PATCH 111/287] treewide: Fix typos in printk Correct spelling typo in various part of kernel [ cdall: Pickes KVM/arm64 specific part not already merged into LSK ] Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina (cherry picked from commit 77d84ff87e9d38072abcca665ca22cb1da41cb86) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8da56067c304..42a0f1bddfe7 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -90,7 +90,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unkown exception class: hsr: %#08x\n", + kvm_err("Unknown exception class: hsr: %#08x\n", (unsigned int)kvm_vcpu_get_hsr(vcpu)); BUG(); } From 7e2c9ce0199412b559bbf527956c004a05a64619 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Nov 2013 17:43:19 -0800 Subject: [PATCH 112/287] arm: KVM: Don't return PSCI_INVAL if waitqueue is inactive The current KVM implementation of PSCI returns INVALID_PARAMETERS if the waitqueue for the corresponding CPU is not active. This does not seem correct, since KVM should not care what the specific thread is doing, for example, user space may not have called KVM_RUN on this VCPU yet or the thread may be busy looping to user space because it received a signal; this is really up to the user space implementation. Instead we should check specifically that the CPU is marked as being turned off, regardless of the VCPU thread state, and if it is, we shall simply clear the pause flag on the CPU and wake up the thread if it happens to be blocked for us. Further, the implementation seems to be racy when executing multiple VCPU threads. There really isn't a reasonable user space programming scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init before turning on the boot CPU. Therefore, set the pause flag on the vcpu at VCPU init time (which can reasonably be expected to be completed for all CPUs by user space before running any VCPUs) and clear both this flag and the feature (in case the feature can somehow get set again in the future) and ping the waitqueue on turning on a VCPU using PSCI. Reported-by: Peter Maydell Signed-off-by: Christoffer Dall (cherry picked from commit 478a8237f656d86d25b3e4e4bf3c48f590156294) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 30 +++++++++++++++++++----------- arch/arm/kvm/psci.c | 11 ++++++----- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b04013608e7e..bb43b8cc1231 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -479,15 +479,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Handle the "start in power-off" case by calling into the - * PSCI code. - */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); - } - return 0; } @@ -701,6 +692,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. + */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -714,8 +723,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 0881bf169fbc..448f60e8d23c 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) } } - if (!vcpu) + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu || !vcpu->arch.pause) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; - kvm_reset_vcpu(vcpu); /* Gracefully handle Thumb2 entry point */ @@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); return KVM_PSCI_RET_SUCCESS; From b5a94dd48dc470624dcb7d581432a439d7a289c0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 16 Nov 2013 10:51:25 -0800 Subject: [PATCH 113/287] arm/arm64: KVM: arch_timer: Initialize cntvoff at kvm_init Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs at the first time because that will overwrite any potentially restored values from user space. Cc: Andre Przywara Acked-by: Marc Zynger Signed-off-by: Christoffer Dall (cherry picked from commit a1a64387adeeba7a34ce06f2774e81f496ee803b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 ++ virt/kvm/arm/vgic.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index bb43b8cc1231..71ad3a8706d9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 685fc72fc751..81e9481184a7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1409,7 +1409,6 @@ int kvm_vgic_init(struct kvm *kvm) for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) vgic_set_target_reg(kvm, 0, i); - kvm_timer_init(kvm); kvm->arch.vgic.ready = true; out: mutex_unlock(&kvm->lock); From 764ee339777196e8cf9fa090f5041b02efc93630 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 13 Dec 2013 14:23:26 +0100 Subject: [PATCH 114/287] ARM/KVM: save and restore generic timer registers For migration to work we need to save (and later restore) the state of each core's virtual generic timer. Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export the three needed registers (control, counter, compare value). Though they live in cp15 space, we don't use the existing list, since they need special accessor functions and the arch timer is optional. Acked-by: Marc Zynger Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall (cherry picked from commit 39735a3a390431bcf60f9174b7d64f787fd6afa9) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 + arch/arm/include/uapi/asm/kvm.h | 20 +++++++ arch/arm/kvm/guest.c | 92 ++++++++++++++++++++++++++++++- arch/arm64/include/uapi/asm/kvm.h | 18 ++++++ virt/kvm/arm/arch_timer.c | 34 ++++++++++++ 5 files changed, 166 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a6f6db14ee4..098f7dd6d564 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c498b60c0505..835b8678de03 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -119,6 +119,26 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +#define ARM_CP15_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) + +#define __ARM_CP15_REG(op1,crn,crm,op2) \ + (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \ + ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \ + ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \ + ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \ + ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2)) + +#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32) + +#define __ARM_CP15_REG64(op1,crm) \ + (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) +#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) + +#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) + /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 20f8d97904af..2786eae10c0d 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +198,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f4263937..7c25ca8b02b3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -129,6 +129,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 +#define ARM64_SYS_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \ + KVM_REG_ARM64_SYSREG_ ## n ## _MASK) + +#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \ + ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) + +#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) +#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index c2e1ef4604e8..5081e809821f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info) enable_percpu_irq(host_vtimer_irq, 0); } +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + timer->cntv_ctl = value; + break; + case KVM_REG_ARM_TIMER_CNT: + vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; + break; + case KVM_REG_ARM_TIMER_CVAL: + timer->cntv_cval = value; + break; + default: + return -1; + } + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + return timer->cntv_ctl; + case KVM_REG_ARM_TIMER_CNT: + return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + case KVM_REG_ARM_TIMER_CVAL: + return timer->cntv_cval; + } + return (u64)-1; +} static int kvm_timer_cpu_notify(struct notifier_block *self, unsigned long action, void *cpu) From d90651fa17fcc2112dcfe05832a6c1169d26dee3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:55 -0700 Subject: [PATCH 115/287] ARM: KVM: Allow creating the VGIC after VCPUs Rework the VGIC initialization slightly to allow initialization of the vgic cpu-specific state even if the irqchip (the VGIC) hasn't been created by user space yet. This is safe, because the vgic data structures are already allocated when the CPU is allocated if VGIC support is compiled into the kernel. Further, the init process does not depend on any other information and the sacrifice is a slight performance degradation for creating VMs in the no-VGIC case. The reason is that the new device control API doesn't mandate creating the VGIC before creating the VCPU and it is unreasonable to require user space to create the VGIC before creating the VCPUs. At the same time move the irqchip_in_kernel check out of kvm_vcpu_first_run_init and into the init function to make the per-vcpu and global init functions symmetric and add comments on the exported functions making it a bit easier to understand the init flow by only looking at vgic.c. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e1ba0207a1b3714bb3f000e506285ae5123cdfa7) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 ++++--- virt/kvm/arm/vgic.c | 22 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 71ad3a8706d9..ea8da1f4fe49 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -465,6 +465,8 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; @@ -474,9 +476,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Initialize the VGIC before running a vcpu the first time on * this VM. */ - if (irqchip_in_kernel(vcpu->kvm) && - unlikely(!vgic_initialized(vcpu->kvm))) { - int ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); if (ret) return ret; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 81e9481184a7..5e9df47778fb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1243,15 +1243,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +/** + * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state + * @vcpu: pointer to the vcpu struct + * + * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to + * this vcpu and enable the VGIC for this VCPU + */ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) return -EBUSY; @@ -1383,10 +1387,22 @@ int kvm_vgic_hyp_init(void) return ret; } +/** + * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * @kvm: pointer to the kvm struct + * + * Map the virtual CPU interface into the VM before running any VCPUs. We + * can't do this at creation time, because user space must first set the + * virtual CPU interface address in the guest physical address space. Also + * initialize the ITARGETSRn regs to 0 on the emulated distributor. + */ int kvm_vgic_init(struct kvm *kvm) { int ret = 0, i; + if (!irqchip_in_kernel(kvm)) + return 0; + mutex_lock(&kvm->lock); if (vgic_initialized(kvm)) From 1032acb686aaabafb124e12ae15a6adc9b3da557 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 17:29:18 +0100 Subject: [PATCH 116/287] KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC Support creating the ARM VGIC device through the KVM_CREATE_DEVICE ioctl, which can then later be leveraged to use the KVM_{GET/SET}_DEVICE_ATTR, which is useful both for setting addresses in a more generic API than the ARM-specific one and is useful for save/restore of VGIC state. Adds KVM_CAP_DEVICE_CTRL to ARM capabilities. Note that we change the check for creating a VGIC from bailing out if any VCPUs were created, to bailing out if any VCPUs were ever run. This is an important distinction that shouldn't break anything, but allows creating the VGIC after the VCPUs have been created. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7330672befe6269e575f79b924a7068b26c144b4) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 10 +++ arch/arm/kvm/arm.c | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/arm/vgic.c | 63 ++++++++++++++++++- virt/kvm/kvm_main.c | 5 ++ 6 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/arm-vgic.txt diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt new file mode 100644 index 000000000000..38f27f709a99 --- /dev/null +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -0,0 +1,10 @@ +ARM Virtual Generic Interrupt Controller (VGIC) +=============================================== + +Device types supported: + KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 + +Only one VGIC instance may be instantiated through either this API or the +legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt +controller, requiring emulated user-space devices to inject interrupts to the +VGIC instead of directly to CPUs. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ea8da1f4fe49..fcb68bb96176 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -191,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: r = vgic_present; break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9c0f8545df73..1dfc17255ee0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; +extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d5b8501cebfd..56347ab54cf7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -848,6 +848,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_TYPE_ARM_VGIC_V2 5 /* * ioctls for VM fds diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5e9df47778fb..b15d6c17a090 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1433,20 +1433,45 @@ int kvm_vgic_init(struct kvm *kvm) int kvm_vgic_create(struct kvm *kvm) { - int ret = 0; + int i, vcpu_lock_idx = -1, ret = 0; + struct kvm_vcpu *vcpu; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + if (kvm->arch.vgic.vctrl_base) { ret = -EEXIST; goto out; } + /* + * Any time a vcpu is run, vcpu_load is called which tries to grab the + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure + * that no other VCPUs are run while we create the vgic. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!mutex_trylock(&vcpu->mutex)) + goto out_unlock; + vcpu_lock_idx = i; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.has_run_once) { + ret = -EBUSY; + goto out_unlock; + } + } + spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.vctrl_base = vgic_vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; +out_unlock: + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&vcpu->mutex); + } + out: mutex_unlock(&kvm->lock); return ret; @@ -1510,3 +1535,37 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) mutex_unlock(&kvm->lock); return r; } + +static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm); +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_set_attr, + .get_attr = vgic_get_attr, + .has_attr = vgic_has_attr, +}; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb9a865c8e01..e9a43b6455be 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2271,6 +2271,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_VFIO: ops = &kvm_vfio_ops; break; +#endif +#ifdef CONFIG_KVM_ARM_VGIC + case KVM_DEV_TYPE_ARM_VGIC_V2: + ops = &kvm_arm_vgic_v2_ops; + break; #endif default: return -ENODEV; From c4ad31ff7d94f6504c757ddcf742f0597c494080 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 117/287] KVM: arm-vgic: Set base addr through device API Support setting the distributor and cpu interface base addresses in the VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API in addition to the ARM specific API. This has the added benefit of being able to share more code in user space and do things in a uniform manner. Also deprecate the older API at the same time, but backwards compatibility will be maintained. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit ce01e4e8874d410738f4b4733b26642d6611a331) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 8 +- .../virtual/kvm/devices/arm-vgic.txt | 11 +++ arch/arm/include/uapi/asm/kvm.h | 2 + arch/arm/kvm/arm.c | 2 +- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic.c | 87 ++++++++++++++++--- 6 files changed, 96 insertions(+), 16 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d196ebe8956e..3c75a17555a8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. -4.80 KVM_ARM_SET_DEVICE_ADDR +4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated) Capability: KVM_CAP_ARM_SET_DEVICE_ADDR Architectures: arm, arm64 @@ -2362,7 +2362,11 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. -4.82 KVM_PPC_RTAS_DEFINE_TOKEN +Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API +should be used instead. + + +4.86 KVM_PPC_RTAS_DEFINE_TOKEN Capability: KVM_CAP_PPC_RTAS Architectures: ppc diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 38f27f709a99..c9febb2a0c3e 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -8,3 +8,14 @@ Only one VGIC instance may be instantiated through either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. + +Groups: + KVM_DEV_ARM_VGIC_GRP_ADDR + Attributes: + KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) + Base address in the guest physical address space of the GIC distributor + register mappings. + + KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) + Base address in the guest physical address space of the GIC virtual cpu + interface register mappings. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 835b8678de03..76a742769e2b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -163,6 +163,8 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index fcb68bb96176..e71f6e15d3cd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -785,7 +785,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7e2d15837b02..be85127bfed3 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -144,7 +144,7 @@ struct kvm_run; struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b15d6c17a090..45db48de4282 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1495,6 +1495,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, { int ret; + if (addr & ~KVM_PHYS_MASK) + return -E2BIG; + + if (addr & (SZ_4K - 1)) + return -EINVAL; + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) return -EEXIST; if (addr + size < addr) @@ -1507,26 +1513,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, return ret; } -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX + * @addr: pointer to address value + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. + */ +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, + *addr, KVM_VGIC_V2_DIST_SIZE); + } else { + *addr = vgic->vgic_dist_base; + } break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, + *addr, KVM_VGIC_V2_CPU_SIZE); + } else { + *addr = vgic->vgic_cpu_base; + } break; default: r = -ENODEV; @@ -1538,16 +1559,58 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + r = kvm_vgic_addr(dev->kvm, type, &addr, true); + return (r == -ENODEV) ? -ENXIO : r; + } + } + return -ENXIO; } static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return -ENXIO; + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + r = kvm_vgic_addr(dev->kvm, type, &addr, false); + if (r) + return (r == -ENODEV) ? -ENXIO : r; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + } + } + + return r; } static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + } return -ENXIO; } From 10b316b72589c7ba6f74c7263a289ffbaa1bf80b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 118/287] irqchip: arm-gic: Define additional MMIO offsets and masks Define CPU interface offsets for the GICC_ABPR, GICC_APR, and GICC_IIDR registers. Define distributor registers for the GICD_SPENDSGIR and the GICD_CPENDSGIR. KVM/ARM needs to know about these definitions to fully support save/restore of the VGIC. Also define some masks and shifts for the various GICH_VMCR fields. Cc: Thomas Gleixner Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 0307e1770fdeff2732cf7a35d0f7f49db67c6621) Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 3e203eb23cc7..4483adb61c88 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -17,6 +17,9 @@ #define GIC_CPU_EOI 0x10 #define GIC_CPU_RUNNINGPRI 0x14 #define GIC_CPU_HIGHPRI 0x18 +#define GIC_CPU_ALIAS_BINPOINT 0x1c +#define GIC_CPU_ACTIVEPRIO 0xd0 +#define GIC_CPU_IDENT 0xfc #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -54,6 +57,15 @@ #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_VMCR_CTRL_SHIFT 0 +#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) +#define GICH_VMCR_PRIMASK_SHIFT 27 +#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) +#define GICH_VMCR_BINPOINT_SHIFT 21 +#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT) +#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18 +#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT) + #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) From 405003b808d4ff3de73eebc18dfb6f1e708626c4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 119/287] KVM: arm-vgic: Make vgic mmio functions more generic Rename the vgic_ranges array to vgic_dist_ranges to be more specific and to prepare for handling CPU interface register access as well (for save/restore of VGIC state). Pass offset from distributor or interface MMIO base to find_matching_range function instead of the physical address of the access in the VM memory map. This allows other callers unaware of the VM specifics, but with generic VGIC knowledge to reuse the function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1006e8cb22e861260688917ca4cfe6cde8ad69eb) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 45db48de4282..e2596f618281 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -602,7 +602,7 @@ struct mmio_range { phys_addr_t offset); }; -static const struct mmio_range vgic_ranges[] = { +static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -669,14 +669,13 @@ static const struct mmio_range vgic_ranges[] = { static const struct mmio_range *find_matching_range(const struct mmio_range *ranges, struct kvm_exit_mmio *mmio, - phys_addr_t base) + phys_addr_t offset) { const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) + if (offset >= r->base && + (offset + mmio->len) <= (r->base + r->len)) return r; r++; } @@ -713,7 +712,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } - range = find_matching_range(vgic_ranges, mmio, base); + offset = mmio->phys_addr - base; + range = find_matching_range(vgic_dist_ranges, mmio, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); From f05c65c6daf2d2c33c31ddc3b48df2693f8e9594 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 11 Dec 2013 20:29:11 -0800 Subject: [PATCH 120/287] arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put The arch-generic KVM code expects the cpu field of a vcpu to be -1 if the vcpu is no longer assigned to a cpu. This is used for the optimized make_all_cpus_request path and will be used by the vgic code to check that no vcpus are running. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e9b152cb957cb194437f37e79f0f3c9d34fe53d6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e71f6e15d3cd..169c718ddd90 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -343,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + kvm_arm_set_running_vcpu(NULL); } From 0b3a540dcc4087698c95c6d1bda4071424283345 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:17:31 +0100 Subject: [PATCH 121/287] KVM: arm-vgic: Add vgic reg access from dev attr Add infrastructure to handle distributor and cpu interface register accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups and defining the semantics of the attr field to be the MMIO offset as specified in the GICv2 specs. Missing register accesses or other changes in individual register access functions to support save/restore of the VGIC state is added in subsequent patches. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 52 +++++ arch/arm/include/uapi/asm/kvm.h | 6 + virt/kvm/arm/vgic.c | 178 ++++++++++++++++++ 3 files changed, 236 insertions(+) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index c9febb2a0c3e..7f4e91b1316b 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -19,3 +19,55 @@ Groups: KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu interface register mappings. + + KVM_DEV_ARM_VGIC_GRP_DIST_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All distributor regs are (rw, 32-bit) + + The offset is relative to the "Distributor base address" as defined in the + GICv2 specs. Getting or setting such a register has the same effect as + reading or writing the register on the actual hardware from the cpu + specified with cpu id field. Note that most distributor fields are not + banked, but return the same value regardless of the cpu id used to access + the register. + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running + + KVM_DEV_ARM_VGIC_GRP_CPU_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All CPU interface regs are (rw, 32-bit) + + The offset specifies the offset from the "CPU interface base address" as + defined in the GICv2 specs. Getting or setting such a register has the + same effect as reading or writing the register on the actual hardware. + + The Active Priorities Registers APRn are implementation defined, so we set a + fixed format for our implementation that fits with the model of a "GICv2 + implementation without the security extensions" which we present to the + guest. This interface always exposes four register APR[0-3] describing the + maximum possible 128 preemption levels. The semantics of the register + indicate if any interrupts in a given preemption level are in the active + state by setting the corresponding bit. + + Thus, preemption level X has one or more active interrupts if and only if: + + APRn[X mod 32] == 0b1, where n = X / 32 + + Bits for undefined preemption levels are RAZ/WI. + + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 76a742769e2b..ef0c8785ba16 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -165,6 +165,12 @@ struct kvm_arch_memory_slot { /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e2596f618281..88599b585362 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,20 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + +static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + /* * I would have liked to use the kvm_bus_io_*() API instead, but it * cannot cope with banked registers (only the VM pointer is passed @@ -663,6 +677,16 @@ static const struct mmio_range vgic_dist_ranges[] = { .len = 4, .handle_mmio = handle_mmio_sgi_reg, }, + { + .base = GIC_DIST_SGI_PENDING_CLEAR, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_clear, + }, + { + .base = GIC_DIST_SGI_PENDING_SET, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_set, + }, {} }; @@ -1557,6 +1581,114 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) return r; } +static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return true; +} + +static const struct mmio_range vgic_cpu_ranges[] = { + { + .base = GIC_CPU_CTRL, + .len = 12, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ALIAS_BINPOINT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ACTIVEPRIO, + .len = 16, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_IDENT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, +}; + +static int vgic_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + const struct mmio_range *r = NULL, *ranges; + phys_addr_t offset; + int ret, cpuid, c; + struct kvm_vcpu *vcpu, *tmp_vcpu; + struct vgic_dist *vgic; + struct kvm_exit_mmio mmio; + + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + mutex_lock(&dev->kvm->lock); + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { + ret = -EINVAL; + goto out; + } + + vcpu = kvm_get_vcpu(dev->kvm, cpuid); + vgic = &dev->kvm->arch.vgic; + + mmio.len = 4; + mmio.is_write = is_write; + if (is_write) + mmio_data_write(&mmio, ~0, *reg); + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + mmio.phys_addr = vgic->vgic_dist_base + offset; + ranges = vgic_dist_ranges; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + mmio.phys_addr = vgic->vgic_cpu_base + offset; + ranges = vgic_cpu_ranges; + break; + default: + BUG(); + } + r = find_matching_range(ranges, &mmio, offset); + + if (unlikely(!r || !r->handle_mmio)) { + ret = -ENXIO; + goto out; + } + + + spin_lock(&vgic->lock); + + /* + * Ensure that no other VCPU is running by checking the vcpu->cpu + * field. If no other VPCUs are running we can safely access the VGIC + * state, because even if another VPU is run after this point, that + * VCPU will not touch the vgic state, because it will block on + * getting the vgic->lock in kvm_vgic_sync_hwstate(). + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { + if (unlikely(tmp_vcpu->cpu != -1)) { + ret = -EBUSY; + goto out_vgic_unlock; + } + } + + offset -= r->base; + r->handle_mmio(vcpu, &mmio, offset); + + if (!is_write) + *reg = mmio_data_read(&mmio, ~0); + + ret = 0; +out_vgic_unlock: + spin_unlock(&vgic->lock); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -1573,6 +1705,18 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = kvm_vgic_addr(dev->kvm, type, &addr, true); return (r == -ENODEV) ? -ENXIO : r; } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_attr_regs_access(dev, attr, ®, true); + } + } return -ENXIO; @@ -1594,14 +1738,42 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (copy_to_user(uaddr, &addr, sizeof(addr))) return -EFAULT; + break; } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + r = vgic_attr_regs_access(dev, attr, ®, false); + if (r) + return r; + r = put_user(reg, uaddr); + break; + } + } return r; } +static int vgic_has_attr_regs(const struct mmio_range *ranges, + phys_addr_t offset) +{ + struct kvm_exit_mmio dev_attr_mmio; + + dev_attr_mmio.len = 4; + if (find_matching_range(ranges, &dev_attr_mmio, offset)) + return 0; + else + return -ENXIO; +} + static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + phys_addr_t offset; + switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_ADDR: switch (attr->attr) { @@ -1610,6 +1782,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_dist_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_cpu_ranges, offset); } return -ENXIO; } From 944b8a6f9f400ded36fcdc61acd801fbd7413985 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Nov 2013 20:51:31 -0800 Subject: [PATCH 122/287] KVM: arm-vgic: Support unqueueing of LRs to the dist To properly access the VGIC state from user space it is very unpractical to have to loop through all the LRs in all register access functions. Instead, support moving all pending state from LRs to the distributor, but leave active state LRs alone. Note that to accurately present the active and pending state to VCPUs reading these distributor registers from a live VM, we would have to stop all other VPUs than the calling VCPU and ask each CPU to unqueue their LR state onto the distributor and add fields to track active state on the distributor side as well. We don't have any users of such functionality yet and there are other inaccuracies of the GIC emulation, so don't provide accurate synchronized access to this state just yet. However, when the time comes, having this function should help. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit cbd333a4bfd0d93bba36d46a0e4e7979228873a6) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 88599b585362..d08ba28e729a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,80 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define LR_IRQID(lr) \ + ((lr) & GICH_LR_VIRTUALID) + +static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) +{ + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} + +/** + * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs + * + * Move any pending IRQs that have already been assigned to LRs back to the + * emulated distributor state so that the complete emulated state can be read + * from the main emulation structures without investigating the LRs. + * + * Note that IRQs in the active state in the LRs get their pending state moved + * to the distributor but the active state stays in the LRs, because we don't + * track the active state on the distributor side. + */ +static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int vcpu_id = vcpu->vcpu_id; + int i, irq, source_cpu; + u32 *lr; + + for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + lr = &vgic_cpu->vgic_lr[i]; + irq = LR_IRQID(*lr); + source_cpu = LR_CPUID(*lr); + + /* + * There are three options for the state bits: + * + * 01: pending + * 10: active + * 11: pending and active + * + * If the LR holds only an active interrupt (not pending) then + * just leave it alone. + */ + if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + continue; + + /* + * Reestablish the pending state on the distributor and the + * CPU interface. It may have already been pending, but that + * is fine, then we are only setting a few bits that were + * already set. + */ + vgic_dist_irq_set(vcpu, irq); + if (irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; + *lr &= ~GICH_LR_PENDING_BIT; + + /* + * If there's no state left on the LR (it could still be + * active), then the LR does not hold any useful info and can + * be marked as free for other use. + */ + if (!(*lr & GICH_LR_STATE)) + vgic_retire_lr(i, irq, vgic_cpu); + + /* Finally update the VGIC state. */ + vgic_update_state(vcpu->kvm); + } +} + static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -848,8 +922,6 @@ static void vgic_update_state(struct kvm *kvm) } } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) #define MK_LR_PEND(src, irq) \ (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) @@ -871,9 +943,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; + vgic_retire_lr(lr, irq, vgic_cpu); if (vgic_irq_is_active(vcpu, irq)) vgic_irq_clear_active(vcpu, irq); } @@ -1675,6 +1745,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev, } } + /* + * Move all pending IRQs from the LRs on all VCPUs so the pending + * state can be properly represented in the register state accessible + * through this API. + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) + vgic_unqueue_irqs(tmp_vcpu); + offset -= r->base; r->handle_mmio(vcpu, &mmio, offset); From 8a280d12e7988c920e8805de50c0a7e129032d1b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:22:31 +0100 Subject: [PATCH 123/287] KVM: arm-vgic: Add GICD_SPENDSGIR and GICD_CPENDSGIR handlers Handle MMIO accesses to the two registers which should support both the case where the VMs want to read/write either of these registers and the case where user space reads/writes these registers to do save/restore of the VGIC state. Note that the added complexity compared to simple set/clear enable registers stems from the bookkeping of source cpu ids. It may be possible to change the underlying data structure to simplify the complexity, but since this is not in the critical path at all, this will do. Also note that reading this register from a live guest will not be accurate compared to on hardware, because some state may be living on the CPU LRs and the only way to give a consistent read would be to force stop all the VCPUs and request them to unqueu the LR state onto the distributor. Until we have an actual user of live reading this register, we can live with the difference. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 90a5355ee7639e92c0492ec592cba5c31bd80687) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d08ba28e729a..e59aaa4c64e5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -663,18 +663,80 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } } -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ +static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg = 0; + + /* Copy source SGIs from distributor side */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + int shift = 8 * (sgi - min_sgi); + reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + } + + mmio_data_write(mmio, ~0, reg); return false; } +static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, bool set) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg; + bool updated = false; + + reg = mmio_data_read(mmio, ~0); + + /* Clear pending SGIs on the distributor */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + u8 mask = reg >> (8 * (sgi - min_sgi)); + if (set) { + if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + } else { + if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + } + } + + if (updated) + vgic_update_state(vcpu->kvm); + + return updated; +} + static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return false; + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); +} + +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } /* From a0a11ba68172ffeb62b0a7be8dc4162a0fc2ad6d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:57 -0700 Subject: [PATCH 124/287] KVM: arm-vgic: Support CPU interface reg access Implement support for the CPU interface register access driven by MMIO address offsets from the CPU interface base address. Useful for user space to support save/restore of the VGIC state. This commit adds support only for the same logic as the current VGIC support, and no more. For example, the active priority registers are handled as RAZ/WI, just like setting priorities on the emulated distributor. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit fa20f5aea56f271f83e91b9cde00f043a5a14990) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 81 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e59aaa4c64e5..be456ce264d0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -71,6 +71,10 @@ #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b +#define GICC_ARCH_VERSION_V2 0x2 + /* Physical address of vgic virtual cpu interface */ static phys_addr_t vgic_vcpu_base; @@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, u32 word_offset = offset & 3; switch (offset & ~3) { - case 0: /* CTLR */ + case 0: /* GICD_CTLR */ reg = vcpu->kvm->arch.vgic.enabled; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); @@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, } break; - case 4: /* TYPER */ + case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; reg |= (VGIC_NR_IRQS >> 5) - 1; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; - case 8: /* IIDR */ - reg = 0x4B00043B; + case 8: /* GICD_IIDR */ + reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1716,9 +1720,70 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return true; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 reg, mask = 0, shift = 0; + bool updated = false; + + switch (offset & ~0x3) { + case GIC_CPU_CTRL: + mask = GICH_VMCR_CTRL_MASK; + shift = GICH_VMCR_CTRL_SHIFT; + break; + case GIC_CPU_PRIMASK: + mask = GICH_VMCR_PRIMASK_MASK; + shift = GICH_VMCR_PRIMASK_SHIFT; + break; + case GIC_CPU_BINPOINT: + mask = GICH_VMCR_BINPOINT_MASK; + shift = GICH_VMCR_BINPOINT_SHIFT; + break; + case GIC_CPU_ALIAS_BINPOINT: + mask = GICH_VMCR_ALIAS_BINPOINT_MASK; + shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + break; + } + + if (!mmio->is_write) { + reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + mmio_data_write(mmio, ~0, reg); + } else { + reg = mmio_data_read(mmio, ~0); + reg = (reg << shift) & mask; + if (reg != (vgic_cpu->vgic_vmcr & mask)) + updated = true; + vgic_cpu->vgic_vmcr &= ~mask; + vgic_cpu->vgic_vmcr |= reg; + } + return updated; } +static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); +} + +static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + if (mmio->is_write) + return false; + + /* GICC_IIDR */ + reg = (PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + (IMPLEMENTER_ARM << 0); + mmio_data_write(mmio, ~0, reg); + return false; +} + +/* + * CPU Interface Register accesses - these are not accessed by the VM, but by + * user space for saving and restoring VGIC state. + */ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, @@ -1728,17 +1793,17 @@ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_ALIAS_BINPOINT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_abpr, }, { .base = GIC_CPU_ACTIVEPRIO, .len = 16, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_CPU_IDENT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_cpu_mmio_ident, }, }; From c23fe6933f79809bae75b5e8c16c36d1ffc79e41 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:22 +0000 Subject: [PATCH 125/287] arm64: KVM: Add Kconfig option for max VCPUs per-Guest Current max VCPUs per-Guest is set to 4 which is preventing us from creating a Guest (or VM) with 8 VCPUs on Host (e.g. X-Gene Storm SOC) with 8 Host CPUs. The correct value of max VCPUs per-Guest should be same as the max CPUs supported by GICv2 which is 8 but, increasing value of max VCPUs per-Guest can make things slower hence we add Kconfig option to let KVM users select appropriate max VCPUs per-Guest. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier (cherry picked from commit da7814700a0c408bead58ce4714b7625ffbaade1) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 7 ++++++- arch/arm64/kvm/Kconfig | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d85a02d1231..0a1d69751562 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,7 +26,12 @@ #include #include -#define KVM_MAX_VCPUS 4 +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4480ab339a00..8ba85e9ea388 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,17 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + config KVM_ARM_VGIC bool depends on KVM_ARM_HOST && OF From af4604a0f9c909b60d043cebb20110804639460e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 14 Nov 2013 15:20:08 +0000 Subject: [PATCH 126/287] arm64: KVM: Support X-Gene guest VCPU on APM X-Gene host This patch allows us to have X-Gene guest VCPU when using KVM arm64 on APM X-Gene host. We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu() when running on X-Gene host with Potenza core. [maz: sanitized the commit log] Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier (cherry picked from commit e28100bd8ed9e37b7cd4578140a1e7f95bd40835) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/guest.c | 32 +++++++++++++++++----------- arch/arm64/kvm/sys_regs_generic_v8.c | 3 +++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7c25ca8b02b3..495ab6f84a61 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -55,8 +55,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 +#define KVM_ARM_TARGET_XGENE_POTENZA 3 -#define KVM_ARM_NUM_TARGETS 3 +#define KVM_ARM_NUM_TARGETS 4 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f0731e53274..08745578d54d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void) unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; + switch (implementor) { + case ARM_CPU_IMP_ARM: + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + return KVM_ARM_TARGET_CORTEX_A57; + }; + break; + case ARM_CPU_IMP_APM: + switch (part_number) { + case APM_CPU_PART_POTENZA: + return KVM_ARM_TARGET_XGENE_POTENZA; + }; + break; + }; - switch (part_number) { - case ARM_CPU_PART_AEM_V8: - return KVM_ARM_TARGET_AEM_V8; - case ARM_CPU_PART_FOUNDATION: - return KVM_ARM_TARGET_FOUNDATION_V8; - case ARM_CPU_PART_CORTEX_A57: - /* Currently handled by the generic backend */ - return KVM_ARM_TARGET_CORTEX_A57; - default: - return -EINVAL; - } + return -EINVAL; } int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 4268ab9356b1..8fe6f76b0edc 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, + &genericv8_target_table); + return 0; } late_initcall(sys_reg_genericv8_init); From dfbd506266a97ad252336ea96e41791fdd0c0828 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:23 +0000 Subject: [PATCH 127/287] arm64: KVM: Force undefined exception for Guest SMC intructions The SMC-based PSCI emulation for Guest is going to be very different from the in-kernel HVC-based PSCI emulation hence for now just inject undefined exception when Guest executes SMC instruction. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: marc Zyngier (cherry picked from commit e5cf9dcdbfd26cd4e1991db08755da900454efeb) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 42a0f1bddfe7..7bc41eab4c64 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } From 2aad15258f0795432073b14b616a634e452432f0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 Dec 2013 12:12:29 -0800 Subject: [PATCH 128/287] kvm: make local functions static Running 'make namespacecheck' found lots of functions that should be declared static, since only used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: Marcelo Tosatti (cherry picked from commit 7940876e1330671708186ac3386aa521ffb5c182) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 16 ---------------- virt/kvm/ioapic.c | 2 +- virt/kvm/ioapic.h | 1 - virt/kvm/kvm_main.c | 35 ++++++++++++++++++----------------- 4 files changed, 19 insertions(+), 35 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dfc17255ee0..95625d5cb3f0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -449,8 +449,6 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation); static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { @@ -523,7 +521,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); @@ -535,7 +532,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_release_pfn_dirty(pfn_t pfn); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); @@ -562,8 +558,6 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); @@ -590,8 +584,6 @@ int kvm_get_dirty_log(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); long kvm_arch_vm_ioctl(struct file *filp, @@ -639,8 +631,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void kvm_free_physmem(struct kvm *kvm); - void *kvm_kvzalloc(unsigned long size); void kvm_kvfree(const void *addr); @@ -1067,12 +1057,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } - -static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) -{ - return true; -} - #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 2d682977ce82..ce9ed99ad7dc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) { int i; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 615d8c995c3c..90d43e95dcf8 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e9a43b6455be..2162bd5d17d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -95,6 +95,12 @@ static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, u64 last_generation); + +static void kvm_release_pfn_dirty(pfn_t pfn); +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn); bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); @@ -552,7 +558,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, free->npages = 0; } -void kvm_free_physmem(struct kvm *kvm) +static void kvm_free_physmem(struct kvm *kvm) { struct kvm_memslots *slots = kvm->memslots; struct kvm_memory_slot *memslot; @@ -674,8 +680,9 @@ static void sort_memslots(struct kvm_memslots *slots) slots->id_to_index[slots->memslots[i].id] = i; } -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation) +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, + u64 last_generation) { if (new) { int id = new->id; @@ -923,8 +930,8 @@ int kvm_set_memory_region(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_set_memory_region); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) +static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; @@ -1045,7 +1052,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, } unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, - gfn_t gfn) + gfn_t gfn) { return gfn_to_hva_many(slot, gfn, NULL); } @@ -1385,18 +1392,11 @@ void kvm_release_page_dirty(struct page *page) } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); -void kvm_release_pfn_dirty(pfn_t pfn) +static void kvm_release_pfn_dirty(pfn_t pfn) { kvm_set_pfn_dirty(pfn); kvm_release_pfn_clean(pfn); } -EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); - -void kvm_set_page_dirty(struct page *page) -{ - kvm_set_pfn_dirty(page_to_pfn(page)); -} -EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { @@ -1638,8 +1638,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } EXPORT_SYMBOL_GPL(kvm_clear_guest); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - gfn_t gfn) +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; @@ -1753,7 +1754,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); * locking does not harm. It may result in trying to yield to same VCPU, fail * and continue with next VCPU and so on. */ -bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { bool eligible; From c9dad332f97620c9221a724e20d2bc4db51bb7a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Dec 2013 16:56:06 +0000 Subject: [PATCH 129/287] arm/arm64: KVM: relax the requirements of VMA alignment for THP The THP code in KVM/ARM is a bit restrictive in not allowing a THP to be used if the VMA is not 2MB aligned. Actually, it is not so much the VMA that matters, but the associated memslot: A process can perfectly mmap a region with no particular alignment restriction, and then pass a 2MB aligned address to KVM. In this case, KVM will only use this 2MB aligned region, and will ignore the range between vma->vm_start and memslot->userspace_addr. It can also choose to place this memslot at whatever alignment it wants in the IPA space. In the end, what matters is the relative alignment of the user space and IPA mappings with respect to a 2M page. They absolutely must be the same if you want to use THP. Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 136d737fd20102f1be9b02356590fd55e3a40d0e) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 659db0ed1370..7789857d1470 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { /* - * Pages belonging to VMAs not aligned to the PMD mapping - * granularity cannot be mapped using block descriptors even - * if the pages belong to a THP for the process, because the - * stage-2 block descriptor will cover more than a single THP - * and we loose atomicity for unmapping, updates, and splits - * of the THP or other pages in the stage-2 block range. + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. */ - if (vma->vm_start & ~PMD_MASK) + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) force_pte = true; } up_read(¤t->mm->mmap_sem); From 3b254a9515ba76de3746f4c28041845ba27b3745 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 7 Jan 2014 13:45:15 +0530 Subject: [PATCH 130/287] KVM: ARM: Remove duplicate include trace.h was included twice. Remove duplicate inclusion. Signed-off-by: Sachin Kamat Signed-off-by: Christoffer Dall (cherry picked from commit 61466710de078c697106fa5b70ec7afc9feab520) Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index a92079011a83..0de91fc6de0f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@ #include "trace.h" -#include "trace.h" - typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) From 328c366d43fda9d85c758c455b5f522747fa3d7e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 9 Jan 2014 18:43:16 -0600 Subject: [PATCH 131/287] kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub Commit 7940876e1330671708186ac3386aa521ffb5c182 ("kvm: make local functions static") broke KVM PPC builds due to removing (rather than moving) the stub version of kvm_vcpu_eligible_for_directed_yield(). This patch reintroduces it. Signed-off-by: Scott Wood Cc: Stephen Hemminger Cc: Alexander Graf [Move the #ifdef inside the function. - Paolo] Signed-off-by: Paolo Bonzini (cherry picked from commit 4a55dd7273c95b4a19fbcf0ae1bbd1cfd09dfc36) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2162bd5d17d7..2d57b23683a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1731,7 +1731,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); -#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT /* * Helper that checks whether a VCPU is eligible for directed yield. * Most eligible candidate to yield is decided by following heuristics: @@ -1756,6 +1755,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); */ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || @@ -1766,8 +1766,10 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); return eligible; -} +#else + return true; #endif +} void kvm_vcpu_on_spin(struct kvm_vcpu *me) { From 031cbab9d6d2a6beca9e19472a40f5e22ecfbe07 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jan 2014 16:16:39 +0300 Subject: [PATCH 132/287] KVM: return an error code in kvm_vm_ioctl_register_coalesced_mmio() If kvm_io_bus_register_dev() fails then it returns success but it should return an error code. I also did a little cleanup like removing an impossible NULL test. Cc: stable@vger.kernel.org Fixes: 2b3c246a682c ('KVM: Make coalesced mmio use a device per zone') Signed-off-by: Dan Carpenter Signed-off-by: Paolo Bonzini (cherry picked from commit aac5c4226e7136c331ed384c25d5560204da10a0) Signed-off-by: Christoffer Dall --- virt/kvm/coalesced_mmio.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 88b2fe3ddf42..00d86427af0f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -154,17 +154,13 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, list_add_tail(&dev->list, &kvm->coalesced_zones); mutex_unlock(&kvm->slots_lock); - return ret; + return 0; out_free_dev: mutex_unlock(&kvm->slots_lock); - kfree(dev); - if (dev == NULL) - return -ENXIO; - - return 0; + return ret; } int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, From e9aa8f627c7c504c3880acdfa57849bd3d75dc83 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 6 Jun 2013 15:32:37 +0200 Subject: [PATCH 133/287] KVM: async_pf: Provide additional direct page notification By setting a Kconfig option, the architecture can control when guest notifications will be presented by the apf backend. There is the default batch mechanism, working as before, where the vcpu thread should pull in this information. Opposite to this, there is now the direct mechanism, that will push the information to the guest. This way s390 can use an already existing architecture interface. Still the vcpu thread should call check_completion to cleanup leftovers. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit e0ead41a6dac09f86675ce07a66e4b253a9b7bd5) Signed-off-by: Christoffer Dall --- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/Kconfig | 4 ++++ virt/kvm/async_pf.c | 20 ++++++++++++++++++-- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3c1877bbfe6a..f47d2e11108e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3232,7 +3232,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu.direct_map; arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, gfn, &arch); + return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); } static bool can_do_async_pf(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 95625d5cb3f0..f6801d10e04c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -180,7 +180,7 @@ struct kvm_async_pf { void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fbe1a48bd629..13f2d19793e3 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -22,6 +22,10 @@ config KVM_MMIO config KVM_ASYNC_PF bool +# Toggle to switch between direct notification and batch job +config KVM_ASYNC_PF_SYNC + bool + config HAVE_KVM_MSI bool diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8631d9c14320..00980ab02c45 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -28,6 +28,21 @@ #include "async_pf.h" #include +static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} +static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifndef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} + static struct kmem_cache *async_pf_cache; int kvm_async_pf_init(void) @@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work) down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); + kvm_async_page_present_sync(vcpu, apf); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); @@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; - work->addr = gfn_to_hva(vcpu->kvm, gfn); + work->addr = hva; work->arch = *arch; work->mm = current->mm; atomic_inc(&work->mm->mm_count); From 65d762a03f29316956725f348546794de0160fcc Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Tue, 3 Sep 2013 12:31:16 +0200 Subject: [PATCH 134/287] KVM: async_pf: Allow to wait for outstanding work On s390 we are not able to cancel work. Instead we will flush the work and wait for completion. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit 9f2ceda49c6b8827c795731c204f6c2587886e2c) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 00980ab02c45..889aad022014 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); list_del(&work->queue); + +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + flush_work(&work->work); +#else if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } +#endif } spin_lock(&vcpu->async_pf.lock); From c76f5b54ec88ab03043cdd2ab2c148daa0bccd7c Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 31 Jan 2014 14:32:46 +0100 Subject: [PATCH 135/287] KVM: async_pf: Add missing call for async page present Commit KVM: async_pf: Provide additional direct page notification missed the call from kvm_check_async_pf_completion to the new introduced function. Reported-by: Paolo Bonzini Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini (cherry picked from commit 1179ba539541347d5427cde8bcfdaa5ead14f3aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 889aad022014..10df100c4514 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->async_pf.lock); kvm_arch_async_page_ready(vcpu, work); - kvm_arch_async_page_present(vcpu, work); + kvm_async_page_present_async(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; From 8cf654c3d7b883a4ffc64e6301b4af5a9f3b6b93 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 136/287] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit bfb67a5606376bb32cb6f93dc05cda2e8c2038a5) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c98ef4771c73..0eb398655378 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -231,7 +231,7 @@ #define ESR_EL2_EC_SP_ALIGN (0x26) #define ESR_EL2_EC_FP_EXC32 (0x28) #define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_SERROR (0x2F) #define ESR_EL2_EC_BREAKPT (0x30) #define ESR_EL2_EC_BREAKPT_HYP (0x31) #define ESR_EL2_EC_SOFTSTP (0x32) From 4635cac7cb81e457a59eb1cbea32d828f6c25a05 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:57 +0100 Subject: [PATCH 137/287] asmlinkage, kvm: Make kvm_rebooting visible kvm_rebooting is referenced from assembler code, thus needs to be visible. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin (cherry picked from commit 52480137d82062bb8d0fb778cb9934667958e367) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2d57b23683a4..88660cbe70a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); -bool kvm_rebooting; +__visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; From 312c49473d6543ad3888172fc63992e5a326c622 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 2 Feb 2014 13:41:02 -0800 Subject: [PATCH 138/287] arm64: KVM: Add VGIC device control for arm64 This fixes the build breakage introduced by c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f and adds support for the device control API and save/restore of the VGIC state for ARMv8. The defines were simply missing from the arm64 header files and uaccess.h must be implicitly imported from somewhere else on arm. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 2a2f3e269c75edf916de5967079069aeb6a601cb) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++ virt/kvm/arm/vgic.c | 1 + 2 files changed, 10 insertions(+) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 495ab6f84a61..eaf54a30bedc 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -148,6 +148,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index be456ce264d0..8ca405cd7c1a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -24,6 +24,7 @@ #include #include #include +#include #include From 2aaea8b6ef1ba1dd5de6646abf86a4bb2a3a0642 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 26 Feb 2014 16:14:18 +0100 Subject: [PATCH 139/287] KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop Use the arch specific function kvm_arch_vcpu_runnable() to add a further criterium to identify a suitable vcpu to yield to during undirected yield processing. Signed-off-by: Michael Mueller Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 98f4a14676127397c54cab7d6119537ed4d113a2) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 88660cbe70a4..9f5fab0a4fda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1800,7 +1800,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (vcpu == me) continue; - if (waitqueue_active(&vcpu->wq)) + if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; From 24e14ecb92fafddb1f3c9cce1b4aed3596f86859 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Feb 2014 18:47:36 +0000 Subject: [PATCH 140/287] arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT Commit 1fcf7ce0c602 (arm: kvm: implement CPU PM notifier) added support for CPU power-management, using a cpu_notifier to re-init KVM on a CPU that entered CPU idle. The code assumed that a CPU entering idle would actually be powered off, loosing its state entierely, and would then need to be reinitialized. It turns out that this is not always the case, and some HW performs CPU PM without actually killing the core. In this case, we try to reinitialize KVM while it is still live. It ends up badly, as reported by Andre Przywara (using a Calxeda Midway): [ 3.663897] Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x685760 [ 3.663897] unexpected data abort in Hyp mode at: 0xc067d150 [ 3.663897] unexpected HVC/SVC trap in Hyp mode at: 0xc0901dd0 The trick here is to detect if we've been through a full re-init or not by looking at HVBAR (VBAR_EL2 on arm64). This involves implementing the backend for __hyp_get_vectors in the main KVM HYP code (rather small), and checking the return value against the default one when the CPU notifier is called on CPU_PM_EXIT. Reported-by: Andre Przywara Tested-by: Andre Przywara Cc: Lorenzo Pieralisi Cc: Rob Herring Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit b20c9f29c5c25921c6ad18b50d4b61e6d181c3cc) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 ++- arch/arm/kvm/interrupts.S | 11 ++++++++++- arch/arm64/kvm/hyp.S | 27 +++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 169c718ddd90..9804406ff37e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT && + __hyp_get_vectors() == hyp_default_vectors) { cpu_init_hyp_mode(NULL); return NOTIFY_OK; } diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index ddc15539bad2..0d68d4073068 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -220,6 +220,10 @@ after_vfp_restore: * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 and r1. * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save @@ -363,6 +367,11 @@ hyp_hvc: host_switch_to_hyp: pop {r0, r1, r2} + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + push {lr} mrs lr, SPSR push {lr} @@ -378,7 +387,7 @@ THUMB( orr lr, #1) pop {lr} msr SPSR_csxf, lr pop {lr} - eret +1: eret guest_trap: load_vcpu @ Load VCPU pointer to r0 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 3b47c36e10ff..2c56012cb2d2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -694,6 +694,24 @@ __hyp_panic_str: .align 2 +/* + * u64 kvm_call_hyp(void *hypfn, ...); + * + * This is not really a variadic function in the classic C-way and care must + * be taken when calling this to ensure parameters are passed in registers + * only, since the stack will change between the caller and the callee. + * + * Call the function with the first argument containing a pointer to the + * function you wish to call in Hyp mode, and subsequent arguments will be + * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the + * function pointer can be passed). The function being called must be mapped + * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are + * passed in r0 and r1. + * + * A function pointer with a value of 0 has a special meaning, and is + * used to implement __hyp_get_vectors in the same way as in + * arch/arm64/kernel/hyp_stub.S. + */ ENTRY(kvm_call_hyp) hvc #0 ret @@ -737,7 +755,12 @@ el1_sync: // Guest trapped into EL2 pop x2, x3 pop x0, x1 - push lr, xzr + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: push lr, xzr /* * Compute the function address in EL2, and shuffle the parameters. @@ -750,7 +773,7 @@ el1_sync: // Guest trapped into EL2 blr lr pop lr, xzr - eret +2: eret el1_trap: /* From fca920166b7c07b3932b2a8a24916e5002a57900 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 19:13:10 +0000 Subject: [PATCH 141/287] arm64: KVM: force cache clean on page fault when caches are off In order for the guest with caches off to observe data written contained in a given page, we need to make sure that page is committed to memory, and not just hanging in the cache (as guest accesses are completely bypassing the cache until it decides to enable it). For this purpose, hook into the coherent_icache_guest_page function and flush the region if the guest SCTLR_EL1 register doesn't show the MMU and caches as being enabled. The function also get renamed to coherent_cache_guest_page. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 2d58b733c87689d3d5144e4ac94ea861cc729145) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 4 ++-- arch/arm/kvm/mmu.c | 4 ++-- arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 2d122adcdb22..6d0f3d3023b7 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -116,8 +116,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, - unsigned long size) +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) { /* * If we are going to insert an instruction page and the icache is diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7789857d1470..fc71a8df0e13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -715,7 +715,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, PAGE_S2); @@ -723,7 +723,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7f1f9408ff66..6eaf69b5e42c 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr) return true; } -static inline void kvm_clean_dcache_area(void *addr, size_t size) {} static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} @@ -124,9 +123,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, - unsigned long size) +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { + return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; +} + +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) +{ + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + if (!icache_is_aliasing()) { /* PIPT */ flush_icache_range(hva, hva + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ @@ -135,7 +144,6 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } } -#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) #endif /* __ASSEMBLY__ */ From 96d03922c8cb37a48451b02f95e80cd749266bbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 10:55:17 +0000 Subject: [PATCH 142/287] arm64: KVM: allows discrimination of AArch32 sysreg access The current handling of AArch32 trapping is slightly less than perfect, as it is not possible (from a handler point of view) to distinguish it from an AArch64 access, nor to tell a 32bit from a 64bit access either. Fix this by introducing two additional flags: - is_aarch32: true if the access was made in AArch32 mode - is_32bit: true if is_aarch32 == true and a MCR/MRC instruction was used to perform the access (as opposed to MCRR/MRRC). This allows a handler to cover all the possible conditions in which a system register gets trapped. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 2072d29c46b73e39b3c6c56c6027af77086f45fd) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 6 ++++++ arch/arm64/kvm/sys_regs.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 02e9d09e1d80..bf03e0fadf1f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -437,6 +437,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) u32 hsr = kvm_vcpu_get_hsr(vcpu); int Rt2 = (hsr >> 10) & 0xf; + params.is_aarch32 = true; + params.is_32bit = false; params.CRm = (hsr >> 1) & 0xf; params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); @@ -480,6 +482,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + params.is_aarch32 = true; + params.is_32bit = true; params.CRm = (hsr >> 1) & 0xf; params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); @@ -549,6 +553,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); + params.is_aarch32 = false; + params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index d50d3722998e..d411e251412c 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -30,6 +30,8 @@ struct sys_reg_params { u8 Op2; u8 Rt; bool is_write; + bool is_aarch32; + bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { From 8d8d8cd59448d8f2f9cba73d6cd5558ebe7d5a43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 18:00:55 +0000 Subject: [PATCH 143/287] arm64: KVM: trap VM system registers until MMU and caches are ON In order to be able to detect the point where the guest enables its MMU and caches, trap all the VM related system registers. Once we see the guest enabling both the MMU and the caches, we can go back to a saner mode of operation, which is to leave these registers in complete control of the guest. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 4d44923b17bff283c002ed961373848284aaff1b) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_asm.h | 3 +- arch/arm64/kvm/sys_regs.c | 90 +++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 0eb398655378..00fbaa75dc7b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -62,6 +62,7 @@ * RW: 64bit by default, can be overriden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC + * TVM: Trap VM ops (until M+C set in SCTLR_EL1) * TSW: Trap cache operations by set/way * TWE: Trap WFE * TWI: Trap WFI @@ -74,7 +75,7 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index b25763bc0ec4..9fcd54b1e16d 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -79,7 +79,8 @@ #define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ #define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ #define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ #define NR_CP15_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index bf03e0fadf1f..2097e5ecba42 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -120,6 +120,46 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +/* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + + BUG_ON(!p->is_write); + + val = *vcpu_reg(vcpu, p->Rt); + if (!p->is_aarch32) { + vcpu_sys_reg(vcpu, r->reg) = val; + } else { + vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL; + if (!p->is_32bit) + vcpu_cp15(vcpu, r->reg + 1) = val >> 32; + } + return true; +} + +/* + * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + */ +static bool access_sctlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) /* MMU+Caches enabled? */ + vcpu->arch.hcr_el2 &= ~HCR_TVM; + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was @@ -185,32 +225,32 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_mpidr, MPIDR_EL1 }, /* SCTLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, /* CPACR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), NULL, reset_val, CPACR_EL1, 0 }, /* TTBR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, TTBR0_EL1 }, + access_vm_reg, reset_unknown, TTBR0_EL1 }, /* TTBR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), - NULL, reset_unknown, TTBR1_EL1 }, + access_vm_reg, reset_unknown, TTBR1_EL1 }, /* TCR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), - NULL, reset_val, TCR_EL1, 0 }, + access_vm_reg, reset_val, TCR_EL1, 0 }, /* AFSR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), - NULL, reset_unknown, AFSR0_EL1 }, + access_vm_reg, reset_unknown, AFSR0_EL1 }, /* AFSR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), - NULL, reset_unknown, AFSR1_EL1 }, + access_vm_reg, reset_unknown, AFSR1_EL1 }, /* ESR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, ESR_EL1 }, + access_vm_reg, reset_unknown, ESR_EL1 }, /* FAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, FAR_EL1 }, + access_vm_reg, reset_unknown, FAR_EL1 }, /* PAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), NULL, reset_unknown, PAR_EL1 }, @@ -224,17 +264,17 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, MAIR_EL1 }, + access_vm_reg, reset_unknown, MAIR_EL1 }, /* AMAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), - NULL, reset_amair_el1, AMAIR_EL1 }, + access_vm_reg, reset_amair_el1, AMAIR_EL1 }, /* VBAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), NULL, reset_val, VBAR_EL1, 0 }, /* CONTEXTIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), - NULL, reset_val, CONTEXTIDR_EL1, 0 }, + access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, /* TPIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), NULL, reset_unknown, TPIDR_EL1 }, @@ -305,14 +345,32 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; -/* Trapped cp15 registers */ +/* + * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, + * depending on the way they are accessed (as a 32bit or a 64bit + * register). + */ static const struct sys_reg_desc cp15_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + /* * DC{C,I,CI}SW operations: */ { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, @@ -326,6 +384,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, + + { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, + { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, + { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, + { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; /* Target specific emulation tables */ From 086ffbacac938e5d6208d286ffdf01867753a071 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Feb 2014 14:29:03 +0000 Subject: [PATCH 144/287] ARM: KVM: introduce kvm_p*d_addr_end The use of p*d_addr_end with stage-2 translation is slightly dodgy, as the IPA is 40bits, while all the p*d_addr_end helpers are taking an unsigned long (arm64 is fine with that as unligned long is 64bit). The fix is to introduce 64bit clean versions of the same helpers, and use them in the stage-2 page table code. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit a3c8bd31af260a17d626514f636849ee1cd1f63e) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 13 +++++++++++++ arch/arm/kvm/mmu.c | 10 +++++----- arch/arm64/include/asm/kvm_mmu.h | 4 ++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 6d0f3d3023b7..891afe78311a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +/* Open coded p*d_addr_end that can deal with 64bit addresses */ +#define kvm_pgd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + +#define kvm_pud_addr_end(addr,end) (end) + +#define kvm_pmd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + struct kvm; static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index fc71a8df0e13..c1c08b240f35 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -145,7 +145,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { - addr = pud_addr_end(addr, end); + addr = kvm_pud_addr_end(addr, end); continue; } @@ -155,13 +155,13 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, * move on. */ clear_pud_entry(kvm, pud, addr); - addr = pud_addr_end(addr, end); + addr = kvm_pud_addr_end(addr, end); continue; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - addr = pmd_addr_end(addr, end); + addr = kvm_pmd_addr_end(addr, end); continue; } @@ -176,10 +176,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, */ if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); - next = pmd_addr_end(addr, end); + next = kvm_pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { clear_pud_entry(kvm, pud, addr); - next = pud_addr_end(addr, end); + next = kvm_pud_addr_end(addr, end); } } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6eaf69b5e42c..00c0cc8b8045 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -121,6 +121,10 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= PMD_S2_RDWR; } +#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) +#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) +#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) From 5ac10a803ba46a03fccfbd078b60fa0cd83422c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 15 Jan 2014 12:50:23 +0000 Subject: [PATCH 145/287] arm64: KVM: flush VM pages before letting the guest enable caches When the guest runs with caches disabled (like in an early boot sequence, for example), all the writes are diectly going to RAM, bypassing the caches altogether. Once the MMU and caches are enabled, whatever sits in the cache becomes suddenly visible, which isn't what the guest expects. A way to avoid this potential disaster is to invalidate the cache when the MMU is being turned on. For this, we hook into the SCTLR_EL1 trapping code, and scan the stage-2 page tables, invalidating the pages/sections that have already been mapped in. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 9d218a1fcf4c6b759d442ef702842fae92e1ea61) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 2 + arch/arm/kvm/mmu.c | 93 ++++++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 2 + arch/arm64/kvm/sys_regs.c | 5 +- 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 891afe78311a..eb85b81eea6f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -155,6 +155,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) +void stage2_flush_vm(struct kvm *kvm); + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index c1c08b240f35..d7e998c6a08f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -187,6 +187,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, } } +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); + } else { + stage2_flush_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); + } else { + stage2_flush_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * free_boot_hyp_pgd - free HYP boot page tables * diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 00c0cc8b8045..7d29847a893b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -150,5 +150,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) +void stage2_flush_vm(struct kvm *kvm); + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2097e5ecba42..03244582bc55 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -154,8 +155,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, { access_vm_reg(vcpu, p, r); - if (vcpu_has_cache_enabled(vcpu)) /* MMU+Caches enabled? */ + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ vcpu->arch.hcr_el2 &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } return true; } From ebdea56c8b2acd4fd7a421d1d670e6bf10f37e60 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 19:13:10 +0000 Subject: [PATCH 146/287] ARM: KVM: force cache clean on page fault when caches are off In order for a guest with caches disabled to observe data written contained in a given page, we need to make sure that page is committed to memory, and not just hanging in the cache (as guest accesses are completely bypassing the cache until it decides to enable it). For this purpose, hook into the coherent_cache_guest_page function and flush the region if the guest SCTLR register doesn't show the MMU and caches as being enabled. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Reviewed-by: Catalin Marinas (cherry picked from commit 159793001d7d85af17855630c94f0a176848e16b) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index eb85b81eea6f..5c7aa3c1519f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -129,9 +129,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; +} + static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, unsigned long size) { + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + /* * If we are going to insert an instruction page and the icache is * either VIPT or PIPT, there is a potential problem where the host @@ -152,7 +162,6 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, } } -#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) void stage2_flush_vm(struct kvm *kvm); From d773d11dd453f43793082c5206799c818a4a4b55 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 18:56:26 +0000 Subject: [PATCH 147/287] ARM: KVM: fix handling of trapped 64bit coprocessor accesses Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling) changed the way we match the 64bit coprocessor access from user space, but didn't update the trap handler for the same set of registers. The effect is that a trapped 64bit access is never matched, leading to a fault being injected into the guest. This went unnoticed as we didn't really trap any 64bit register so far. Placing the CRm field of the access into the CRn field of the matching structure fixes the problem. Also update the debug feature to emit the expected string in case of failing match. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit 46c214dd595381c880794413facadfa07fba5c95) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 4 ++-- arch/arm/kvm/coproc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 78c0885d6501..126c90d18387 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -443,7 +443,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = true; @@ -451,7 +451,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf; params.Op2 = 0; params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; - params.CRn = 0; + params.CRm = 0; return emulate_cp15(vcpu, ¶ms); } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 0461d5c8d3de..c5ad7ff40c96 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p) { /* Look, we even formatted it for you to paste into the table! */ if (p->is_64bit) { - kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n", - p->CRm, p->Op1, p->is_write ? "write" : "read"); + kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n", + p->CRn, p->Op1, p->is_write ? "write" : "read"); } else { kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32," " func_%s },\n", From ba3c65fc739a98f9eae4e8997d8a6e66d130b0cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 18:56:26 +0000 Subject: [PATCH 148/287] ARM: KVM: fix ordering of 64bit coprocessor accesses Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling) added an ordering dependency for the 64bit registers. The order described is: CRn, CRm, Op1, Op2, 64bit-first. Unfortunately, the implementation is: CRn, 64bit-first, CRm... Move the 64bit test to be last in order to match the documentation. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit 547f781378a22b65c2ab468f235c23001b5924da) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index c5ad7ff40c96..8dda870e84f9 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1, return -1; if (i1->CRn != i2->CRn) return i1->CRn - i2->CRn; - if (i1->is_64 != i2->is_64) - return i2->is_64 - i1->is_64; if (i1->CRm != i2->CRm) return i1->CRm - i2->CRm; if (i1->Op1 != i2->Op1) return i1->Op1 - i2->Op1; - return i1->Op2 - i2->Op2; + if (i1->Op2 != i2->Op2) + return i1->Op2 - i2->Op2; + return i2->is_64 - i1->is_64; } From ff860e2b7761196f6379888acd6668ecddb703df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Jan 2014 09:43:38 +0000 Subject: [PATCH 149/287] ARM: KVM: introduce per-vcpu HYP Configuration Register So far, KVM/ARM used a fixed HCR configuration per guest, except for the VI/VF/VA bits to control the interrupt in absence of VGIC. With the upcoming need to dynamically reconfigure trapping, it becomes necessary to allow the HCR to be changed on a per-vcpu basis. The fix here is to mimic what KVM/arm64 already does: a per vcpu HCR field, initialized at setup time. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit ac30a11e8e92a03dbe236b285c5cbae0bf563141) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 - arch/arm/include/asm/kvm_host.h | 9 ++++++--- arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kvm/guest.c | 1 + arch/arm/kvm/interrupts_head.S | 9 +++------ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 1d3153c7eb41..a843e74a384c 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -69,7 +69,6 @@ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_TWE | HCR_SWIO | HCR_TIDCP) -#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* System Control Register (SCTLR) bits */ #define SCTLR_TE (1 << 30) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 098f7dd6d564..09af14999c9b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -101,6 +101,12 @@ struct kvm_vcpu_arch { /* The CPU type we expose to the VM */ u32 midr; + /* HYP trapping configuration */ + u32 hcr; + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -128,9 +134,6 @@ struct kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u32 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..aa2acc1dd986 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -168,6 +168,7 @@ int main(void) DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 2786eae10c0d..b23a59c1c522 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6f18695a09cb..a37270d7d4d6 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -597,17 +597,14 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ .macro configure_hyp_role operation - mrc p15, 4, r2, c1, c1, 0 @ HCR - bic r2, r2, #HCR_VIRT_EXCP_MASK - ldr r3, =HCR_GUEST_MASK .if \operation == vmentry - orr r2, r2, r3 + ldr r2, [vcpu, #VCPU_HCR] ldr r3, [vcpu, #VCPU_IRQ_LINES] orr r2, r2, r3 .else - bic r2, r2, r3 + mov r2, #0 .endif - mcr p15, 4, r2, c1, c1, 0 + mcr p15, 4, r2, c1, c1, 0 @ HCR .endm .macro load_vcpu From 6076945ed741ff33724410d0bb9dfa535b475e79 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Jan 2014 10:20:09 +0000 Subject: [PATCH 150/287] ARM: KVM: add world-switch for AMAIR{0,1} HCR.TVM traps (among other things) accesses to AMAIR0 and AMAIR1. In order to minimise the amount of surprise a guest could generate by trying to access these registers with caches off, add them to the list of registers we switch/handle. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit af20814ee927ed888288d98917a766b4179c4fe0) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 4 +++- arch/arm/kvm/coproc.c | 6 ++++++ arch/arm/kvm/interrupts_head.S | 12 ++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 661da11f76f4..53b3c4a50d5c 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -48,7 +48,9 @@ #define c13_TID_URO 26 /* Thread ID, User R/O */ #define c13_TID_PRIV 27 /* Thread ID, Privileged */ #define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ -#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ +#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */ +#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */ +#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 126c90d18387..a5a54a48d51b 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -328,6 +328,12 @@ static const struct coproc_reg cp15_regs[] = { { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, NULL, reset_unknown, c10_NMRR}, + /* AMAIR0/AMAIR1: swapped by interrupt.S. */ + { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c10_AMAIR0}, + { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c10_AMAIR1}, + /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index a37270d7d4d6..76af93025574 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -303,13 +303,17 @@ vcpu .req r0 @ vcpu pointer always in r0 mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL mrrc p15, 0, r4, r5, c7 @ PAR + mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 + mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \store_to_vcpu == 0 - push {r2,r4-r5} + push {r2,r4-r7} .else str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) strd r4, r5, [r12] + str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif .endm @@ -322,15 +326,19 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 - pop {r2,r4-r5} + pop {r2,r4-r7} .else ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) ldrd r4, r5, [r12] + ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL mcrr p15, 0, r4, r5, c7 @ PAR + mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 + mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \read_from_vcpu == 0 pop {r2-r12} From 11178dce0398cd8419c639c8ab81357cb4f2bbf0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 18:00:55 +0000 Subject: [PATCH 151/287] ARM: KVM: trap VM system registers until MMU and caches are ON In order to be able to detect the point where the guest enables its MMU and caches, trap all the VM related system registers. Once we see the guest enabling both the MMU and the caches, we can go back to a saner mode of operation, which is to leave these registers in complete control of the guest. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 8034699a42d68043b495c7e0cfafccd920707ec8) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 3 +- arch/arm/kvm/coproc.c | 74 ++++++++++++++++++++++++++-------- arch/arm/kvm/coproc.h | 4 ++ arch/arm/kvm/coproc_a15.c | 2 +- arch/arm/kvm/coproc_a7.c | 2 +- 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a843e74a384c..816db0bf2dd8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -55,6 +55,7 @@ * The bits we set in HCR: * TAC: Trap ACTLR * TSC: Trap SMC + * TVM: Trap VM ops (until MMU and caches are on) * TSW: Trap cache operations by set/way * TWI: Trap WFI * TWE: Trap WFE @@ -68,7 +69,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_TWE | HCR_SWIO | HCR_TIDCP) + HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP) /* System Control Register (SCTLR) bits */ #define SCTLR_TE (1 << 30) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index a5a54a48d51b..c58a35116f63 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -204,6 +205,44 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +/* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + BUG_ON(!p->is_write); + + vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); + if (p->is_64bit) + vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + + return true; +} + +/* + * SCTLR accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + * + * Used by the cpu-specific code. + */ +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ + vcpu->arch.hcr &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was @@ -261,33 +300,36 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_val, c1_CPACR, 0x00000000 }, - /* TTBR0/TTBR1: swapped by interrupt.S. */ - { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, - { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, - - /* TTBCR: swapped by interrupt.S. */ + /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */ + { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c2_TTBR1 }, { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c2_TTBCR, 0x00000000 }, + access_vm_reg, reset_val, c2_TTBCR, 0x00000000 }, + { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 }, + /* DACR: swapped by interrupt.S. */ { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c3_DACR }, + access_vm_reg, reset_unknown, c3_DACR }, /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */ { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_DFSR }, + access_vm_reg, reset_unknown, c5_DFSR }, { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_IFSR }, + access_vm_reg, reset_unknown, c5_IFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_ADFSR }, + access_vm_reg, reset_unknown, c5_ADFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_AIFSR }, + access_vm_reg, reset_unknown, c5_AIFSR }, /* DFAR/IFAR: swapped by interrupt.S. */ { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c6_DFAR }, + access_vm_reg, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_unknown, c6_IFAR }, + access_vm_reg, reset_unknown, c6_IFAR }, /* PAR swapped by interrupt.S */ { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, @@ -324,9 +366,9 @@ static const struct coproc_reg cp15_regs[] = { /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */ { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c10_PRRR}, + access_vm_reg, reset_unknown, c10_PRRR}, { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c10_NMRR}, + access_vm_reg, reset_unknown, c10_NMRR}, /* AMAIR0/AMAIR1: swapped by interrupt.S. */ { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, @@ -340,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = { /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */ { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_val, c13_CID, 0x00000000 }, + access_vm_reg, reset_val, c13_CID, 0x00000000 }, { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c13_TID_URW }, { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32, diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 8dda870e84f9..1a44bbe39643 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define is64 .is_64 = true #define is32 .is_64 = false +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); + #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index bb0cac1410cc..e6f4ae48bda9 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -34,7 +34,7 @@ static const struct coproc_reg a15_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50078 }, + access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c index 1df767331588..17fc7cd479d3 100644 --- a/arch/arm/kvm/coproc_a7.c +++ b/arch/arm/kvm/coproc_a7.c @@ -37,7 +37,7 @@ static const struct coproc_reg a7_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50878 }, + access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, }; static struct kvm_coproc_target_table a7_target_table = { From 0ddf276ba9c11c5bd993c7c87226dde6fc5ffa51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Jan 2014 17:38:33 +0000 Subject: [PATCH 152/287] ARM: KVM: fix warning in mmu.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling with THP enabled leads to the following warning: arch/arm/kvm/mmu.c: In function ‘unmap_range’: arch/arm/kvm/mmu.c:177:39: warning: ‘pte’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (kvm_pmd_huge(*pmd) || page_empty(pte)) { ^ Code inspection reveals that these two cases are mutually exclusive, so GCC is a bit overzealous here. Silence it anyway by initializing pte to NULL and testing it later on. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 56041bf920d2937b7cadcb30cb206f0372eee814) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d7e998c6a08f..80bb1e6c2c29 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -144,6 +144,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, while (addr < end) { pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); + pte = NULL; if (pud_none(*pud)) { addr = kvm_pud_addr_end(addr, end); continue; @@ -174,7 +175,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, /* * If the pmd entry is to be cleared, walk back up the ladder */ - if (kvm_pmd_huge(*pmd) || page_empty(pte)) { + if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { clear_pmd_entry(kvm, pmd, addr); next = kvm_pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { From 500d9e60bb1f98d24a2780542a68d6a1ffc32315 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 6 Mar 2014 03:30:46 +0000 Subject: [PATCH 153/287] ARM: KVM: fix non-VGIC compilation Add a stub for kvm_vgic_addr when compiling without CONFIG_KVM_ARM_VGIC. The usefulness of this configurarion is extremely doubtful, but let's fix it anyway (until we decide that we'll always support a VGIC). Reported-by: Michele Paolino Cc: Paolo Bonzini Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit 6cbde8253a8143ada18ec0d1711230747a7c1934) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index be85127bfed3..f27000f55a83 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add return 0; } +static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + return -ENXIO; +} + static inline int kvm_vgic_init(struct kvm *kvm) { return 0; From 1fd8c219ef68ef56a4a0b0effa66c38e409697eb Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Fri, 7 Mar 2014 08:49:25 +0000 Subject: [PATCH 154/287] arm64: Add boot time configuration of Intermediate Physical Address size ARMv8 supports a range of physical address bit sizes. The PARange bits from ID_AA64MMFR0_EL1 register are read during boot-time and the intermediate physical address size bits are written in the translation control registers (TCR_EL1 and VTCR_EL2). There is no change in the VA bits and levels of translation. Signed-off-by: Radha Mohan Chintakuntla Reviewed-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 87366d8cf7b3f6dc34633938aa8766e5a390ce33) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 15 ++++++--------- arch/arm64/include/asm/pgtable-hwdef.h | 5 ++--- arch/arm64/kvm/hyp-init.S | 6 ++++++ arch/arm64/mm/proc.S | 8 +++++++- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 00fbaa75dc7b..3d6903006a8a 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,7 +107,6 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_PS_MASK (7 << 16) -#define VTCR_EL2_PS_40B (2 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) #define VTCR_EL2_TG0_64K (1 << 14) @@ -130,10 +129,9 @@ * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -143,10 +141,9 @@ * 4kB pages (TG0 = 0) * 3 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index b1d2e26c3c88..f7af66b54cb2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -100,9 +100,9 @@ #define PTE_HYP PTE_USER /* - * 40-bit physical address supported. + * Highest possible physical address supported. */ -#define PHYS_MASK_SHIFT (40) +#define PHYS_MASK_SHIFT (48) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* @@ -122,7 +122,6 @@ #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) #define TCR_TG0_64K (UL(1) << 14) #define TCR_TG1_64K (UL(1) << 30) -#define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 2b0244d65c16..d968796f4b2d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -68,6 +68,12 @@ __do_hyp_init: msr tcr_el2, x4 ldr x4, =VTCR_EL2_FLAGS + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR0_EL1 + bfi x4, x5, #16, #3 msr vtcr_el2, x4 mrs x4, mair_el1 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8e0158f198d7..55a40f6dbf78 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,8 +140,14 @@ ENTRY(__cpu_setup) * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \ TCR_ASID16 | TCR_TBI0 | (1 << 31) + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in + * TCR_EL1. + */ + mrs x9, ID_AA64MMFR0_EL1 + bfi x10, x9, #32, #3 #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K From 7fe3c71dbc4c635cc5e4b527b748749fc771eb57 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 7 Apr 2014 01:36:08 +0800 Subject: [PATCH 155/287] arm, kvm: fix double lock on cpu_add_remove_lock Commit 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration) holds the lock before calling the two functions: kvm_vgic_hyp_init() kvm_timer_hyp_init() and both the two functions are calling register_cpu_notifier() to register cpu notifier, so cause double lock on cpu_add_remove_lock. Considered that both two functions are only called inside kvm_arch_init() with holding cpu_add_remove_lock, so simply use __register_cpu_notifier() to fix the problem. Fixes: 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration) Signed-off-by: Ming Lei Reviewed-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki (cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/vgic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5081e809821f..22fa819a9b6a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void) host_vtimer_irq = ppi; - err = register_cpu_notifier(&kvm_timer_cpu_nb); + err = __register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { kvm_err("Cannot register timer CPU notifier\n"); goto out_free; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8ca405cd7c1a..47b29834a6b6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void) goto out; } - ret = register_cpu_notifier(&vgic_cpu_nb); + ret = __register_cpu_notifier(&vgic_cpu_nb); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); goto out_free_irq; From 9b937709747526fc1fcac118d605705f78424d3c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jan 2014 16:09:21 +0100 Subject: [PATCH 156/287] KVM: add kvm_is_error_gpa() helper It's quite common (in the s390 guest access code) to test if a guest physical address points to a valid guest memory area or not. So add a simple helper function in common code, since this might be of interest for other architectures as well. Signed-off-by: Heiko Carstens Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger (cherry picked from commit dfeec843fb237d73947e818f961e8d6f0df22b01) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6801d10e04c..eef946f92c0c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -840,6 +840,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } +static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +{ + unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + return kvm_is_error_hva(hva); +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); From d80e22fea4940bd2ad63648a941620b6fccd59a3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Apr 2014 11:46:04 +0100 Subject: [PATCH 157/287] ARM: KVM: disable KVM in Kconfig on big-endian systems KVM currently crashes and burns on big-endian hosts, so don't allow it to be selected until we've got that fixed. Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 4e4468fac4381b92eb333d94256e7fb8350f3de3) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..4be5bb150bdd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE + depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. From 16b80f7131f5d763118811815439c82e4ac45e50 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 28 Mar 2014 14:25:19 +0000 Subject: [PATCH 158/287] arm: KVM: fix possible misalignment of PGDs and bounce page The kvm/mmu code shared by arm and arm64 uses kalloc() to allocate a bounce page (if hypervisor init code crosses page boundary) and hypervisor PGDs. The problem is that kalloc() does not guarantee the proper alignment. In the case of the bounce page, the page sized buffer allocated may also cross a page boundary negating the purpose and leading to a hang during kvm initialization. Likewise the PGDs allocated may not meet the minimum alignment requirements of the underlying MMU. This patch uses __get_free_page() to guarantee the worst case alignment needs of the bounce page and PGDs on both arm and arm64. Cc: # 3.10+ Signed-off-by: Mark Salter Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 5d4e08c45a6cf8f1ab3c7fa375007635ac569165) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80bb1e6c2c29..16f804938b8f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -330,7 +332,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -1024,7 +1026,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -1050,8 +1052,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; From 738a77ae39541495f4d5396ed093bccdebc589f1 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Thu, 10 Apr 2014 13:14:32 +0100 Subject: [PATCH 159/287] KVM: ARM: vgic: Fix sgi dispatch problem When dispatch SGI(mode == 0), that is the vcpu of VM should send sgi to the cpu which the target_cpus list. So, there must add the "break" to branch of case 0. Cc: # 3.10+ Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 47b29834a6b6..7e8b44efb739 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; From 6e349e33cec7724f9ae0520a1539ab7cf1d24893 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:26:01 +0200 Subject: [PATCH 160/287] KVM: async_pf: mm->mm_users can not pin apf->mm get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc was already called (or is in progress), mm->mm_count can only pin mm->pgd and mm_struct itself. Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users. kvm_create_vm/kvm_destroy_vm play with ->mm_count too but this case looks fine at first glance, it seems that this ->mm is only used to verify that current->mm == kvm->mm. Signed-off-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini (cherry picked from commit 41c22f626254b9dc0376928cae009e73d1b6a49a) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 10df100c4514..06e6401d6ef4 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) flush_work(&work->work); #else if (cancel_work_sync(&work->work)) { - mmdrop(work->mm); + mmput(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } @@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmdrop(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } From bc38de2312a2a078f2d5ad596888a131d811b143 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 Apr 2014 00:07:18 +0200 Subject: [PATCH 161/287] KVM: arm/arm64: vgic: fix GICD_ICFGR register accesses Since KVM internally represents the ICFGR registers by stuffing two of them into one word, the offset for accessing the internal representation and the one for the MMIO based access are different. So keep the original offset around, but adjust the internal array offset by one bit. Reported-by: Haibin Wang Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit f2ae85b2ab3776b9e4e42e5b6fa090f40d396794) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7e8b44efb739..f9af48c9eb37 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, u32 val; u32 *reg; - offset >>= 1; reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset); + vcpu->vcpu_id, offset >> 1); - if (offset & 2) + if (offset & 4) val = *reg >> 16; else val = *reg & 0xffff; @@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 4) { + if (offset < 8) { *reg = ~0U; /* Force PPIs/SGIs to 1 */ return false; } val = vgic_cfg_compress(val); - if (offset & 2) { + if (offset & 4) { *reg &= 0xffff; *reg |= val << 16; } else { From 2a1992b43b4e9f453e77e62740181ef665426c93 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:25:58 +0200 Subject: [PATCH 162/287] KVM: async_pf: kill the unnecessary use_mm/unuse_mm async_pf_execute() async_pf_execute() has no reasons to adopt apf->mm, gup(current, mm) should work just fine even if current has another or NULL ->mm. Recently kvm_async_page_present_sync() was added insedie the "use_mm" section, but it seems that it doesn't need current->mm too. Signed-off-by: Oleg Nesterov Reviewed-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit d72d946d0b649b79709b99b9d5cb7269fff8afaa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 06e6401d6ef4..cda703e512d3 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - use_mm(mm); down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); - unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); From 4c158ade8e722cd59ba1a6d8ca46be8427c8ac90 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Apr 2014 17:03:00 +0200 Subject: [PATCH 163/287] KVM: async_pf: change async_pf_execute() to use get_user_pages(tsk => NULL) async_pf_execute() passes tsk == current to gup(), this is doesn't hurt but unnecessary and misleading. "tsk" is only used to account the number of faults and current is the random workqueue thread. Signed-off-by: Oleg Nesterov Suggested-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit e9545b9f8aeb63e05818e4b3250057260bc072aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index cda703e512d3..d6a3d0993d88 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -81,7 +81,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); + get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); From de055bd5eb6f2daf69c81f6f62241f0b977a2ae4 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Tue, 29 Apr 2014 14:49:17 +0800 Subject: [PATCH 164/287] KVM: ARM: vgic: Fix the overlap check action about setting the GICD & GICC base address. Currently below check in vgic_ioaddr_overlap will always succeed, because the vgic dist base and vgic cpu base are still kept UNDEF after initialization. The code as follows will be return forever. if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) return 0; So, before invoking the vgic_ioaddr_overlap, it needs to set the corresponding base address firstly. Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 30c2117085bc4e05d091cee6eba79f069b41a9cd) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f9af48c9eb37..56ff9bebb577 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, if (addr + size < addr) return -EINVAL; + *ioaddr = addr; ret = vgic_ioaddr_overlap(kvm); if (ret) - return ret; - *ioaddr = addr; + *ioaddr = VGIC_ADDR_UNDEF; + return ret; } From 7d46aca1cf97c1ff323d799fd82ffd49bc294bd1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:14 +0530 Subject: [PATCH 165/287] KVM: Add capability to advertise PSCI v0.2 support User space (i.e. QEMU or KVMTOOL) should be able to check whether KVM ARM/ARM64 supports in-kernel PSCI v0.2 emulation. For this purpose, we define KVM_CAP_ARM_PSCI_0_2 in KVM user space interface header. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 717abd208dff75b343243aa5ed688f62190dda5e) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 56347ab54cf7..cb7ebcc7a9db 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -669,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_ARM_PSCI_0_2 102 #ifdef KVM_CAP_IRQ_ROUTING From 041d3f918f8daf3715fc87f1516c40466f94e6dd Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:15 +0530 Subject: [PATCH 166/287] ARM/ARM64: KVM: Add common header for PSCI related defines We need a common place to share PSCI related defines among ARM kernel, ARM64 kernel, KVM ARM/ARM64 PSCI emulation, and user space. We introduce uapi/linux/psci.h for this purpose. This newly added header will be first used by KVM ARM/ARM64 in-kernel PSCI emulation and user space (i.e. QEMU or KVMTOOL). Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Ashwin Chaugule Signed-off-by: Christoffer Dall (cherry picked from commit e546eea74ec66698e29c583639cf6e2a11e46490) Signed-off-by: Christoffer Dall --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/psci.h | 90 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 include/uapi/linux/psci.h diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index bdc6e87ff3eb..405887bec8b3 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -311,6 +311,7 @@ header-y += ppp-ioctl.h header-y += ppp_defs.h header-y += pps.h header-y += prctl.h +header-y += psci.h header-y += ptp_clock.h header-y += ptrace.h header-y += qnx4_fs.h diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h new file mode 100644 index 000000000000..310d83e0a91b --- /dev/null +++ b/include/uapi/linux/psci.h @@ -0,0 +1,90 @@ +/* + * ARM Power State and Coordination Interface (PSCI) header + * + * This header holds common PSCI defines and macros shared + * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space. + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Anup Patel + */ + +#ifndef _UAPI_LINUX_PSCI_H +#define _UAPI_LINUX_PSCI_H + +/* + * PSCI v0.1 interface + * + * The PSCI v0.1 function numbers are implementation defined. + * + * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED, + * INVALID_PARAMS, and DENIED defined below are applicable + * to PSCI v0.1. + */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE \ + (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3) +#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4) +#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5) +#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6) +#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7) +#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8) +#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9) + +#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) +#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) +#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */ +#define PSCI_0_2_AFFINITY_LEVEL_ON 0 +#define PSCI_0_2_AFFINITY_LEVEL_OFF 1 +#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2 + +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 +#define PSCI_RET_ALREADY_ON -4 +#define PSCI_RET_ON_PENDING -5 +#define PSCI_RET_INTERNAL_FAILURE -6 +#define PSCI_RET_NOT_PRESENT -7 +#define PSCI_RET_DISABLED -8 + +#endif /* _UAPI_LINUX_PSCI_H */ From 4ccf6abe18fc14863159b2c51a4958214b4c13dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:16 +0530 Subject: [PATCH 167/287] ARM/ARM64: KVM: Add base for PSCI v0.2 emulation Currently, the in-kernel PSCI emulation provides PSCI v0.1 interface to VCPUs. This patch extends current in-kernel PSCI emulation to provide PSCI v0.2 interface to VCPUs. By default, ARM/ARM64 KVM will always provide PSCI v0.1 interface for keeping the ABI backward-compatible. To select PSCI v0.2 interface for VCPUs, the user space (i.e. QEMU or KVMTOOL) will have to set KVM_ARM_VCPU_PSCI_0_2 feature when doing VCPU init using KVM_ARM_VCPU_INIT ioctl. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7d0f84aae9e231930985eaff63ac91b61aaa15d6) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/include/asm/kvm_psci.h | 4 ++ arch/arm/include/uapi/asm/kvm.h | 10 +-- arch/arm/kvm/psci.c | 105 +++++++++++++++++++++++------- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_psci.h | 4 ++ arch/arm64/include/uapi/asm/kvm.h | 10 +-- 7 files changed, 105 insertions(+), 32 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 09af14999c9b..193ceaf01bfd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -36,7 +36,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 #include diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 9a83d98bf170..4c0e3e1d1597 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM_KVM_PSCI_H__ #define __ARM_KVM_PSCI_H__ +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); bool kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index ef0c8785ba16..e6ebdd3471e5 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -20,6 +20,7 @@ #define __ARM_KVM_H__ #include +#include #include #define __KVM_HAVE_GUEST_DEBUG @@ -83,6 +84,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -201,9 +203,9 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 448f60e8d23c..8c42596cdbdf 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -59,7 +59,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * turned off. */ if (!vcpu || !vcpu->arch.pause) - return KVM_PSCI_RET_INVAL; + return PSCI_RET_INVALID_PARAMS; target_pc = *vcpu_reg(source_vcpu, 2); @@ -82,7 +82,82 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); - return KVM_PSCI_RET_SUCCESS; + return PSCI_RET_SUCCESS; +} + +int kvm_psci_version(struct kvm_vcpu *vcpu) +{ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_0_2; + + return KVM_ARM_PSCI_0_1; +} + +static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = 2; + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN64_MIGRATE: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; +} + +static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case KVM_PSCI_FN_CPU_SUSPEND: + case KVM_PSCI_FN_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; } /** @@ -97,26 +172,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ bool kvm_psci_call(struct kvm_vcpu *vcpu) { - unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); - unsigned long val; - - switch (psci_fn) { - case KVM_PSCI_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); - val = KVM_PSCI_RET_SUCCESS; - break; - case KVM_PSCI_FN_CPU_ON: - val = kvm_psci_vcpu_on(vcpu); - break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: - val = KVM_PSCI_RET_NI; - break; - + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); default: return false; - } - - *vcpu_reg(vcpu, 0) = val; - return true; + }; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0a1d69751562..92242ce06309 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -39,7 +39,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 2 +#define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; int kvm_target_cpu(void); diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h index e301a4816355..e25c658a757b 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/arch/arm64/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM64_KVM_PSCI_H__ #define __ARM64_KVM_PSCI_H__ +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); bool kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index eaf54a30bedc..e6471daf3fb5 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -31,6 +31,7 @@ #define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ +#include #include #include @@ -77,6 +78,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ +#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -186,10 +188,10 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif From c6debeb5f9cf9a1c1f8867d991563302797e20c3 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:18 +0530 Subject: [PATCH 168/287] ARM/ARM64: KVM: Make kvm_psci_call() return convention more flexible Currently, the kvm_psci_call() returns 'true' or 'false' based on whether the PSCI function call was handled successfully or not. This does not help us emulate system-level PSCI functions where the actual emulation work will be done by user space (QEMU or KVMTOOL). Examples of such system-level PSCI functions are: PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET. This patch updates kvm_psci_call() to return three types of values: 1) > 0 (success) 2) = 0 (success but exit to user space) 3) < 0 (errors) Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e8e7fcc5e2710b31ef842ee799db99c07986c364) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_psci.h | 2 +- arch/arm/kvm/handle_exit.c | 10 +++++++--- arch/arm/kvm/psci.c | 28 ++++++++++++++++------------ arch/arm64/include/asm/kvm_psci.h | 2 +- arch/arm64/kvm/handle_exit.c | 12 ++++++++---- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 4c0e3e1d1597..6bda945d31fa 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -22,6 +22,6 @@ #define KVM_ARM_PSCI_0_2 2 int kvm_psci_version(struct kvm_vcpu *vcpu); -bool kvm_psci_call(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 0de91fc6de0f..4c979d466cc1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + int ret; + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); - if (kvm_psci_call(vcpu)) + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); return 1; + } - kvm_inject_undefined(vcpu); - return 1; + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 8c42596cdbdf..14e6fa6c8e35 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -93,7 +93,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) return KVM_ARM_PSCI_0_1; } -static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -128,14 +128,14 @@ static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = PSCI_RET_NOT_SUPPORTED; break; default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; } -static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -153,11 +153,11 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_NOT_SUPPORTED; break; default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; } /** @@ -165,12 +165,16 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * @vcpu: Pointer to the VCPU struct * * Handle PSCI calls from guests through traps from HVC instructions. - * The calling convention is similar to SMC calls to the secure world where - * the function number is placed in r0 and this function returns true if the - * function number specified in r0 is withing the PSCI range, and false - * otherwise. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function */ -bool kvm_psci_call(struct kvm_vcpu *vcpu) +int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_0_2: @@ -178,6 +182,6 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu) case KVM_ARM_PSCI_0_1: return kvm_psci_0_1_call(vcpu); default: - return false; + return -EINVAL; }; } diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h index e25c658a757b..bc39e557c56c 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/arch/arm64/include/asm/kvm_psci.h @@ -22,6 +22,6 @@ #define KVM_ARM_PSCI_0_2 2 int kvm_psci_version(struct kvm_vcpu *vcpu); -bool kvm_psci_call(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7bc41eab4c64..182415e1a952 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; + int ret; - kvm_inject_undefined(vcpu); - return 1; + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); + return 1; + } + + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) From 7b0aa85882b902333c01f07bef88da07e5c53ebf Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:19 +0530 Subject: [PATCH 169/287] KVM: Add KVM_EXIT_SYSTEM_EVENT to user space API header Currently, we don't have an exit reason to notify user space about a system-level event (for e.g. system reset or shutdown) triggered by the VCPU. This patch adds exit reason KVM_EXIT_SYSTEM_EVENT for this purpose. We can also inform user space about the 'type' and architecture specific 'flags' of a system-level event using the kvm_run structure. This newly added KVM_EXIT_SYSTEM_EVENT will be used by KVM ARM/ARM64 in-kernel PSCI v0.2 support to reset/shutdown VMs. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 8ad6b634928a25971dc42dce101808b1491f87ec) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 15 +++++++++++++++ include/uapi/linux/kvm.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c75a17555a8..48e7888291f5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2636,6 +2636,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an external interrupt has just been delivered into the guest. User space should put the acknowledged interrupt vector into the 'epr' field. + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; + +If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered +a system-level event using some architecture specific mechanism (hypercall +or some special instruction). In case of ARM/ARM64, this is triggered using +HVC instruction based PSCI call from the vcpu. The 'type' field describes +the system-level event type. The 'flags' field describes architecture +specific flags for the system-level event. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cb7ebcc7a9db..0a70874e4a63 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -171,6 +171,7 @@ struct kvm_pit_config { #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 #define KVM_EXIT_EPR 23 +#define KVM_EXIT_SYSTEM_EVENT 24 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -301,6 +302,13 @@ struct kvm_run { struct { __u32 epr; } epr; + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; /* Fix the size of the union. */ char padding[256]; }; From 2834b56a4051c66feb99c5546723fd6afa434d0e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:20 +0530 Subject: [PATCH 170/287] ARM/ARM64: KVM: Emulate PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET The PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET functions are system-level functions hence cannot be fully emulated by in-kernel PSCI emulation code. To tackle this, we forward PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET function calls from vcpu to user space (i.e. QEMU or KVMTOOL) via kvm_run structure using KVM_EXIT_SYSTEM_EVENT exit reasons. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4b1238269ed340d59ef829fd9c30a39cfb2923a8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 46 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 14e6fa6c8e35..59362131b79f 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -85,6 +85,23 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_SUCCESS; } +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + int kvm_psci_version(struct kvm_vcpu *vcpu) { if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) @@ -95,6 +112,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { + int ret = 1; unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -114,13 +132,35 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We should'nt be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidently/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; case PSCI_0_2_FN_CPU_SUSPEND: case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN_MIGRATE: case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN_SYSTEM_OFF: - case PSCI_0_2_FN_SYSTEM_RESET: case PSCI_0_2_FN64_CPU_SUSPEND: case PSCI_0_2_FN64_AFFINITY_INFO: case PSCI_0_2_FN64_MIGRATE: @@ -132,7 +172,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) } *vcpu_reg(vcpu, 0) = val; - return 1; + return ret; } static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) From 4c7726b875f0ae0aa10b553d0ddf1662ca1c8536 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:21 +0530 Subject: [PATCH 171/287] ARM/ARM64: KVM: Emulate PSCI v0.2 AFFINITY_INFO This patch adds emulation of PSCI v0.2 AFFINITY_INFO function call for KVM ARM/ARM64. This is a VCPU-level function call which will be used to determine current state of given affinity level. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e6bc13c8a70eabc6a39098ccedf6129c734e3db3) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 59362131b79f..3b6a0cf25c7d 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -27,6 +27,16 @@ * as described in ARM document number ARM DEN 0022A. */ +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -85,6 +95,42 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_SUCCESS; } +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = *vcpu_reg(vcpu, 1); + lowest_affinity_level = *vcpu_reg(vcpu, 2); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if (((mpidr & target_affinity_mask) == target_affinity) && + !tmp->arch.pause) { + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -132,6 +178,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -157,12 +207,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN_MIGRATE: case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_AFFINITY_INFO: case PSCI_0_2_FN64_MIGRATE: case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: val = PSCI_RET_NOT_SUPPORTED; From f11d09d9c9ce2dcca77dd41fcb16c3811cc513d5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:22 +0530 Subject: [PATCH 172/287] ARM/ARM64: KVM: Emulate PSCI v0.2 MIGRATE_INFO_TYPE and related functions This patch adds emulation of PSCI v0.2 MIGRATE, MIGRATE_INFO_TYPE, and MIGRATE_INFO_UP_CPU function calls for KVM ARM/ARM64. KVM ARM/ARM64 being a hypervisor (and not a Trusted OS), we cannot provide this functions hence we emulate these functions in following way: 1. MIGRATE - Returns "Not Supported" 2. MIGRATE_INFO_TYPE - Return 2 i.e. Trusted OS is not present 3. MIGRATE_INFO_UP_CPU - Returns "Not Supported" Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit bab0b43012a8ad64877fa46134370a7f5c6ce861) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 3b6a0cf25c7d..cce901a510fa 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -182,6 +182,22 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN64_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -207,12 +223,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN_MIGRATE_INFO_TYPE: - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_MIGRATE: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: val = PSCI_RET_NOT_SUPPORTED; break; default: From 69590f71e0ef474e0d44514484308c2eb826306d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:23 +0530 Subject: [PATCH 173/287] ARM/ARM64: KVM: Fix CPU_ON emulation for PSCI v0.2 As-per PSCI v0.2, the source CPU provides physical address of "entry point" and "context id" for starting a target CPU. Also, if target CPU is already running then we should return ALREADY_ON. Current emulation of CPU_ON function does not consider physical address of "context id" and returns INVALID_PARAMETERS if target CPU is already running. This patch updates kvm_psci_vcpu_on() such that it works for both PSCI v0.1 and PSCI v0.2. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit aa8aeefe5e567637bbec7d7a3031cc057e3af303) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index cce901a510fa..1067579c7336 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -48,6 +48,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long context_id; unsigned long mpidr; phys_addr_t target_pc; int i; @@ -68,10 +69,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * Make sure the caller requested a valid CPU and that the CPU is * turned off. */ - if (!vcpu || !vcpu->arch.pause) + if (!vcpu) return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.pause) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } target_pc = *vcpu_reg(source_vcpu, 2); + context_id = *vcpu_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -86,6 +94,11 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ + *vcpu_reg(vcpu, 0) = context_id; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ From 6824eda31449e4585085a49739d60e640b1c7583 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:24 +0530 Subject: [PATCH 174/287] ARM/ARM64: KVM: Emulate PSCI v0.2 CPU_SUSPEND This patch adds emulation of PSCI v0.2 CPU_SUSPEND function call for KVM ARM/ARM64. This is a CPU-level function call which can suspend current CPU or current CPU cluster. We don't have VCPU clusters in KVM so we only suspend the current VCPU. The CPU_SUSPEND emulation is not tested much because currently there is no CPUIDLE driver in Linux kernel that uses PSCI CPU_SUSPEND. The PSCI CPU_SUSPEND implementation in ARM64 kernel was tested using a Simple CPUIDLE driver which is not published due to unstable DT-bindings for PSCI. (For more info, http://lwn.net/Articles/574950/) For simplicity, we implement CPU_SUSPEND emulation similar to WFI (Wait-for-interrupt) emulation and we also treat power-down request to be same as stand-by request. This is consistent with section 5.4.1 and section 5.4.2 of PSCI v0.2 specification. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit b376d02b53b87f8684f91f13ba4ee43331850fcd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 1067579c7336..09cf37737ee2 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -37,6 +37,26 @@ static unsigned long psci_affinity_mask(unsigned long affinity_level) return 0; } +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + + return PSCI_RET_SUCCESS; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -183,6 +203,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = 2; break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; case PSCI_0_2_FN_CPU_OFF: kvm_psci_vcpu_off(vcpu); val = PSCI_RET_SUCCESS; @@ -235,10 +259,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = PSCI_RET_INTERNAL_FAILURE; ret = 0; break; - case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN64_CPU_SUSPEND: - val = PSCI_RET_NOT_SUPPORTED; - break; default: return -EINVAL; } From f4aa5773a7ac1dad4bc6fc726b178547289e6045 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:25 +0530 Subject: [PATCH 175/287] ARM/ARM64: KVM: Advertise KVM_CAP_ARM_PSCI_0_2 to user space We have PSCI v0.2 emulation available in KVM ARM/ARM64 hence advertise this to user space (i.e. QEMU or KVMTOOL) via KVM_CHECK_EXTENSION ioctl. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4447a208f7fc2e2dff8c6a8df2a1fd6dd72fb3e2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9804406ff37e..354dc42b6395 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: r = 1; break; case KVM_CAP_COALESCED_MMIO: From 5ab6e0ee8a2375651052456289f6d397f78f737e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 16 Jan 2014 13:44:20 +0100 Subject: [PATCH 176/287] kvm/irqchip: Speed up KVM_SET_GSI_ROUTING When starting lots of dataplane devices the bootup takes very long on Christian's s390 with irqfd patches. With larger setups he is even able to trigger some timeouts in some components. Turns out that the KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec) when having multiple CPUs. This is caused by the synchronize_rcu and the HZ=100 of s390. By changing the code to use a private srcu we can speed things up. This patch reduces the boot time till mounting root from 8 to 2 seconds on my s390 guest with 100 disks. Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu are fine because they do not have lockdep checks (hlist_for_each_entry_rcu uses rcu_dereference_raw rather than rcu_dereference, and write-sides do not do rcu lockdep at all). Note that we're hardly relying on the "sleepable" part of srcu. We just want SRCU's faster detection of grace periods. Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS and RR. The difference between results "before" and "after" the patch has mean -0.2% and standard deviation 0.6%. Using a paired t-test on the data points says that there is a 2.5% probability that the patch is the cause of the performance difference (rather than a random fluctuation). (Restricting the t-test to RR, which is the most likely to be affected, changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8% probability that the numbers actually say something about the patch. The probability increases mostly because there are fewer data points). Cc: Marcelo Tosatti Cc: Michael S. Tsirkin Tested-by: Christian Borntraeger # s390 Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 + virt/kvm/eventfd.c | 25 +++++++++++++++---------- virt/kvm/irq_comm.c | 17 +++++++++-------- virt/kvm/irqchip.c | 31 ++++++++++++++++--------------- virt/kvm/kvm_main.c | 16 ++++++++++------ 5 files changed, 51 insertions(+), 39 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eef946f92c0c..a3c83491a791 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -346,6 +346,7 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; struct srcu_struct srcu; + struct srcu_struct irq_srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE u32 bsp_vcpu_id; #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 64ee720b75c7..b51c19ffd8fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "iodev.h" @@ -118,19 +119,22 @@ static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct _irqfd_resampler *resampler; + struct kvm *kvm; struct _irqfd *irqfd; + int idx; resampler = container_of(kian, struct _irqfd_resampler, notifier); + kvm = resampler->kvm; - kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); - rcu_read_lock(); + idx = srcu_read_lock(&kvm->irq_srcu); list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) eventfd_signal(irqfd->resamplefd, 1); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } static void @@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del(&resampler->link); @@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry *irq; struct kvm *kvm = irqfd->kvm; + int idx; if (flags & POLLIN) { - rcu_read_lock(); - irq = rcu_dereference(irqfd->irq_entry); + idx = srcu_read_lock(&kvm->irq_srcu); + irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); /* An event has been signaled, inject an interrupt */ if (irq) kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } if (flags & POLLHUP) { @@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_rcu done by caller + * It is paired with synchronize_srcu done by caller * of that function. */ rcu_assign_pointer(irqfd->irq_entry, NULL); @@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Change irq_routing and irqfd. - * Caller must invoke synchronize_rcu afterwards. + * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e2e6b4473a96..ced4a542a031 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; + int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * Since there's no easy way to do this, we only support injecting MSI * which is limited to 1:1 GSI mapping. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) { if (likely(e->type == KVM_IRQ_ROUTING_MSI)) @@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) ret = -EWOULDBLOCK; break; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, mutex_lock(&kvm->irq_lock); hlist_del_rcu(&kimn->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); } void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - int gsi; + int idx, gsi; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 20dc9e4a8f6c..b43c275775cd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include "irq.h" @@ -33,19 +34,19 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) { - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return true; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return false; } @@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; trace_kvm_ack_irq(irqchip, pin); - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); #ifdef __KVM_HAVE_IOAPIC kvm_vcpu_request_scan_ioapic(kvm); #endif @@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; + int ret = -1, i = 0, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { int r; @@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm, new); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; r = 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9f5fab0a4fda..17b107fa7a2b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -456,11 +456,11 @@ static struct kvm *kvm_create_vm(unsigned long type) r = kvm_arch_init_vm(kvm, type); if (r) - goto out_err_nodisable; + goto out_err_no_disable; r = hardware_enable_all(); if (r) - goto out_err_nodisable; + goto out_err_no_disable; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); @@ -472,10 +472,12 @@ static struct kvm *kvm_create_vm(unsigned long type) r = -ENOMEM; kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) - goto out_err_nosrcu; + goto out_err_no_srcu; kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) - goto out_err_nosrcu; + goto out_err_no_srcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); @@ -504,10 +506,12 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); -out_err_nosrcu: +out_err_no_srcu: hardware_disable_all(); -out_err_nodisable: +out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); kfree(kvm->memslots); From ccc916d9aac9d95baab922ab859f0ef37bb22558 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:10 +0100 Subject: [PATCH 177/287] arm64: barriers: make use of barrier options with explicit barriers When calling our low-level barrier macros directly, we can often suffice with more relaxed behaviour than the default "all accesses, full system" option. This patch updates the users of dsb() to specify the option which they actually require. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 98f7685ee69f871ba991089cb9685f0da07517ea) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 03244582bc55..c59a1bdab5eb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr) static void do_dc_cisw(u32 val) { asm volatile("dc cisw, %x0" : : "r" (val)); - dsb(); + dsb(ish); } static void do_dc_csw(u32 val) { asm volatile("dc csw, %x0" : : "r" (val)); - dsb(); + dsb(ish); } /* See note at ARM ARM B1.14.4 */ From 17acc52b78027a728c419a0d7b4d69d30b94294a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:14 +0100 Subject: [PATCH 178/287] arm64: kvm: use inner-shareable barriers for inner-shareable maintenance In order to ensure completion of inner-shareable maintenance instructions (cache and TLB) on AArch64, we can use the -ish suffix to the dsb instruction. This patch relaxes our dsb sy instructions to dsb ish where possible. Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit ee9e101c11478680d579bd20bb38a4d3e2514fe3) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 2c56012cb2d2..b0d1512acf08 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) * whole of Stage-1. Weep... */ tlbi ipas2e1is, x1 - dsb sy + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb ish tlbi vmalle1is - dsb sy + dsb ish isb msr vttbr_el2, xzr @@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context) dsb ishst tlbi alle1is ic ialluis - dsb sy + dsb ish ret ENDPROC(__kvm_flush_vm_context) From 502f5822c7ac7b11040d4abb7ca1d09d56de251c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 May 2014 18:06:03 +0100 Subject: [PATCH 179/287] arm64: KVM: Enable minimalistic support for Cortex-A53 In order to allow KVM to run on Cortex-A53 implementations, wire the minimal support required. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1252b3313642c3d0dff5b951b625468bf0dcd059) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/cputype.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/guest.c | 2 ++ arch/arm64/kvm/sys_regs_generic_v8.c | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5fe138e0b828..343f7f737970 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,6 +41,7 @@ #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 +#define ARM_CPU_PART_CORTEX_A53 0xD030 #define ARM_CPU_PART_CORTEX_A57 0xD070 #define APM_CPU_PART_POTENZA 0x0000 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e6471daf3fb5..e633ff8cdec8 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -57,8 +57,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 #define KVM_ARM_TARGET_XGENE_POTENZA 3 +#define KVM_ARM_TARGET_CORTEX_A53 4 -#define KVM_ARM_NUM_TARGETS 4 +#define KVM_ARM_NUM_TARGETS 5 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 08745578d54d..60b5c31f3c10 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_AEM_V8; case ARM_CPU_PART_FOUNDATION: return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A53: + return KVM_ARM_TARGET_CORTEX_A53; case ARM_CPU_PART_CORTEX_A57: return KVM_ARM_TARGET_CORTEX_A57; }; diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 8fe6f76b0edc..475fd2929310 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, + &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, From c3bd5cf97635e0d04a2b0a7025d4a7512a6e6345 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Jun 2014 13:44:17 +0200 Subject: [PATCH 180/287] KVM: add missing cleanup_srcu_struct Reported-by: hrg Signed-off-by: Paolo Bonzini (cherry picked from commit 820b3fcdeb80d30410f4427d2cbf9161c35fdeef) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 17b107fa7a2b..3db56912caed 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -607,6 +607,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); kvm_free_physmem(kvm); + cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); hardware_disable_all(); From 892f5c6841880062cbe947d08131c2c55613ae94 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2014 16:05:13 +0200 Subject: [PATCH 181/287] KVM: prepare for KVM_(S|G)ET_MP_STATE on other architectures Highlight the aspects of the ioctls that are actually specific to x86 and ia64. As defined restrictions (irqchip) and mp states may not apply to other architectures, these parts are flagged to belong to x86 and ia64. In preparation for the use of KVM_(S|G)ET_MP_STATE by s390. Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger (cherry picked from commit 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 21 ++++++++++++--------- include/uapi/linux/kvm.h | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 48e7888291f5..8d135672b69a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -969,18 +969,20 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal + which has not yet received an INIT signal [x86, + ia64] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI + now ready for a SIPI [x86, ia64] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt + is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) + accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.39 KVM_SET_MP_STATE @@ -994,8 +996,9 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.40 KVM_SET_IDENTITY_MAP_ADDR diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0a70874e4a63..521e4c0a08ac 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -399,8 +399,9 @@ struct kvm_vapic_addr { __u64 vapic_addr; }; -/* for KVM_SET_MPSTATE */ +/* for KVM_SET_MP_STATE */ +/* not all states are valid on all architectures */ #define KVM_MP_STATE_RUNNABLE 0 #define KVM_MP_STATE_UNINITIALIZED 1 #define KVM_MP_STATE_INIT_RECEIVED 2 From bdcfc46a6f2760588d3e59e2ff7abf235a2a513a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 9 May 2014 23:31:31 +0200 Subject: [PATCH 182/287] arm/arm64: KVM: Fix and refactor unmap_range unmap_range() was utterly broken, to quote Marc, and broke in all sorts of situations. It was also quite complicated to follow and didn't follow the usual scheme of having a separate iterating function for each level of page tables. Address this by refactoring the code and introduce a pgd_clear() function. Reviewed-by: Jungseok Lee Reviewed-by: Mario Smarduch Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4f853a714bf16338ff5261128e6c7ae2569e9505) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 12 +++ arch/arm/kvm/mmu.c | 165 ++++++++++++++++--------------- arch/arm64/include/asm/kvm_mmu.h | 15 +++ 3 files changed, 115 insertions(+), 77 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c1519f..5cc0b0f5f72f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16f804938b8f..23360610aeac 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static bool page_empty(void *ptr) +static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); + pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); } static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - if (pud_huge(*pud)) { - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); - } + pmd_t *pmd_table = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - if (kvm_pmd_huge(*pmd)) { - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); - } + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(kvm_pmd_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); put_page(virt_to_page(pmd)); } -static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) { - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (kvm_pte_table_empty(start_pte)) + clear_pmd_entry(kvm, pmd, start_addr); } -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - unsigned long long start, u64 size) +static void unmap_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long long addr = start, end = start + size; - u64 next; + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; - while (addr < end) { - pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - pte = NULL; - if (pud_none(*pud)) { - addr = kvm_pud_addr_end(addr, end); - continue; - } - - if (pud_huge(*pud)) { - /* - * If we are dealing with a huge pud, just clear it and - * move on. - */ - clear_pud_entry(kvm, pud, addr); - addr = kvm_pud_addr_end(addr, end); - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr = kvm_pmd_addr_end(addr, end); - continue; - } - - if (!kvm_pmd_huge(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; - } - - /* - * If the pmd entry is to be cleared, walk back up the ladder - */ - if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { - clear_pmd_entry(kvm, pmd, addr); - next = kvm_pmd_addr_end(addr, end); - if (page_empty(pmd) && !page_empty(pud)) { - clear_pud_entry(kvm, pud, addr); - next = kvm_pud_addr_end(addr, end); + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); + } else { + unmap_ptes(kvm, pmd, addr, next); } } + } while (pmd++, addr = next, addr != end); - addr = next; - } + if (kvm_pmd_table_empty(start_pmd)) + clear_pud_entry(kvm, pud, start_addr); +} + +static void unmap_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pud)); + } else { + unmap_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (kvm_pud_table_empty(start_pud)) + clear_pgd_entry(kvm, pgd, start_addr); +} + + +static void unmap_range(struct kvm *kvm, pgd_t *pgdp, + phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + pgd = pgdp + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + unmap_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); } static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7d29847a893b..8e138c7c53ac 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifndef CONFIG_ARM64_64K_PAGES +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#else +#define kvm_pmd_table_empty(pmdp) (0) +#endif +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) From 332075bc83a401bc1a4deb1caeea1d45bd340866 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 6 Jun 2014 11:10:23 +0200 Subject: [PATCH 183/287] ARM: KVM: Unmap IPA on memslot delete/move Currently when a KVM region is deleted or moved after KVM_SET_USER_MEMORY_REGION ioctl, the corresponding intermediate physical memory is not unmapped. This patch corrects this and unmaps the region's IPA range in kvm_arch_commit_memory_region using unmap_stage2_range. Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall (cherry picked from commit df6ce24f2ee485c4f9a5cb610063a5eb60da8267) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 37 ------------------------------------- arch/arm/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 354dc42b6395..e2c2bfd4da95 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} /** * kvm_arch_destroy_vm - destroy the VM data structure @@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - return 0; -} - -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 23360610aeac..b2a708be1407 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1111,3 +1111,49 @@ int kvm_mmu_init(void) free_hyp_pgds(); return err; } + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) +{ + gpa_t gpa = old->base_gfn << PAGE_SHIFT; + phys_addr_t size = old->npages << PAGE_SHIFT; + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} From c2ca19e2dc3f71d003a4ab691462d1df6974d00f Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 26 Jun 2014 01:45:51 +0100 Subject: [PATCH 184/287] ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping A userspace process can map device MMIO memory via VFIO or /dev/mem, e.g., for platform device passthrough support in QEMU. During early development, we found the PAGE_S2 memory type being used for MMIO mappings. This patch corrects that by using the more strongly ordered memory type for device MMIO mappings: PAGE_S2_DEVICE. Signed-off-by: Kim Phillips Acked-by: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier (cherry picked from commit b88657674d39fc2127d62d0de9ca142e166443c8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b2a708be1407..16e7994bf347 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -759,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; pfn_t pfn; + pgprot_t mem_type = PAGE_S2; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -809,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; + if (kvm_is_mmio_pfn(pfn)) + mem_type = PAGE_S2_DEVICE; + spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -816,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); if (hugetlb) { - pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); if (writable) { kvm_set_s2pmd_writable(&new_pmd); @@ -825,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { - pte_t new_pte = pfn_pte(pfn, PAGE_S2); + pte_t new_pte = pfn_pte(pfn, mem_type); if (writable) { kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, + mem_type == PAGE_S2_DEVICE); } From 896f51b90443e91d7358914df4a24d9c152c2b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 1 Jul 2014 16:53:13 +0100 Subject: [PATCH 185/287] arm64: KVM: export demux regids as KVM_REG_ARM64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I suspect this is a -ECUTPASTE fault from the initial implementation. If we don't declare the register ID to be KVM_REG_ARM64 the KVM_GET_ONE_REG implementation kvm_arm_get_reg() returns -EINVAL and hilarity ensues. The kvm/api.txt document describes all arm64 registers as starting with 0x60xx... (i.e KVM_REG_ARM64). Signed-off-by: Alex Bennée Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit efd48ceacea78e4d4656aa0a6bf4c5b92ed22130) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c59a1bdab5eb..34f25a590bd7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -962,7 +962,7 @@ static unsigned int num_demux_regs(void) static int write_demux_regids(u64 __user *uindices) { - u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; unsigned int i; val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; From c675c7cef37bad776609e6923fe64019c10e2945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 4 Jul 2014 15:54:14 +0100 Subject: [PATCH 186/287] arm64: KVM: allow export and import of generic timer regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For correct guest suspend/resume behaviour we need to ensure we include the generic timer registers for 64 bit guests. As CONFIG_KVM_ARM_TIMER is always set for arm64 we don't need to worry about null implementations. However I have re-jigged the kvm_arm_timer_set/get_reg declarations to be in the common include/kvm/arm_arch_timer.h headers. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall (cherry picked from commit 1df08ba0aa95f1a8832b7162eec51069bd9be7ae) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 -- arch/arm/kvm/guest.c | 10 ----- arch/arm64/kvm/guest.c | 68 ++++++++++++++++++++++++++++++++- include/kvm/arm_arch_timer.h | 14 +++++++ 4 files changed, 81 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 193ceaf01bfd..dc4e3edf39cc 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -228,7 +228,4 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index b23a59c1c522..986e625b5dbd 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index) return false; } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} - #else #define NUM_TIMER_REGS 3 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 60b5c31f3c10..8d1ec2887a26 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -135,6 +135,59 @@ static unsigned long num_core_regs(void) return sizeof(struct kvm_regs) / sizeof(__u32); } +/** + * ARM64 versions of the TIMER registers, always available on arm64 + */ + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + /** * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG * @@ -142,7 +195,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) + + NUM_TIMER_REGS; } /** @@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { if (put_user(core_reg | i, uindices)) @@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6d9aeddc09bf..ad9db6045b2f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -67,6 +67,10 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #else static inline int kvm_timer_hyp_init(void) { @@ -84,6 +88,16 @@ static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} + +static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} #endif #endif From 7f124577d6247ae088a0acb0762297a5bdaeb9a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 10:20:36 +0100 Subject: [PATCH 187/287] KVM: arm/arm64: vgic: move GICv2 registers to their own structure In order to make way for the GICv3 registers, move the v2-specific registers to their own structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit eede821dbfd58df89edb072da64e006321eaef58) Signed-off-by: Christoffer Dall --- arch/arm/kernel/asm-offsets.c | 14 ++++----- arch/arm/kvm/interrupts_head.S | 26 +++++++-------- arch/arm64/kernel/asm-offsets.c | 14 ++++----- arch/arm64/kvm/hyp.S | 26 +++++++-------- include/kvm/arm_vgic.h | 20 +++++++----- virt/kvm/arm/vgic.c | 56 ++++++++++++++++----------------- 6 files changed, 81 insertions(+), 75 deletions(-) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index aa2acc1dd986..776d9186e9c1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -176,13 +176,13 @@ int main(void) DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); #ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); #ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 76af93025574..e4eaf30205c5 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -421,14 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] - str r3, [r11, #VGIC_CPU_HCR] - str r4, [r11, #VGIC_CPU_VMCR] - str r5, [r11, #VGIC_CPU_MISR] - str r6, [r11, #VGIC_CPU_EISR] - str r7, [r11, #(VGIC_CPU_EISR + 4)] - str r8, [r11, #VGIC_CPU_ELRSR] - str r9, [r11, #(VGIC_CPU_ELRSR + 4)] - str r10, [r11, #VGIC_CPU_APR] + str r3, [r11, #VGIC_V2_CPU_HCR] + str r4, [r11, #VGIC_V2_CPU_VMCR] + str r5, [r11, #VGIC_V2_CPU_MISR] + str r6, [r11, #VGIC_V2_CPU_EISR] + str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] + str r8, [r11, #VGIC_V2_CPU_ELRSR] + str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ mov r5, #0 @@ -436,7 +436,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Save list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 str r6, [r3], #4 @@ -463,9 +463,9 @@ vcpu .req r0 @ vcpu pointer always in r0 add r11, vcpu, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr r3, [r11, #VGIC_CPU_HCR] - ldr r4, [r11, #VGIC_CPU_VMCR] - ldr r8, [r11, #VGIC_CPU_APR] + ldr r3, [r11, #VGIC_V2_CPU_HCR] + ldr r4, [r11, #VGIC_V2_CPU_VMCR] + ldr r8, [r11, #VGIC_V2_CPU_APR] str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -473,7 +473,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Restore list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 str r6, [r2], #4 diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 666e231d410b..dcfd8a616a94 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -127,13 +127,13 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index b0d1512acf08..877d82a134bc 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -412,14 +412,14 @@ CPU_BE( rev w9, w9 ) CPU_BE( rev w10, w10 ) CPU_BE( rev w11, w11 ) - str w4, [x3, #VGIC_CPU_HCR] - str w5, [x3, #VGIC_CPU_VMCR] - str w6, [x3, #VGIC_CPU_MISR] - str w7, [x3, #VGIC_CPU_EISR] - str w8, [x3, #(VGIC_CPU_EISR + 4)] - str w9, [x3, #VGIC_CPU_ELRSR] - str w10, [x3, #(VGIC_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_CPU_APR] + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ str wzr, [x2, #GICH_HCR] @@ -427,7 +427,7 @@ CPU_BE( rev w11, w11 ) /* Save list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x2], #4 CPU_BE( rev w5, w5 ) str w5, [x3], #4 @@ -452,9 +452,9 @@ CPU_BE( rev w5, w5 ) add x3, x0, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_CPU_HCR] - ldr w5, [x3, #VGIC_CPU_VMCR] - ldr w6, [x3, #VGIC_CPU_APR] + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] CPU_BE( rev w4, w4 ) CPU_BE( rev w5, w5 ) CPU_BE( rev w6, w6 ) @@ -466,7 +466,7 @@ CPU_BE( rev w6, w6 ) /* Restore list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x3], #4 CPU_BE( rev w5, w5 ) str w5, [x2], #4 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f27000f55a83..f738e5a69ee9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -110,6 +110,16 @@ struct vgic_dist { #endif }; +struct vgic_v2_cpu_if { + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_MAX_LRS]; +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -126,13 +136,9 @@ struct vgic_cpu { int nr_lr; /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + union { + struct vgic_v2_cpu_if vgic_v2; + }; #endif }; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9bebb577..0ba1ab0721fd 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) { clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } @@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) u32 *lr; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_lr[i]; + lr = &vgic_cpu->vgic_v2.vgic_lr[i]; irq = LR_IRQID(*lr); source_cpu = LR_CPUID(*lr); @@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { vgic_retire_lr(lr, irq, vgic_cpu); @@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Do we have an active interrupt for the same CPUID? */ if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); + lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; return true; } @@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; return true; } @@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. */ int lr, irq; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; /* Any additional pending interrupt? */ if (vgic_dist_irq_is_pending(vcpu, irq)) { @@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; + set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; } @@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) level_pending = vgic_process_maintenance(vcpu); /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { int irq; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; BUG_ON(irq >= VGIC_NR_IRQS); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); @@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * points to their reset values. Anything else resets to zero * anyway. */ - vgic_cpu->vgic_vmcr = 0; + vgic_cpu->vgic_v2.vgic_vmcr = 0; vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ return 0; } @@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_vmcr & mask)) + if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) updated = true; - vgic_cpu->vgic_vmcr &= ~mask; - vgic_cpu->vgic_vmcr |= reg; + vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; + vgic_cpu->vgic_v2.vgic_vmcr |= reg; } return updated; } From 9f92df7009267f1a105e1338def7e99da648417c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jun 2013 15:55:02 +0100 Subject: [PATCH 188/287] KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives In order to split the various register manipulation from the main vgic code, introduce a vgic_ops structure, and start by abstracting the LR manipulation code with a couple of accessors. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d5c6b06a5d5f8ebcf40558e566781d572920740) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 18 +++++ virt/kvm/arm/vgic.c | 162 ++++++++++++++++++++++++++++------------- 2 files changed, 128 insertions(+), 52 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f738e5a69ee9..17bbe51b79a1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -68,6 +68,24 @@ struct vgic_bytemap { u32 shared[VGIC_NR_SHARED_IRQS / 4]; }; +struct kvm_vcpu; + +#define LR_STATE_PENDING (1 << 0) +#define LR_STATE_ACTIVE (1 << 1) +#define LR_STATE_MASK (3 << 0) +#define LR_EOI_INT (1 << 2) + +struct vgic_lr { + u16 irq; + u8 source; + u8 state; +}; + +struct vgic_ops { + struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); + void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0ba1ab0721fd..11408fee600e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -94,9 +94,12 @@ static struct device_node *vgic_node; #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; @@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define LR_IRQID(lr) \ - ((lr) & GICH_LR_VIRTUALID) - -static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) -{ - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; -} - /** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int vcpu_id = vcpu->vcpu_id; - int i, irq, source_cpu; - u32 *lr; + int i; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_v2.vgic_lr[i]; - irq = LR_IRQID(*lr); - source_cpu = LR_CPUID(*lr); + struct vgic_lr lr = vgic_get_lr(vcpu, i); /* * There are three options for the state bits: @@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * If the LR holds only an active interrupt (not pending) then * just leave it alone. */ - if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) continue; /* @@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, irq); - if (irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; - *lr &= ~GICH_LR_PENDING_BIT; + vgic_dist_irq_set(vcpu, lr.irq); + if (lr.irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + lr.state &= ~LR_STATE_PENDING; + vgic_set_lr(vcpu, i, lr); /* * If there's no state left on the LR (it could still be * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(*lr & GICH_LR_STATE)) - vgic_retire_lr(i, irq, vgic_cpu); + if (!(lr.state & LR_STATE_MASK)) + vgic_retire_lr(i, lr.irq, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm) } } -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static const struct vgic_ops vgic_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, +}; + +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + return vgic_ops.get_lr(vcpu, lr); +} + +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.set_lr(vcpu, lr, vlr); +} + +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + + vlr.state = 0; + vgic_set_lr(vcpu, lr_nr, vlr); + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} /* * An interrupt may have been disabled after being made pending on the @@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_retire_lr(lr, irq, vgic_cpu); - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { + vgic_retire_lr(lr, vlr.irq, vcpu); + if (vgic_irq_is_active(vcpu, vlr.irq)) + vgic_irq_clear_active(vcpu, vlr.irq); } } } @@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr; int lr; /* Sanitize the input... */ @@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr = vgic_cpu->vgic_irq_lr_map[irq]; /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; + if (lr != LR_EMPTY) { + vlr = vgic_get_lr(vcpu, lr); + if (vlr.source == sgi_source_id) { + kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vlr.state |= LR_STATE_PENDING; + vgic_set_lr(vcpu, lr, vlr); + return true; + } } /* Try to use another LR for this interrupt */ @@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); + vlr.irq = irq; + vlr.source = sgi_source_id; + vlr.state = LR_STATE_PENDING; if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr, vlr); return true; } @@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ - int lr, irq; + int lr; for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_irq_clear_active(vcpu, vlr.irq); + WARN_ON(vlr.state & LR_STATE_MASK); + vlr.state = 0; + vgic_set_lr(vcpu, lr, vlr); /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); + if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { - vgic_cpu_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); } /* @@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * been marked as empty. */ set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Clear mappings for empty LRs */ for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { - int irq; + struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + vlr = vgic_get_lr(vcpu, lr); - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + BUG_ON(vlr.irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ From 32d98f25d20daedfa49c32a73e57630bdc0e74a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:29:39 +0100 Subject: [PATCH 189/287] KVM: ARM: vgic: abstract access to the ELRSR bitmap Move the GICH_ELRSR access to its own functions, and add them to the vgic_ops structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/vgic.c | 46 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 17bbe51b79a1..38864f5e47bc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -84,6 +84,8 @@ struct vgic_lr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); + void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); + u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 11408fee600e..6dcc974fa2b4 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; } +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops.set_lr(vcpu, lr, vlr); } +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); +} + +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_elrsr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); } } @@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr; + unsigned long *elrsr_ptr; int lr, pending; bool level_pending; level_pending = vgic_process_maintenance(vcpu); + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr); + pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } From b874bb5cb4795de150d1157a0cf10138122cc093 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:33:43 +0100 Subject: [PATCH 190/287] KVM: ARM: vgic: abstract EISR bitmap access Move the GICH_EISR access to its own function. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d6a0313c125c3c7b208b75695fe6ab00afab4c5) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic.c | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 38864f5e47bc..ccb9b59818f4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -86,6 +86,7 @@ struct vgic_ops { void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); + u64 (*get_eisr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6dcc974fa2b4..1e857e6e66b5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) return val; } +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) return vgic_ops.get_elrsr(vcpu); } +static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_eisr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ + u64 eisr = vgic_get_eisr(vcpu); + unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); From 3f79fb5a583821ca4c97d1b694285d36697e69c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:02:10 +0100 Subject: [PATCH 191/287] KVM: ARM: vgic: abstract MISR decoding Instead of directly dealing with the GICH_MISR bits, move the code to its own function and use a couple of public flags to represent the actual state. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 495dd859f304689a7cd5ef413c439cb090dc25e6) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++++ virt/kvm/arm/vgic.c | 26 +++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ccb9b59818f4..4857508b12e7 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -87,6 +87,7 @@ struct vgic_ops { void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); + u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { @@ -165,6 +166,9 @@ struct vgic_cpu { #define LR_EMPTY 0xff +#define INT_STATUS_EOI (1 << 0) +#define INT_STATUS_UNDERFLOW (1 << 1) + struct kvm; struct kvm_vcpu; struct kvm_run; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1e857e6e66b5..c0bcc9735424 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return val; } +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops.get_eisr(vcpu); } +static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_interrupt_status(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1282,11 +1301,12 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); + kvm_debug("STATUS = %08x\n", status); - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { + if (status & INT_STATUS_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. @@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } } - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + if (status & INT_STATUS_UNDERFLOW) vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; From 900f2c6eeaaa0daf752a06d654fed8d98d626f9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:24:17 +0100 Subject: [PATCH 192/287] KVM: ARM: vgic: move underflow handling to vgic_ops Move the code dealing with LR underflow handling to its own functions, and make them accessible through vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 909d9b5025f149af6cfc304a76ad6218e6622cc0) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/vgic.c | 28 +++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4857508b12e7..cdfa5d9567c6 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -88,6 +88,8 @@ struct vgic_ops { u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); + void (*enable_underflow)(struct kvm_vcpu *vcpu); + void (*disable_underflow)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c0bcc9735424..6d618e0b08a1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) return ret; } +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = { .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) return vgic_ops.get_interrupt_status(vcpu); } +static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable_underflow(vcpu); +} + +static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.disable_underflow(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; + vgic_enable_underflow(vcpu); } else { - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } if (status & INT_STATUS_UNDERFLOW) - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); return level_pending; } From 32a0e35ba5210fab6b6379971bba24886b69952a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 17:48:10 +0000 Subject: [PATCH 193/287] KVM: ARM: vgic: abstract VMCR access Instead of directly messing with with the GICH_VMCR bits for the CPU interface save/restore code, add accessors that encode/decode the entire set of registers exposed by VMCR. Not the most efficient thing, but given that this code is only used by the save/restore code, performance is far from being critical. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 9 ++++++ virt/kvm/arm/vgic.c | 69 ++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdfa5d9567c6..f51580043170 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -81,6 +81,13 @@ struct vgic_lr { u8 state; }; +struct vgic_vmcr { + u32 ctlr; + u32 abpr; + u32 bpr; + u32 pmr; +}; + struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); @@ -90,6 +97,8 @@ struct vgic_ops { u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); void (*enable_underflow)(struct kvm_vcpu *vcpu); void (*disable_underflow)(struct kvm_vcpu *vcpu); + void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6d618e0b08a1..5c706393956d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u32 vgic_nr_lr; +static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, @@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; } +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = { .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) vgic_ops.disable_underflow(vcpu); } +static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.get_vmcr(vcpu, vmcr); +} + +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.set_vmcr(vcpu, vmcr); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u32 reg, mask = 0, shift = 0; bool updated = false; + struct vgic_vmcr vmcr; + u32 *vmcr_field; + u32 reg; + + vgic_get_vmcr(vcpu, &vmcr); switch (offset & ~0x3) { case GIC_CPU_CTRL: - mask = GICH_VMCR_CTRL_MASK; - shift = GICH_VMCR_CTRL_SHIFT; + vmcr_field = &vmcr.ctlr; break; case GIC_CPU_PRIMASK: - mask = GICH_VMCR_PRIMASK_MASK; - shift = GICH_VMCR_PRIMASK_SHIFT; + vmcr_field = &vmcr.pmr; break; case GIC_CPU_BINPOINT: - mask = GICH_VMCR_BINPOINT_MASK; - shift = GICH_VMCR_BINPOINT_SHIFT; + vmcr_field = &vmcr.bpr; break; case GIC_CPU_ALIAS_BINPOINT: - mask = GICH_VMCR_ALIAS_BINPOINT_MASK; - shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcr_field = &vmcr.abpr; break; + default: + BUG(); } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; + reg = *vmcr_field; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); - reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) + if (reg != *vmcr_field) { + *vmcr_field = reg; + vgic_set_vmcr(vcpu, &vmcr); updated = true; - vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; - vgic_cpu->vgic_v2.vgic_vmcr |= reg; + } } return updated; } From 6d6775fa71660fd7c58741881d6bbbfd2be68f3a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:36:38 +0100 Subject: [PATCH 194/287] KVM: ARM: vgic: introduce vgic_enable Move the code dealing with enabling the VGIC on to vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit da8dafd1777cdd93091207952297d221a88e6479) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic.c | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f51580043170..2228973ea8e4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -99,6 +99,7 @@ struct vgic_ops { void (*disable_underflow)(struct kvm_vcpu *vcpu); void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*enable)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5c706393956d..70f674bb13a1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = { .disable_underflow = vgic_v2_disable_underflow, .get_vmcr = vgic_v2_get_vmcr, .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) vgic_ops.set_vmcr(vcpu, vmcr); } +static inline void vgic_enable(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_v2.vgic_vmcr = 0; - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + vgic_enable(vcpu); return 0; } From 6e707c119719a9261ac7305194c0004d11bb594a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2013 19:17:28 +0100 Subject: [PATCH 195/287] KVM: ARM: introduce vgic_params structure Move all the data specific to a given GIC implementation into its own little structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ca85f623e37d096206e092ef037a145a60fa7f85) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 11 +++++++ virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2228973ea8e4..ce2e14226dbf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -102,6 +102,17 @@ struct vgic_ops { void (*enable)(struct kvm_vcpu *vcpu); }; +struct vgic_params { + /* Physical address of vgic virtual cpu interface */ + phys_addr_t vcpu_base; + /* Number of list registers */ + u32 nr_lr; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface base address */ + void __iomem *vctrl_base; +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 70f674bb13a1..f3a996d0a100 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -76,14 +76,6 @@ #define IMPLEMENTER_ARM 0x43b #define GICC_ARCH_VERSION_V2 0x2 -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - #define ACCESS_READ_VALUE (1 << 0) #define ACCESS_READ_RAZ (0 << 0) #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) @@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static u32 vgic_nr_lr; -static unsigned int vgic_maint_irq; +static struct vgic_params vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) + vgic.nr_lr); + if (lr >= vgic.nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1377,7 +1368,6 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; @@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); + if (level_pending || pending < vgic.nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - vgic_cpu->nr_lr = vgic_nr_lr; + /* + * Store the number of LRs per vcpu, so we don't have to go + * all the way to the distributor structure to find out. Only + * assembly code should use this one. + */ + vgic_cpu->nr_lr = vgic.nr_lr; vgic_enable(vcpu); @@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic_maint_irq, 0); + enable_percpu_irq(vgic.maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); + disable_percpu_irq(vgic.maint_irq); break; } @@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void) int ret; struct resource vctrl_res; struct resource vcpu_res; + struct device_node *vgic_node; vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); if (!vgic_node) { @@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void) return -ENODEV; } - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { + vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic.maint_irq) { kvm_err("error getting vgic maintenance irq from DT\n"); ret = -ENXIO; goto out; } - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); goto out; } @@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { + vgic.vctrl_base = of_iomap(vgic_node, 2); + if (!vgic.vctrl_base) { kvm_err("Cannot ioremap VCTRL\n"); ret = -ENOMEM; goto out_free_irq; } - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); + vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), + ret = create_hyp_io_mappings(vgic.vctrl_base, + vgic.vctrl_base + resource_size(&vctrl_res), vctrl_res.start); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); @@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void) } kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); + vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { @@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void) ret = -ENXIO; goto out_unmap; } - vgic_vcpu_base = vcpu_res.start; + vgic.vcpu_base = vcpu_res.start; goto out; out_unmap: - iounmap(vgic_vctrl_base); + iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); + free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); out: of_node_put(vgic_node); return ret; @@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vctrl_base = vgic.vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From ab8bb40339927ab4e5875330461a9a9ffaf42a56 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 18:13:03 +0000 Subject: [PATCH 196/287] KVM: ARM: vgic: split GICv2 backend from the main vgic code Brutally hack the innocent vgic code, and move the GICv2 specific code to its own file, using vgic_ops and vgic_params as a way to pass information between the two blocks. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8f186d522c69bb18dd9b93a634da4953228c67d4) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Makefile | 2 +- include/kvm/arm_vgic.h | 11 +- virt/kvm/arm/vgic-v2.c | 248 +++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 267 +++++++--------------------------------- 5 files changed, 304 insertions(+), 225 deletions(-) create mode 100644 virt/kvm/arm/vgic-v2.c diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 789bca9e64a7..f7057ed045b6 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 72a9fd583ad3..7e92952d139e 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ce2e14226dbf..d8d52a9ca6a1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,7 +32,8 @@ #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) #define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) + +#define VGIC_V2_MAX_LRS (1 << 6) /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -162,7 +163,7 @@ struct vgic_v2_cpu_if { u32 vgic_eisr[2]; /* Saved only */ u32 vgic_elrsr[2]; /* Saved only */ u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + u32 vgic_lr[VGIC_V2_MAX_LRS]; }; struct vgic_cpu { @@ -175,7 +176,7 @@ struct vgic_cpu { DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); + DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); /* Number of list registers on this CPU */ int nr_lr; @@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) #define vgic_initialized(k) ((k)->arch.vgic.ready) +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); + #else static inline int kvm_vgic_hyp_init(void) { diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c new file mode 100644 index 000000000000..940418ebd0d0 --- /dev/null +++ b/virt/kvm/arm/vgic-v2.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +static const struct vgic_ops vgic_v2_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, +}; + +static struct vgic_params vgic_v2_params; + +/** + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv2 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv2 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v2_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain GICH resource\n"); + goto out; + } + + vgic->vctrl_base = of_iomap(vgic_node, 2); + if (!vgic->vctrl_base) { + kvm_err("Cannot ioremap GICH\n"); + ret = -ENOMEM; + goto out; + } + + vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); + vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic->vctrl_base, + vgic->vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain GICV resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic->maint_irq); + + *ops = &vgic_v2_ops; + *params = vgic; + goto out; + +out_unmap: + iounmap(vgic->vctrl_base); +out: + of_node_put(vgic_node); + return ret; +} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f3a996d0a100..e4b9cbbbee4c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static struct vgic_params vgic; +static const struct vgic_ops *vgic_ops; +static const struct vgic_params *vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm) } } -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} - -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - if (!(lr_desc.state & LR_STATE_MASK)) - set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -#endif - return val; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -#endif - return val; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - - /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) { - return vgic_ops.get_lr(vcpu, lr); + return vgic_ops->get_lr(vcpu, lr); } static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.set_lr(vcpu, lr, vlr); + vgic_ops->set_lr(vcpu, lr, vlr); } static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); + vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); } static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_elrsr(vcpu); + return vgic_ops->get_elrsr(vcpu); } static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_eisr(vcpu); + return vgic_ops->get_eisr(vcpu); } static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { - return vgic_ops.get_interrupt_status(vcpu); + return vgic_ops->get_interrupt_status(vcpu); } static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.enable_underflow(vcpu); + vgic_ops->enable_underflow(vcpu); } static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.disable_underflow(vcpu); + vgic_ops->disable_underflow(vcpu); } static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.get_vmcr(vcpu, vmcr); + vgic_ops->get_vmcr(vcpu, vmcr); } static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.set_vmcr(vcpu, vmcr); + vgic_ops->set_vmcr(vcpu, vmcr); } static inline void vgic_enable(struct kvm_vcpu *vcpu) { - vgic_ops.enable(vcpu); + vgic_ops->enable(vcpu); } static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) @@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic.nr_lr); - if (lr >= vgic.nr_lr) + vgic->nr_lr); + if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1443,8 +1300,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); - if (level_pending || pending < vgic.nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); + if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * all the way to the distributor structure to find out. Only * assembly code should use this one. */ - vgic_cpu->nr_lr = vgic.nr_lr; + vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); @@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic.maint_irq, 0); + enable_percpu_irq(vgic->maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic.maint_irq); + disable_percpu_irq(vgic->maint_irq); break; } @@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = { .notifier_call = vgic_cpu_notify, }; +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + {}, +}; + int kvm_vgic_hyp_init(void) { - int ret; - struct resource vctrl_res; - struct resource vcpu_res; + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; + int ret; - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); + kvm_err("error: no compatible GIC node found\n"); return -ENODEV; } - vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic.maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; - ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); - goto out; + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; } ret = __register_cpu_notifier(&vgic_cpu_nb); @@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic.vctrl_base = of_iomap(vgic_node, 2); - if (!vgic.vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); - vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic.vctrl_base, - vgic.vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic.vcpu_base = vcpu_res.start; + return 0; - goto out; - -out_unmap: - iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } @@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic.vctrl_base; + kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From 79f17b6074e34191952a9249b061012a23f3888a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 May 2014 10:03:25 +0100 Subject: [PATCH 197/287] KVM: ARM: vgic: revisit implementation of irqchip_in_kernel So far, irqchip_in_kernel() was implemented by testing the value of vctrl_base, which worked fine with GICv2. With GICv3, this field is useless, as we're using system registers instead of a emmory mapped interface. To solve this, add a boolean flag indicating if the we're using a vgic or not. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit f982cf4e9c37b19478c7bc6e0484a43a7e78cf57) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 ++- virt/kvm/arm/vgic.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d8d52a9ca6a1..f6b9fec6fcac 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -117,6 +117,7 @@ struct vgic_params { struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; + bool in_kernel; bool ready; /* Virtual control interface mapping */ @@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e4b9cbbbee4c..1348e74c4d11 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From cab0beb93ff52f5a47cf9a1a03c9c73d473d28d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Jun 2013 15:16:40 +0100 Subject: [PATCH 198/287] arm64: KVM: remove __kvm_hyp_code_{start,end} from hyp.S We already have __hyp_text_{start,end} to express the boundaries of the HYP text section, and __kvm_hyp_code_{start,end} are getting in the way of a more modular world switch code. Just turn __kvm_hyp_code_{start,end} into #defines mapping the linker-emited symbols. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 45451914c875bba44903ce4f1445e047b7992bf7) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 6 ++++-- arch/arm64/include/asm/virt.h | 4 ++++ arch/arm64/kvm/hyp.S | 6 ------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9fcd54b1e16d..d0bfc4ba82c0 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -18,6 +18,8 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include + /* * 0 is reserved as an invalid value. * Order *must* be kept in sync with the hyp switch code. @@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[]; extern char __kvm_hyp_vector[]; -extern char __kvm_hyp_code_start[]; -extern char __kvm_hyp_code_end[]; +#define __kvm_hyp_code_start __hyp_text_start +#define __kvm_hyp_code_end __hyp_text_end extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 215ad4649dd7..7a5df5252dd7 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 877d82a134bc..9c5d0acb3654 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -36,9 +36,6 @@ .pushsection .hyp.text, "ax" .align PAGE_SHIFT -__kvm_hyp_code_start: - .globl __kvm_hyp_code_start - .macro save_common_regs // x2: base address for cpu context // x3: tmp register @@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) -__kvm_hyp_code_end: - .globl __kvm_hyp_code_end - .popsection From fd17281069b65b1672718613d8c89cc9a409bfcd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 11:57:56 +0100 Subject: [PATCH 199/287] arm64: KVM: split GICv2 world switch from hyp code Move the GICv2 world switch code into its own file, and add the necessary indirection to the arm64 switch code. Also introduce a new type field to the vgic_params structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 1a9b13056dde7e3092304d6041ccc60a913042ea) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 ++ arch/arm64/include/asm/kvm_asm.h | 4 + arch/arm64/include/asm/kvm_host.h | 21 +++++ arch/arm64/kernel/asm-offsets.c | 3 + arch/arm64/kvm/Makefile | 4 +- arch/arm64/kvm/hyp.S | 104 ++++------------------- arch/arm64/kvm/vgic-v2-switch.S | 133 ++++++++++++++++++++++++++++++ include/kvm/arm_vgic.h | 7 +- virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic.c | 3 + 10 files changed, 195 insertions(+), 90 deletions(-) create mode 100644 arch/arm64/kvm/vgic-v2-switch.S diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dc4e3edf39cc..6dfb404f6c46 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) return 0; } +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + BUG_ON(vgic->type != VGIC_V2); +} + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d0bfc4ba82c0..6252264341c8 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +extern char __save_vgic_v2_state[]; +extern char __restore_vgic_v2_state[]; + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 92242ce06309..4c182d0aae70 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } +struct vgic_sr_vectors { + void *save_vgic; + void *restore_vgic; +}; + +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + extern struct vgic_sr_vectors __vgic_sr_vectors; + + switch(vgic->type) + { + case VGIC_V2: + __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; + break; + + default: + BUG(); + } +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index dcfd8a616a94..9ff0b2b97392 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -127,6 +127,9 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); + DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); + DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7e92952d139e..daf24dc59e2c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 9c5d0acb3654..56df9a352a81 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -16,7 +16,6 @@ */ #include -#include #include #include @@ -376,100 +375,23 @@ .endm /* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! + * Call into the vgic backend for state saving */ .macro save_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w4, [x2, #GICH_HCR] - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w4, [x3, #VGIC_V2_CPU_HCR] - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] - str w7, [x3, #VGIC_V2_CPU_EISR] - str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] - str w9, [x3, #VGIC_V2_CPU_ELRSR] - str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, VGIC_SAVE_FN] + kern_hyp_va x24 + blr x24 .endm /* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct + * Call into the vgic backend for state restoring */ .macro restore_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, #VGIC_RESTORE_FN] + kern_hyp_va x24 + blr x24 .endm .macro save_timer_state @@ -650,6 +572,12 @@ ENTRY(__kvm_flush_vm_context) ret ENDPROC(__kvm_flush_vm_context) + // struct vgic_sr_vectors __vgi_sr_vectors; + .align 3 +ENTRY(__vgic_sr_vectors) + .skip VGIC_SR_VECTOR_SZ +ENDPROC(__vgic_sr_vectors) + __kvm_hyp_panic: // Guess the context by looking at VTTBR: // If zero, then we're already a host. diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S new file mode 100644 index 000000000000..ae211772f991 --- /dev/null +++ b/arch/arm64/kvm/vgic-v2-switch.S @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.text, "ax" + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +ENTRY(__save_vgic_v2_state) +__save_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) + + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__save_vgic_v2_state) + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +ENTRY(__restore_vgic_v2_state) +__restore_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__restore_vgic_v2_state) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f6b9fec6fcac..65f1121a3beb 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,7 +24,6 @@ #include #include #include -#include #define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 @@ -71,6 +70,10 @@ struct vgic_bytemap { struct kvm_vcpu; +enum vgic_type { + VGIC_V2, /* Good ol' GICv2 */ +}; + #define LR_STATE_PENDING (1 << 0) #define LR_STATE_ACTIVE (1 << 1) #define LR_STATE_MASK (3 << 0) @@ -104,6 +107,8 @@ struct vgic_ops { }; struct vgic_params { + /* vgic type */ + enum vgic_type type; /* Physical address of vgic virtual cpu interface */ phys_addr_t vcpu_base; /* Number of list registers */ diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 940418ebd0d0..d6c9c142f813 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node, kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, vgic->maint_irq); + vgic->type = VGIC_V2; *ops = &vgic_v2_ops; *params = vgic; goto out; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1348e74c4d11..7867b9a1f694 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void) on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + return 0; out_free_irq: From c292a45791af6d60cc7c644809f84810a38e1771 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2013 18:19:11 +0100 Subject: [PATCH 200/287] arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into the vgic switch code GICv3 requires the IMO and FMO bits to be tightly coupled with some of the interrupt controller's register switch. In order to have similar code paths, move the manipulation of these bits to the GICv2 switch code. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ac3c3747e2db2f326ffc601651de544cdd33a8e9) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 5 +++-- arch/arm64/kvm/hyp.S | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d6903006a8a..cc83520459ed 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -76,9 +76,10 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) +#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) + /* Hyp System Control Register (SCTLR_EL2) bits */ #define SCTLR_EL2_EE (1 << 25) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 56df9a352a81..5945f3bdea7a 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -335,11 +335,8 @@ .endm .macro activate_traps - ldr x2, [x0, #VCPU_IRQ_LINES] - ldr x1, [x0, #VCPU_HCR_EL2] - orr x2, x2, x1 - msr hcr_el2, x2 - + ldr x2, [x0, #VCPU_HCR_EL2] + msr hcr_el2, x2 ldr x2, =(CPTR_EL2_TTA) msr cptr_el2, x2 @@ -382,12 +379,22 @@ ldr x24, [x24, VGIC_SAVE_FN] kern_hyp_va x24 blr x24 + mrs x24, hcr_el2 + mov x25, #HCR_INT_OVERRIDE + neg x25, x25 + and x24, x24, x25 + msr hcr_el2, x24 .endm /* * Call into the vgic backend for state restoring */ .macro restore_vgic_state + mrs x24, hcr_el2 + ldr x25, [x0, #VCPU_IRQ_LINES] + orr x24, x24, #HCR_INT_OVERRIDE + orr x24, x24, x25 + msr hcr_el2, x24 adr x24, __vgic_sr_vectors ldr x24, [x24, #VGIC_RESTORE_FN] kern_hyp_va x24 From 9a35d57d6984382050d493e2fa1140c9a6b78671 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Jul 2013 15:15:23 +0100 Subject: [PATCH 201/287] KVM: ARM: vgic: add the GICv3 backend Introduce the support code for emulating a GICv2 on top of GICv3 hardware. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b2fb1c0d378399e1427a91bb991c094f2ca09a2f) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/kvm/vgic-v3-switch.S | 29 ++++ include/kvm/arm_vgic.h | 28 ++++ virt/kvm/arm/vgic-v3.c | 231 +++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+) create mode 100644 arch/arm64/kvm/vgic-v3-switch.S create mode 100644 virt/kvm/arm/vgic-v3.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6252264341c8..ed4987bf9ac7 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern u64 __vgic_v3_get_ich_vtr_el2(void); + extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S new file mode 100644 index 000000000000..9fbf27350c84 --- /dev/null +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__vgic_v3_get_ich_vtr_el2) + mrs x0, ICH_VTR_EL2 + ret +ENDPROC(__vgic_v3_get_ich_vtr_el2) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 65f1121a3beb..35b0c121bb65 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -33,6 +33,7 @@ #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) +#define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -72,6 +73,7 @@ struct kvm_vcpu; enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ + VGIC_V3, /* New fancy GICv3 */ }; #define LR_STATE_PENDING (1 << 0) @@ -172,6 +174,19 @@ struct vgic_v2_cpu_if { u32 vgic_lr[VGIC_V2_MAX_LRS]; }; +struct vgic_v3_cpu_if { +#ifdef CONFIG_ARM_GIC_V3 + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr; /* Saved only */ + u32 vgic_elrsr; /* Saved only */ + u32 vgic_ap0r[4]; + u32 vgic_ap1r[4]; + u64 vgic_lr[VGIC_V3_MAX_LRS]; +#endif +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -190,6 +205,7 @@ struct vgic_cpu { /* CPU vif control registers for world switch */ union { struct vgic_v2_cpu_if vgic_v2; + struct vgic_v3_cpu_if vgic_v3; }; #endif }; @@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, const struct vgic_params **params); +#ifdef CONFIG_ARM_GIC_V3 +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); +#else +static inline int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + return -ENODEV; +} +#endif #else static inline int kvm_vgic_hyp_init(void) diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c new file mode 100644 index 000000000000..f01d44685720 --- /dev/null +++ b/virt/kvm/arm/vgic-v3.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. + */ +#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + +static u32 ich_vtr_el2; + +static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & ICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & ICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & ICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | + lr_desc.irq); + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= ICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= ICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= ICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; +} + +static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); +} + +static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; +} + +static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; +} + +static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; + u32 ret = 0; + + if (misr & ICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & ICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + + vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; +} + +static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; +} + +static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; +} + +static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; +} + +static void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; +} + +static const struct vgic_ops vgic_v3_ops = { + .get_lr = vgic_v3_get_lr, + .set_lr = vgic_v3_set_lr, + .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, + .get_elrsr = vgic_v3_get_elrsr, + .get_eisr = vgic_v3_get_eisr, + .get_interrupt_status = vgic_v3_get_interrupt_status, + .enable_underflow = vgic_v3_enable_underflow, + .disable_underflow = vgic_v3_disable_underflow, + .get_vmcr = vgic_v3_get_vmcr, + .set_vmcr = vgic_v3_set_vmcr, + .enable = vgic_v3_enable, +}; + +static struct vgic_params vgic_v3_params; + +/** + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv3 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv3 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret = 0; + u32 gicv_idx; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v3_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + + /* + * The ListRegs field is 5 bits, but there is a architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; + + if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) + gicv_idx = 1; + + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ + if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { + kvm_err("Cannot obtain GICV region\n"); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; + vgic->vctrl_base = NULL; + vgic->type = VGIC_V3; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vcpu_res.start, vgic->maint_irq); + + *ops = &vgic_v3_ops; + *params = vgic; + +out: + of_node_put(vgic_node); + return ret; +} From 825ae8cc53eeb522fb8c8bf9095fbee898cc1766 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: [PATCH 202/287] arm64: KVM: vgic: add GICv3 world switch Introduce the GICv3 world switch code used to save/restore the GICv3 context. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 754d37726010d872f1f714a8ce8920acdfa4978c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/kernel/asm-offsets.c | 8 ++ arch/arm64/kvm/vgic-v3-switch.S | 238 +++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ed4987bf9ac7..a28c35b337ec 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void); extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; +extern char __save_vgic_v3_state[]; +extern char __restore_vgic_v3_state[]; #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9ff0b2b97392..65ebb2ccde5f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -137,6 +137,14 @@ int main(void) DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); + DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); + DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); + DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); + DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); + DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); + DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); + DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 9fbf27350c84..21e68f606a8f 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -18,9 +18,247 @@ #include #include +#include +#include +#include +#include +#include +#include + .text .pushsection .hyp.text, "ax" +/* + * We store LRs in reverse order to let the CPU deal with streaming + * access. Use this macro to make it look saner... + */ +#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_v3_state + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Make sure stores to the GIC via the memory mapped interface + // are now visible to the system register interface + dsb st + + // Save all interesting registers + mrs x4, ICH_HCR_EL2 + mrs x5, ICH_VMCR_EL2 + mrs x6, ICH_MISR_EL2 + mrs x7, ICH_EISR_EL2 + mrs x8, ICH_ELSR_EL2 + + str w4, [x3, #VGIC_V3_CPU_HCR] + str w5, [x3, #VGIC_V3_CPU_VMCR] + str w6, [x3, #VGIC_V3_CPU_MISR] + str w7, [x3, #VGIC_V3_CPU_EISR] + str w8, [x3, #VGIC_V3_CPU_ELRSR] + + msr ICH_HCR_EL2, xzr + + mrs x21, ICH_VTR_EL2 + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + mrs x20, ICH_LR15_EL2 + mrs x19, ICH_LR14_EL2 + mrs x18, ICH_LR13_EL2 + mrs x17, ICH_LR12_EL2 + mrs x16, ICH_LR11_EL2 + mrs x15, ICH_LR10_EL2 + mrs x14, ICH_LR9_EL2 + mrs x13, ICH_LR8_EL2 + mrs x12, ICH_LR7_EL2 + mrs x11, ICH_LR6_EL2 + mrs x10, ICH_LR5_EL2 + mrs x9, ICH_LR4_EL2 + mrs x8, ICH_LR3_EL2 + mrs x7, ICH_LR2_EL2 + mrs x6, ICH_LR1_EL2 + mrs x5, ICH_LR0_EL2 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + str x20, [x3, #LR_OFFSET(15)] + str x19, [x3, #LR_OFFSET(14)] + str x18, [x3, #LR_OFFSET(13)] + str x17, [x3, #LR_OFFSET(12)] + str x16, [x3, #LR_OFFSET(11)] + str x15, [x3, #LR_OFFSET(10)] + str x14, [x3, #LR_OFFSET(9)] + str x13, [x3, #LR_OFFSET(8)] + str x12, [x3, #LR_OFFSET(7)] + str x11, [x3, #LR_OFFSET(6)] + str x10, [x3, #LR_OFFSET(5)] + str x9, [x3, #LR_OFFSET(4)] + str x8, [x3, #LR_OFFSET(3)] + str x7, [x3, #LR_OFFSET(2)] + str x6, [x3, #LR_OFFSET(1)] + str x5, [x3, #LR_OFFSET(0)] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP0R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + mrs x19, ICH_AP0R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] +6: mrs x18, ICH_AP0R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] +5: mrs x17, ICH_AP0R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP0R] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP1R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + mrs x19, ICH_AP1R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] +6: mrs x18, ICH_AP1R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] +5: mrs x17, ICH_AP1R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP1R] + + // Restore SRE_EL1 access and re-enable SRE at EL1. + mrs x5, ICC_SRE_EL2 + orr x5, x5, #ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 + isb + mov x5, #1 + msr ICC_SRE_EL1, x5 +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_v3_state + // Disable SRE_EL1 access. Necessary, otherwise + // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... + msr ICC_SRE_EL1, xzr + isb + + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Restore all interesting registers + ldr w4, [x3, #VGIC_V3_CPU_HCR] + ldr w5, [x3, #VGIC_V3_CPU_VMCR] + + msr ICH_HCR_EL2, x4 + msr ICH_VMCR_EL2, x5 + + mrs x21, ICH_VTR_EL2 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + msr ICH_AP1R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] + msr ICH_AP1R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] + msr ICH_AP1R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] + msr ICH_AP1R0_EL2, x17 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + msr ICH_AP0R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] + msr ICH_AP0R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] + msr ICH_AP0R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] + msr ICH_AP0R0_EL2, x17 + + and w22, w21, #0xf + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + ldr x20, [x3, #LR_OFFSET(15)] + ldr x19, [x3, #LR_OFFSET(14)] + ldr x18, [x3, #LR_OFFSET(13)] + ldr x17, [x3, #LR_OFFSET(12)] + ldr x16, [x3, #LR_OFFSET(11)] + ldr x15, [x3, #LR_OFFSET(10)] + ldr x14, [x3, #LR_OFFSET(9)] + ldr x13, [x3, #LR_OFFSET(8)] + ldr x12, [x3, #LR_OFFSET(7)] + ldr x11, [x3, #LR_OFFSET(6)] + ldr x10, [x3, #LR_OFFSET(5)] + ldr x9, [x3, #LR_OFFSET(4)] + ldr x8, [x3, #LR_OFFSET(3)] + ldr x7, [x3, #LR_OFFSET(2)] + ldr x6, [x3, #LR_OFFSET(1)] + ldr x5, [x3, #LR_OFFSET(0)] + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + msr ICH_LR15_EL2, x20 + msr ICH_LR14_EL2, x19 + msr ICH_LR13_EL2, x18 + msr ICH_LR12_EL2, x17 + msr ICH_LR11_EL2, x16 + msr ICH_LR10_EL2, x15 + msr ICH_LR9_EL2, x14 + msr ICH_LR8_EL2, x13 + msr ICH_LR7_EL2, x12 + msr ICH_LR6_EL2, x11 + msr ICH_LR5_EL2, x10 + msr ICH_LR4_EL2, x9 + msr ICH_LR3_EL2, x8 + msr ICH_LR2_EL2, x7 + msr ICH_LR1_EL2, x6 + msr ICH_LR0_EL2, x5 + + // Ensure that the above will have reached the + // (re)distributors. This ensure the guest will read + // the correct values from the memory-mapped interface. + isb + dsb sy + + // Prevent the guest from touching the GIC system registers + mrs x5, ICC_SRE_EL2 + and x5, x5, #~ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 +.endm + +ENTRY(__save_vgic_v3_state) + save_vgic_v3_state + ret +ENDPROC(__save_vgic_v3_state) + +ENTRY(__restore_vgic_v3_state) + restore_vgic_v3_state + ret +ENDPROC(__restore_vgic_v3_state) + ENTRY(__vgic_v3_get_ich_vtr_el2) mrs x0, ICH_VTR_EL2 ret From 0e705de508452a4437225048231f8e7aa5f56d2c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: [PATCH 203/287] arm64: KVM: vgic: enable GICv2 emulation on top on GICv3 hardware Add the last missing bits that enable GICv2 emulation on top of GICv3 hardware. Signed-off-by: Marc Zyngier (cherry picked from commit 67b2abfedb7b861bead93400fa315c5c30879d51) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 7 +++++++ arch/arm64/kvm/Makefile | 2 ++ virt/kvm/arm/vgic.c | 1 + 3 files changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4c182d0aae70..4ae9213aa997 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; break; +#ifdef CONFIG_ARM_GIC_V3 + case VGIC_V3: + __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; + break; +#endif + default: BUG(); } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index daf24dc59e2c..32a096174b94 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7867b9a1f694..795ab482333d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = { static const struct of_device_id vgic_ids[] = { { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; From ff815e50964d2593064d47c9dbd4a3bb03ea3e18 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:01 -0700 Subject: [PATCH 204/287] ARM: KVM: fix vgic V7 assembler code to work in BE image The vgic h/w registers are little endian; when BE asm code reads/writes from/to them, it needs to do byteswap after/before. Byteswap code uses ARM_BE8 wrapper to add swap only if CONFIG_CPU_BIG_ENDIAN is configured. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 64054c25cf7e060cd6780744fefe7ed3990e4f21) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts_head.S | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index e4eaf30205c5..68d99c69639c 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,4 +1,5 @@ #include +#include #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -420,6 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r8, [r2, #GICH_ELRSR0] ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r5, r5 ) +ARM_BE8(rev r6, r6 ) +ARM_BE8(rev r7, r7 ) +ARM_BE8(rev r8, r8 ) +ARM_BE8(rev r9, r9 ) +ARM_BE8(rev r10, r10 ) str r3, [r11, #VGIC_V2_CPU_HCR] str r4, [r11, #VGIC_V2_CPU_VMCR] @@ -439,6 +448,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 +ARM_BE8(rev r6, r6 ) str r6, [r3], #4 subs r4, r4, #1 bne 1b @@ -466,6 +476,9 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r3, [r11, #VGIC_V2_CPU_HCR] ldr r4, [r11, #VGIC_V2_CPU_VMCR] ldr r8, [r11, #VGIC_V2_CPU_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r8, r8 ) str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -476,6 +489,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 +ARM_BE8(rev r6, r6 ) str r6, [r2], #4 subs r4, r4, #1 bne 1b From af39d18ecef850d57705c56cda31da0d855424ea Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:02 -0700 Subject: [PATCH 205/287] ARM: KVM: handle 64bit values passed to mrcc or from mcrr instructions in BE case In some cases the mcrr and mrrc instructions in combination with the ldrd and strd instructions need to deal with 64bit value in memory. The ldrd and strd instructions already handle endianness within word (register) boundaries but to get effect of the whole 64bit value represented correctly, rr_lo_hi macro is introduced and is used to swap registers positions when the mcrr and mrrc instructions are used. That has the effect of swapping two words. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 19b0e60a63f758a28329aa40f4270a6c98c2dcb7) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 18 ++++++++++++++++++ arch/arm/kvm/init.S | 4 ++-- arch/arm/kvm/interrupts.S | 4 ++-- arch/arm/kvm/interrupts_head.S | 6 +++--- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 53b3c4a50d5c..3a67bec72d0c 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -61,6 +61,24 @@ #define ARM_EXCEPTION_FIQ 6 #define ARM_EXCEPTION_HVC 7 +/* + * The rr_lo_hi macro swaps a pair of registers depending on + * current endianness. It is used in conjunction with ldrd and strd + * instructions that load/store a 64-bit value from/to memory to/from + * a pair of registers which are used with the mrrc and mcrr instructions. + * If used with the ldrd/strd instructions, the a1 parameter is the first + * source/destination register and the a2 parameter is the second + * source/destination register. Note that the ldrd/strd instructions + * already swap the bytes within the words correctly according to the + * endianness setting, but the order of the registers need to be effectively + * swapped when used with the mrrc/mcrr instructions. + */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define rr_lo_hi(a1, a2) a2, a1 +#else +#define rr_lo_hi(a1, a2) a1, a2 +#endif + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1b9844d369cc..2cc14dfad049 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -71,7 +71,7 @@ __do_hyp_init: bne phase2 @ Yes, second stage init @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. @@ -137,7 +137,7 @@ phase2: mov pc, r0 target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 isb @ Invalidate the old TLBs diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 0d68d4073068..24d4e65806a7 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) dsb ish @@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run) ldr r1, [vcpu, #VCPU_KVM] add r1, r1, #KVM_VTTBR ldrd r2, r3, [r1] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR @ We're all done, just restore the GPRs and go to the guest restore_guest_regs diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 68d99c69639c..98c8c5b9a87f 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -520,7 +520,7 @@ ARM_BE8(rev r6, r6 ) mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL isb - mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 strd r2, r3, [r5] @@ -560,12 +560,12 @@ ARM_BE8(rev r6, r6 ) ldr r2, [r4, #KVM_TIMER_CNTVOFF] ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] - mcrr p15, 4, r2, r3, c14 @ CNTVOFF + mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 ldrd r2, r3, [r5] - mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL isb ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] From b76b02ab6245e8641cc4acdc16b0050132bd9065 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:03 -0700 Subject: [PATCH 206/287] ARM: KVM: __kvm_vcpu_run function return result fix in BE case The __kvm_vcpu_run function returns a 64-bit result in two registers, which has to be adjusted for BE case. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 6d7311b520864531c81f0e0237e96146d8057d77) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 24d4e65806a7..01dcb0e752d9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -199,8 +199,13 @@ after_vfp_restore: restore_host_regs clrex @ Clear exclusive monitor +#ifndef CONFIG_CPU_ENDIAN_BE8 mov r0, r1 @ Return the return code mov r1, #0 @ Clear upper bits in return value +#else + @ r1 already has return code + mov r0, #0 @ Clear upper bits in return value +#endif /* CONFIG_CPU_ENDIAN_BE8 */ bx lr @ return to IOCTL /******************************************************************** From df5bce90aca5ced2e4ccee2a9ef913b064a47aea Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:04 -0700 Subject: [PATCH 207/287] ARM: KVM: vgic mmio should hold data as LE bytes array in BE case According to recent clarifications of mmio.data array meaning - the mmio.data array should hold bytes as they would appear in memory. Vgic is little endian device. And in case of BE image kernel side that emulates vgic, holds data in BE form. So we need to byteswap cpu<->le32 vgic registers when we read/write them from mmio.data[]. Change has no effect in LE case because cpu already runs in le32. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 795ab482333d..b0edc8c670f8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { - return *((u32 *)mmio->data) & mask; + return le32_to_cpu(*((u32 *)mmio->data)) & mask; } static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) { - *((u32 *)mmio->data) = value & mask; + *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } /** From d6ff09058017a8dfd96fa24502691c83df6566e7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:05 -0700 Subject: [PATCH 208/287] ARM: KVM: MMIO support BE host running LE code In case of status register E bit is not set (LE mode) and host runs in BE mode we need byteswap data, so read/write is emulated correctly. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 27f194fd360a96cc64bebb2d69dd5abd67984b8a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 0fa90c962ac8..69b746955fca 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be32_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + default: + return le32_to_cpu(data); + } } - - return data; /* Leave LE untouched */ } static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, @@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be32(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + default: + return cpu_to_le32(data); + } } - - return data; /* Leave LE untouched */ } #endif /* __ARM_KVM_EMULATE_H__ */ From 1f5f8779f88483402671d58f0e307934218e22f6 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:06 -0700 Subject: [PATCH 209/287] ARM: KVM: one_reg coproc set and get BE fixes Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE image. Before this fix get/set_one_reg functions worked correctly only in LE case - reg_from_user was taking 'void *' kernel address that actually could be target/source memory of either 4 bytes size or 8 bytes size, and code copied from/to user memory that could hold either 4 bytes register, 8 byte register or pair of 4 bytes registers. In order to work in endian agnostic way reg_from_user to reg_to_user functions should copy register value only to kernel variable with size that matches register size. In few place where size mismatch existed fix issue on macro caller side. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 73891f72c414afff6da6f01e7af2ff5a44a8b823) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 88 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index c58a35116f63..37a0fe1bb9bb 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -44,6 +44,31 @@ static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ #define CSSELR_MAX 12 +/* + * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some + * of cp15 registers can be viewed either as couple of two u32 registers + * or one u64 register. Current u64 register encoding is that least + * significant u32 word is followed by most significant u32 word. + */ +static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, + const struct coproc_reg *r, + u64 val) +{ + vcpu->arch.cp15[r->reg] = val & 0xffffffff; + vcpu->arch.cp15[r->reg + 1] = val >> 32; +} + +static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, + const struct coproc_reg *r) +{ + u64 val; + + val = vcpu->arch.cp15[r->reg + 1]; + val = val << 32; + val = val | vcpu->arch.cp15[r->reg]; + return val; +} + int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run) { kvm_inject_undefined(vcpu); @@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; +/* + * Reads a register value from a userspace address to a kernel + * variable. Make sure that register size matches sizeof(*__val). + */ static int reg_from_user(void *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } +/* + * Writes a register value to a userspace address from a kernel variable. + * Make sure that register size matches sizeof(*__val). + */ static int reg_to_user(void __user *uaddr, const void *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; @@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) { struct coproc_params params; const struct coproc_reg *r; + int ret; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - return reg_to_user(uaddr, &r->val, id); + ret = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val = r->val; + + ret = reg_to_user(uaddr, &val, id); + } else if (KVM_REG_SIZE(id) == 8) { + ret = reg_to_user(uaddr, &r->val, id); + } + return ret; } static int set_invariant_cp15(u64 id, void __user *uaddr) @@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) struct coproc_params params; const struct coproc_reg *r; int err; - u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + u64 val; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - err = reg_from_user(&val, uaddr, id); + err = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val32; + + err = reg_from_user(&val32, uaddr, id); + if (!err) + val = val32; + } else if (KVM_REG_SIZE(id) == 8) { + err = reg_from_user(&val, uaddr, id); + } if (err) return err; @@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_get(reg->id, uaddr); @@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return get_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit. */ - return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + val = vcpu_cp15_reg64_get(vcpu, r); + ret = reg_to_user(uaddr, &val, reg->id); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + } + + return ret; } int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_set(reg->id, uaddr); @@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return set_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit */ - return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + ret = reg_from_user(&val, uaddr, reg->id); + if (!ret) + vcpu_cp15_reg64_set(vcpu, r, val); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + } + + return ret; } static unsigned int num_demux_regs(void) From 7c1537fb45e64848a4c6f19b66fec1671d608d1b Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:07 -0700 Subject: [PATCH 210/287] ARM: KVM: enable KVM in Kconfig on big-endian systems Previous patches addresses ARMV7 big-endian virtualiztion, kvm related issues, so enable ARM_VIRT_EXT for big-endian now. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit f5aa462147a209dab40b02f0f70234784b913bf9) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 4be5bb150bdd..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN + depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. From 5d129b5dba663c42acca6a4711cc8732f692a18a Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:08 -0700 Subject: [PATCH 211/287] ARM64: KVM: MMIO support BE host running LE code In case of guest CPU running in LE mode and host runs in BE mode we need byteswap data, so read/write is emulated correctly. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit b30070862edbdb252f9d0d3a1e61b8dc4c68e3d2) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index dd8ecfc3f995..fdc3e21abd8d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be64_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + case 4: + return le32_to_cpu(data & 0xffffffff); + default: + return le64_to_cpu(data); + } } return data; /* Leave LE untouched */ @@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be64(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + case 4: + return cpu_to_le32(data & 0xffffffff); + default: + return cpu_to_le64(data); + } } return data; /* Leave LE untouched */ From e51ab0ff6436524131e948a9a31258bb859b1ec5 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:09 -0700 Subject: [PATCH 212/287] ARM64: KVM: store kvm_vcpu_fault_info est_el2 as word esr_el2 field of struct kvm_vcpu_fault_info has u32 type. It should be stored as word. Current code works in LE case because existing puts least significant word of x1 into esr_el2, and it puts most significant work of x1 into next field, which accidentally is OK because it is updated again by next instruction. But existing code breaks in BE case. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit ba083d20d8cfa9e999043cd89c4ebc964ccf8927) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5945f3bdea7a..7874e022d077 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -762,7 +762,7 @@ el1_trap: mrs x2, far_el2 2: mrs x0, tpidr_el2 - str x1, [x0, #VCPU_ESR_EL2] + str w1, [x0, #VCPU_ESR_EL2] str x2, [x0, #VCPU_FAR_EL2] str x3, [x0, #VCPU_HPFAR_EL2] From df032d7ed86bcaf850b549698c62eb182c2cb4e7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:10 -0700 Subject: [PATCH 213/287] ARM64: KVM: fix vgic_bitmap_get_reg function for BE 64bit case Fix vgic_bitmap_get_reg function to return 'right' word address of 'unsigned long' bitmap value in case of BE 64bit image. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 9662fb4854e1319b4affda47f279c3f210316def) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b0edc8c670f8..ede8f6466c95 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; +/* + * struct vgic_bitmap contains unions that provide two views of + * the same data. In one case it is an array of registers of + * u32's, and in the other case it is a bitmap of unsigned + * longs. + * + * This does not work on 64-bit BE systems, because the bitmap access + * will store two consecutive 32-bit words with the higher-addressed + * register's bits at the lower index and the lower-addressed register's + * bits at the higher index. + * + * Therefore, swizzle the register index when accessing the 32-bit word + * registers to access the right register's value. + */ +#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 +#define REG_OFFSET_SWIZZLE 1 +#else +#define REG_OFFSET_SWIZZLE 0 +#endif + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg; + return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); else - return x->shared.reg + offset - 1; + return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, From 21c40b1a24a48d76af181a1b18315a4f0e61b37e Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:12 -0700 Subject: [PATCH 214/287] ARM64: KVM: set and get of sys registers in BE case Since size of all sys registers is always 8 bytes. Current code is actually endian agnostic. Just clean it up a bit. Removed comment about little endian. Change type of pointer from 'void *' to 'u64 *' to enforce stronger type checking. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 26c99af1018c35020cfad1d20f02acb224807655) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 34f25a590bd7..f0ceceffa95a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -776,17 +776,15 @@ static struct sys_reg_desc invariant_sys_regs[] = { NULL, get_ctr_el0 }, }; -static int reg_from_user(void *val, const void __user *uaddr, u64 id) +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } -static int reg_to_user(void __user *uaddr, const void *val, u64 id) +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; From 5208e0ff0d5097acc955d60d4daa395893a772ce Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Wed, 2 Jul 2014 17:19:30 +0100 Subject: [PATCH 215/287] ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest Fix issue with 32bit guests running on top of BE KVM host. Indexes of high and low words of 64bit cp15 register are swapped in case of big endian code, since 64bit cp15 state is restored or saved with double word write or read instruction. Define helper macro to access low words of 64bit cp15 register. Signed-off-by: Victor Kamensky Signed-off-by: Marc Zyngier (cherry picked from commit f0a3eaff71b8bd5d5acfda1f0cf3eedf49755622) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/sys_regs.c | 10 ++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4ae9213aa997..503c70661636 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -140,6 +140,12 @@ struct kvm_vcpu_arch { #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) #define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +#ifdef CONFIG_CPU_BIG_ENDIAN +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#else +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#endif + struct kvm_vm_stat { u32 remote_tlb_flush; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f0ceceffa95a..56288f31c12d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -134,13 +134,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32) { + if (!p->is_aarch32 || !p->is_32bit) vcpu_sys_reg(vcpu, r->reg) = val; - } else { - vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL; - if (!p->is_32bit) - vcpu_cp15(vcpu, r->reg + 1) = val >> 32; - } + else + vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + return true; } From 129fb6b2b94c46e279406736a959cc39a23500c6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:21:16 +0100 Subject: [PATCH 216/287] arm64: KVM: rename pm_fake handler to trap_raz_wi pm_fake doesn't quite describe what the handler does (ignoring writes and returning 0 for reads). As we're about to use it (a lot) in a different context, rename it with a (admitedly cryptic) name that make sense for all users. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 7609c1251f9d8bbcd6a05ba22153e50cf4f88cff) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 83 ++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 56288f31c12d..492ba301e10c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -161,18 +161,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, return true; } -/* - * We could trap ID_DFR0 and tell the guest we don't support performance - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was - * NAKed, so it will read the PMCR anyway. - * - * Therefore we tell the guest we have 0 counters. Unfortunately, we - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for - * all PM registers, which doesn't crash the guest kernel at least. - */ -static bool pm_fake(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool trap_raz_wi(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) { if (p->is_write) return ignore_write(vcpu, p); @@ -199,6 +190,17 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + * + * Same goes for the whole debug infrastructure, which probably breaks + * some guest functionnality. This should be fixed. */ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -258,10 +260,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMINTENCLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), @@ -290,43 +292,43 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMCR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMCNTENSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMCNTENCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMOVSCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* PMSWINC_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - pm_fake }, + trap_raz_wi }, /* PMSELR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - pm_fake }, + trap_raz_wi }, /* PMCEID0_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), - pm_fake }, + trap_raz_wi }, /* PMCEID1_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - pm_fake }, + trap_raz_wi }, /* PMCCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMXEVTYPER_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMXEVCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMUSERENR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMOVSSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* TPIDR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), @@ -372,19 +374,20 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, - { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, + /* PMU */ + { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, From efacbc423cf057a9051ca0fd240e68f5596700e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 12:13:14 +0100 Subject: [PATCH 217/287] arm64: move DBG_MDSCR_* to asm/debug-monitors.h In order to be able to use the DBG_MDSCR_* macros from the KVM code, move the relevant definitions to the obvious include file. Also move the debug_el enum to a portion of the file that is guarded by #ifndef __ASSEMBLY__ in order to use that file from assembly code. Acked-by: Will Deacon Reviewed-by: Anup Patel Signed-off-by: Marc Zyngier (cherry picked from commit 51ba248164d0eeb8b4f94d405430c18a56c6ac9a) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/debug-monitors.h | 19 ++++++++++++++----- arch/arm64/kernel/debug-monitors.c | 9 --------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7c951a510b54..aab72ce22348 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,15 @@ #ifdef __KERNEL__ +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ @@ -73,11 +82,6 @@ #define CACHE_FLUSH_IS_SAFE 1 -enum debug_el { - DBG_ACTIVE_EL0 = 0, - DBG_ACTIVE_EL1, -}; - /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 @@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); +enum debug_el { + DBG_ACTIVE_EL0 = 0, + DBG_ACTIVE_EL1, +}; + void enable_debug_monitors(enum debug_el el); void disable_debug_monitors(enum debug_el el); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 553a120fc838..fea84694fce4 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -31,15 +31,6 @@ #include #include -/* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) -#define DBG_SPSR_SS (1 << 21) - -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - /* Determine debug architecture. */ u8 debug_monitors_arch(void) { From ad4686e6b3c82d7cf3f79853cc12f655ce444668 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:24:46 +0100 Subject: [PATCH 218/287] arm64: KVM: add trap handlers for AArch64 debug registers Add handlers for all the AArch64 debug registers that are accessible from EL0 or EL1. The trapping code keeps track of the state of the debug registers, allowing for the switch code to implement a lazy switching strategy. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 0c557ed4983b7abe152212b5b1726c2a789b2c61) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 28 ++++-- arch/arm64/include/asm/kvm_host.h | 3 + arch/arm64/kvm/sys_regs.c | 150 +++++++++++++++++++++++++++++- 3 files changed, 172 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a28c35b337ec..660f75c48bbb 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -45,14 +45,25 @@ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ +#define DBGBCR15_EL1 38 +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ +#define DBGBVR15_EL1 54 +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ +#define DBGWCR15_EL1 70 +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ +#define DBGWVR15_EL1 86 +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ + /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 22 /* Domain Access Control Register */ -#define IFSR32_EL2 23 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 28 +#define DACR32_EL2 88 /* Domain Access Control Register */ +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 94 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -89,6 +100,9 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 503c70661636..8e410f761918 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -101,6 +101,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* Debug state */ + u64 debug_flags; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 492ba301e10c..d53ce430b178 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "sys_regs.h" @@ -171,6 +172,73 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + *vcpu_reg(vcpu, p->Rt) = (1 << 3); + return true; + } +} + +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u32 val; + asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); + *vcpu_reg(vcpu, p->Rt) = val; + return true; + } +} + +/* + * We want to avoid world-switching all the DBG registers all the + * time: + * + * - If we've touched any debug register, it is likely that we're + * going to touch more of them. It then makes sense to disable the + * traps and start doing the save/restore dance + * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is + * then mandatory to save/restore the registers, as the guest + * depends on them. + * + * For this, we use a DIRTY bit, indicating the guest has modified the + * debug registers, used as follow: + * + * On guest entry: + * - If the dirty bit is set (because we're coming back from trapping), + * disable the traps, save host registers, restore guest registers. + * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), + * set the dirty bit, disable the traps, save host registers, + * restore guest registers. + * - Otherwise, enable the traps + * + * On guest exit: + * - If the dirty bit is set, save guest registers, restore host + * registers and clear the dirty bit. This ensure that the host can + * now use the debug registers. + */ +static bool trap_debug_regs(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + } + + return true; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -187,6 +255,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); } +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ + /* DBGBVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + /* DBGBCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + /* DBGWVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + /* DBGWCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -199,8 +282,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * must always support PMCCNTR (the cycle counter): we just RAZ/WI for * all PM registers, which doesn't crash the guest kernel at least. * - * Same goes for the whole debug infrastructure, which probably breaks - * some guest functionnality. This should be fixed. + * Debug handling: We do trap most, if not all debug related system + * registers. The implementation is good enough to ensure that a guest + * can use these with minimal performance degradation. The drawback is + * that we don't implement any of the external debug, none of the + * OSlock protocol. This should be revisited if we ever encounter a + * more demanding guest... */ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -213,12 +300,71 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), access_dcsw }, + DBG_BCR_BVR_WCR_WVR_EL1(0), + DBG_BCR_BVR_WCR_WVR_EL1(1), + /* MDCCINT_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + /* MDSCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + DBG_BCR_BVR_WCR_WVR_EL1(2), + DBG_BCR_BVR_WCR_WVR_EL1(3), + DBG_BCR_BVR_WCR_WVR_EL1(4), + DBG_BCR_BVR_WCR_WVR_EL1(5), + DBG_BCR_BVR_WCR_WVR_EL1(6), + DBG_BCR_BVR_WCR_WVR_EL1(7), + DBG_BCR_BVR_WCR_WVR_EL1(8), + DBG_BCR_BVR_WCR_WVR_EL1(9), + DBG_BCR_BVR_WCR_WVR_EL1(10), + DBG_BCR_BVR_WCR_WVR_EL1(11), + DBG_BCR_BVR_WCR_WVR_EL1(12), + DBG_BCR_BVR_WCR_WVR_EL1(13), + DBG_BCR_BVR_WCR_WVR_EL1(14), + DBG_BCR_BVR_WCR_WVR_EL1(15), + + /* MDRAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + trap_raz_wi }, + /* OSLAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), + trap_raz_wi }, + /* OSLSR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), + trap_oslsr_el1 }, + /* OSDLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), + trap_raz_wi }, + /* DBGPRCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), + trap_raz_wi }, + /* DBGCLAIMSET_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), + trap_raz_wi }, + /* DBGCLAIMCLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), + trap_raz_wi }, + /* DBGAUTHSTATUS_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), + trap_dbgauthstatus_el1 }, + /* TEECR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), NULL, reset_val, TEECR32_EL1, 0 }, /* TEEHBR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), NULL, reset_val, TEEHBR32_EL1, 0 }, + + /* MDCCSR_EL1 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), + trap_raz_wi }, + /* DBGDTR_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), + trap_raz_wi }, + /* DBGDTR[TR]X_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), + trap_raz_wi }, + /* DBGVCR32_EL2 */ { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), NULL, reset_val, DBGVCR32_EL2, 0 }, From d62af886035cc34e73e82e06179951f6c4e72c6d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:27:13 +0100 Subject: [PATCH 219/287] arm64: KVM: common infrastructure for handling AArch32 CP14/CP15 As we're about to trap a bunch of CP14 registers, let's rework the CP15 handling so it can be generalized and work with multiple tables. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 72564016aae45f42e488f926bc803f9a2e1c771c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/include/asm/kvm_coproc.h | 3 +- arch/arm64/include/asm/kvm_host.h | 13 ++- arch/arm64/kvm/handle_exit.c | 4 +- arch/arm64/kvm/sys_regs.c | 133 +++++++++++++++++++++++----- 5 files changed, 124 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 660f75c48bbb..69027ded5006 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,7 +95,7 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ -#define NR_CP15_REGS (NR_SYS_REGS * 2) +#define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 9a59301cd014..0b52377a6c11 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8e410f761918..79812be4f25f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -86,7 +86,7 @@ struct kvm_cpu_context { struct kvm_regs gp_regs; union { u64 sys_regs[NR_SYS_REGS]; - u32 cp15[NR_CP15_REGS]; + u32 copro[NR_COPRO_REGS]; }; }; @@ -141,12 +141,17 @@ struct kvm_vcpu_arch { #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +/* + * CP14 and CP15 live in the same array, as they are backed by the + * same system registers. + */ +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 182415e1a952..e28be510380c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI] = kvm_handle_wfx, [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, - [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, - [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, [ESR_EL2_EC_HVC32] = handle_hvc, [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d53ce430b178..266afd972ad3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,6 +494,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; +/* Trapped cp14 registers */ +static const struct sys_reg_desc cp14_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit @@ -601,26 +605,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) +/* + * emulate_cp -- tries to match a sys_reg access in a handling table, and + * call the corresponding trap handler. + * + * @params: pointer to the descriptor of the access + * @table: array of trap descriptors + * @num: size of the trap descriptor array + * + * Return 0 if the access has been handled, and -1 if not. + */ +static int emulate_cp(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params, + const struct sys_reg_desc *table, + size_t num) { - kvm_inject_undefined(vcpu); - return 1; -} + const struct sys_reg_desc *r; -static void emulate_cp15(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - size_t num; - const struct sys_reg_desc *table, *r; + if (!table) + return -1; /* Not handled */ - table = get_target_table(vcpu->arch.target, false, &num); - - /* Search target-specific then generic table. */ r = find_reg(params, table, num); - if (!r) - r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); - if (likely(r)) { + if (r) { /* * Not having an accessor means that we have * configured a trap that we don't know how to @@ -632,22 +639,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return; } - /* If access function fails, it should complain. */ + + /* Handled */ + return 0; } - kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + /* Not handled */ + return -1; +} + +static void unhandled_cp_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + int cp; + + switch(hsr_ec) { + case ESR_EL2_EC_CP15_32: + case ESR_EL2_EC_CP15_64: + cp = 15; + break; + case ESR_EL2_EC_CP14_MR: + case ESR_EL2_EC_CP14_64: + cp = 14; + break; + default: + WARN_ON((cp = -1)); + } + + kvm_err("Unsupported guest CP%d access at: %08lx\n", + cp, *vcpu_pc(vcpu)); print_sys_reg_instr(params); kvm_inject_undefined(vcpu); } /** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -676,8 +712,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) *vcpu_reg(vcpu, params.Rt) = val; } - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + goto out; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + goto out; + unhandled_cp_access(vcpu, ¶ms); + +out: /* Do the opposite hack for the read side */ if (!params.is_write) { u64 val = *vcpu_reg(vcpu, params.Rt); @@ -693,7 +735,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -708,10 +754,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + return 1; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + return 1; + + unhandled_cp_access(vcpu, ¶ms); return 1; } +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_64(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_32(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_64(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_32(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *params) { From 693292e6a93a775a91421b16f87f632e5d7f077b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 14:11:48 +0100 Subject: [PATCH 220/287] arm64: KVM: use separate tables for AArch32 32 and 64bit traps An interesting "feature" of the CP14 encoding is that there is an overlap between 32 and 64bit registers, meaning they cannot live in the same table as we did for CP15. Create separate tables for 64bit CP14 and CP15 registers, and let the top level handler use the right one. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit a9866ba0cddfc497335fa02a175c4578b96722ff) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 266afd972ad3..499a351fd1b9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -498,13 +498,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { static const struct sys_reg_desc cp14_regs[] = { }; +/* Trapped cp14 64bit registers */ +static const struct sys_reg_desc cp14_64_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -545,6 +548,10 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, +}; + +static const struct sys_reg_desc cp15_64_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; @@ -770,7 +777,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) target_specific = get_target_table(vcpu->arch.target, false, &num); return kvm_handle_cp_64(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), + cp15_64_regs, ARRAY_SIZE(cp15_64_regs), target_specific, num); } @@ -788,7 +795,7 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { return kvm_handle_cp_64(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), + cp14_64_regs, ARRAY_SIZE(cp14_64_regs), NULL, 0); } From 2c4bc5c6442250ebe7a1b08ed297fc48aef99547 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:43:39 +0100 Subject: [PATCH 221/287] arm64: KVM: check ordering of all system register tables We now have multiple tables for the various system registers we trap. Make sure we check the order of all of them, as it is critical that we get the order right (been there, done that...). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit e6a9551760623d1703487e8a16bb9c3ea8a7e7a8) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 499a351fd1b9..8ab47c7326ed 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1308,14 +1308,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i < n; i++) { + if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ - for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) - BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) From c85f50a20b43754698af82b27279ca87cb04bd3a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:31:37 +0100 Subject: [PATCH 222/287] arm64: KVM: add trap handlers for AArch32 debug registers Add handlers for all the AArch32 debug registers that are accessible from EL0 or EL1. The code follow the same strategy as the AArch64 counterpart with regards to tracking the dirty state of the debug registers. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit bdfb4b389c8d8f07e2d5b8e1291e01c789ba4aad) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 9 ++ arch/arm64/kvm/sys_regs.c | 144 ++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 69027ded5006..483842180f8f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,6 +95,15 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + #define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8ab47c7326ed..a4fd5267c65b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,12 +494,153 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; -/* Trapped cp14 registers */ +static bool trap_dbgidr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + u32 el3 = !!((pfr >> 12) & 0xf); + + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | + (((dfr >> 12) & 0xf) << 24) | + (((dfr >> 28) & 0xf) << 20) | + (6 << 16) | (el3 << 14) | (el3 << 12)); + return true; + } +} + +static bool trap_debug32(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ + NULL, (cp14_DBGBVR0 + (n) * 2) }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ + NULL, (cp14_DBGBCR0 + (n) * 2) }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ + NULL, (cp14_DBGWVR0 + (n) * 2) }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ + NULL, (cp14_DBGWCR0 + (n) * 2) } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ + NULL, cp14_DBGBXVR0 + n * 2 } + +/* + * Trapped cp14 registers. We generally ignore most of the external + * debug, on the principle that they don't really make sense to a + * guest. Revisit this one day, whould this principle change. + */ static const struct sys_reg_desc cp14_regs[] = { + /* DBGIDR */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr }, + /* DBGDTRRXext */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi }, + + DBG_BCR_BVR_WCR_WVR(0), + /* DBGDSCRint */ + { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(1), + /* DBGDCCINT */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + /* DBGDSCRext */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(2), + /* DBGDTR[RT]Xint */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, + /* DBGDTR[RT]Xext */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(3), + DBG_BCR_BVR_WCR_WVR(4), + DBG_BCR_BVR_WCR_WVR(5), + /* DBGWFAR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi }, + /* DBGOSECCR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(6), + /* DBGVCR */ + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(7), + DBG_BCR_BVR_WCR_WVR(8), + DBG_BCR_BVR_WCR_WVR(9), + DBG_BCR_BVR_WCR_WVR(10), + DBG_BCR_BVR_WCR_WVR(11), + DBG_BCR_BVR_WCR_WVR(12), + DBG_BCR_BVR_WCR_WVR(13), + DBG_BCR_BVR_WCR_WVR(14), + DBG_BCR_BVR_WCR_WVR(15), + + /* DBGDRAR (32bit) */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi }, + + DBGBXVR(0), + /* DBGOSLAR */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + DBGBXVR(1), + /* DBGOSLSR */ + { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 }, + DBGBXVR(2), + DBGBXVR(3), + /* DBGOSDLR */ + { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi }, + DBGBXVR(4), + /* DBGPRCR */ + { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi }, + DBGBXVR(5), + DBGBXVR(6), + DBGBXVR(7), + DBGBXVR(8), + DBGBXVR(9), + DBGBXVR(10), + DBGBXVR(11), + DBGBXVR(12), + DBGBXVR(13), + DBGBXVR(14), + DBGBXVR(15), + + /* DBGDSAR (32bit) */ + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi }, + + /* DBGDEVID2 */ + { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi }, + /* DBGDEVID1 */ + { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi }, + /* DBGDEVID */ + { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi }, + /* DBGCLAIMSET */ + { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi }, + /* DBGCLAIMCLR */ + { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi }, + /* DBGAUTHSTATUS */ + { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 }, }; /* Trapped cp14 64bit registers */ static const struct sys_reg_desc cp14_64_regs[] = { + /* DBGDRAR (64bit) */ + { Op1( 0), CRm( 1), .access = trap_raz_wi }, + + /* DBGDSAR (64bit) */ + { Op1( 0), CRm( 2), .access = trap_raz_wi }, }; /* @@ -547,7 +688,6 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, - }; static const struct sys_reg_desc cp15_64_regs[] = { From 44af263503e0e31b1f88ce86df73e467f18e3198 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:44:49 +0100 Subject: [PATCH 223/287] arm64: KVM: implement lazy world switch for debug registers Implement switching of the debug registers. While the number of registers is massive, CPUs usually don't implement them all (A57 has 6 breakpoints and 4 watchpoints, which gives us a total of 22 registers "only"). Also, we only save/restore them when MDSCR_EL1 has debug enabled, or when we've flagged the debug registers as dirty. It means that most of the time, we only save/restore MDSCR_EL1. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit b0e626b380872b663918230fafdac128c34fea56) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp.S | 463 +++++++++++++++++++++++++++++++- 2 files changed, 458 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 65ebb2ccde5f..825d76c21d84 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -118,6 +118,7 @@ int main(void) DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7874e022d077..100494b5c7d4 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,7 @@ mrs x22, amair_el1 mrs x23, cntkctl_el1 mrs x24, par_el1 + mrs x25, mdscr_el1 stp x4, x5, [x3] stp x6, x7, [x3, #16] @@ -222,7 +224,202 @@ stp x18, x19, [x3, #112] stp x20, x21, [x3, #128] stp x22, x23, [x3, #144] - str x24, [x3, #160] + stp x24, x25, [x3, #160] +.endm + +.macro save_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbcr15_el1 + mrs x19, dbgbcr14_el1 + mrs x18, dbgbcr13_el1 + mrs x17, dbgbcr12_el1 + mrs x16, dbgbcr11_el1 + mrs x15, dbgbcr10_el1 + mrs x14, dbgbcr9_el1 + mrs x13, dbgbcr8_el1 + mrs x12, dbgbcr7_el1 + mrs x11, dbgbcr6_el1 + mrs x10, dbgbcr5_el1 + mrs x9, dbgbcr4_el1 + mrs x8, dbgbcr3_el1 + mrs x7, dbgbcr2_el1 + mrs x6, dbgbcr1_el1 + mrs x5, dbgbcr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbvr15_el1 + mrs x19, dbgbvr14_el1 + mrs x18, dbgbvr13_el1 + mrs x17, dbgbvr12_el1 + mrs x16, dbgbvr11_el1 + mrs x15, dbgbvr10_el1 + mrs x14, dbgbvr9_el1 + mrs x13, dbgbvr8_el1 + mrs x12, dbgbvr7_el1 + mrs x11, dbgbvr6_el1 + mrs x10, dbgbvr5_el1 + mrs x9, dbgbvr4_el1 + mrs x8, dbgbvr3_el1 + mrs x7, dbgbvr2_el1 + mrs x6, dbgbvr1_el1 + mrs x5, dbgbvr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwcr15_el1 + mrs x19, dbgwcr14_el1 + mrs x18, dbgwcr13_el1 + mrs x17, dbgwcr12_el1 + mrs x16, dbgwcr11_el1 + mrs x15, dbgwcr10_el1 + mrs x14, dbgwcr9_el1 + mrs x13, dbgwcr8_el1 + mrs x12, dbgwcr7_el1 + mrs x11, dbgwcr6_el1 + mrs x10, dbgwcr5_el1 + mrs x9, dbgwcr4_el1 + mrs x8, dbgwcr3_el1 + mrs x7, dbgwcr2_el1 + mrs x6, dbgwcr1_el1 + mrs x5, dbgwcr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwvr15_el1 + mrs x19, dbgwvr14_el1 + mrs x18, dbgwvr13_el1 + mrs x17, dbgwvr12_el1 + mrs x16, dbgwvr11_el1 + mrs x15, dbgwvr10_el1 + mrs x14, dbgwvr9_el1 + mrs x13, dbgwvr8_el1 + mrs x12, dbgwvr7_el1 + mrs x11, dbgwvr6_el1 + mrs x10, dbgwvr5_el1 + mrs x9, dbgwvr4_el1 + mrs x8, dbgwvr3_el1 + mrs x7, dbgwvr2_el1 + mrs x6, dbgwvr1_el1 + mrs x5, dbgwvr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] .endm .macro restore_sysregs @@ -241,7 +438,7 @@ ldp x18, x19, [x3, #112] ldp x20, x21, [x3, #128] ldp x22, x23, [x3, #144] - ldr x24, [x3, #160] + ldp x24, x25, [x3, #160] msr vmpidr_el2, x4 msr csselr_el1, x5 @@ -264,6 +461,198 @@ msr amair_el1, x22 msr cntkctl_el1, x23 msr par_el1, x24 + msr mdscr_el1, x25 +.endm + +.macro restore_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbcr15_el1, x20 + msr dbgbcr14_el1, x19 + msr dbgbcr13_el1, x18 + msr dbgbcr12_el1, x17 + msr dbgbcr11_el1, x16 + msr dbgbcr10_el1, x15 + msr dbgbcr9_el1, x14 + msr dbgbcr8_el1, x13 + msr dbgbcr7_el1, x12 + msr dbgbcr6_el1, x11 + msr dbgbcr5_el1, x10 + msr dbgbcr4_el1, x9 + msr dbgbcr3_el1, x8 + msr dbgbcr2_el1, x7 + msr dbgbcr1_el1, x6 + msr dbgbcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbvr15_el1, x20 + msr dbgbvr14_el1, x19 + msr dbgbvr13_el1, x18 + msr dbgbvr12_el1, x17 + msr dbgbvr11_el1, x16 + msr dbgbvr10_el1, x15 + msr dbgbvr9_el1, x14 + msr dbgbvr8_el1, x13 + msr dbgbvr7_el1, x12 + msr dbgbvr6_el1, x11 + msr dbgbvr5_el1, x10 + msr dbgbvr4_el1, x9 + msr dbgbvr3_el1, x8 + msr dbgbvr2_el1, x7 + msr dbgbvr1_el1, x6 + msr dbgbvr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwcr15_el1, x20 + msr dbgwcr14_el1, x19 + msr dbgwcr13_el1, x18 + msr dbgwcr12_el1, x17 + msr dbgwcr11_el1, x16 + msr dbgwcr10_el1, x15 + msr dbgwcr9_el1, x14 + msr dbgwcr8_el1, x13 + msr dbgwcr7_el1, x12 + msr dbgwcr6_el1, x11 + msr dbgwcr5_el1, x10 + msr dbgwcr4_el1, x9 + msr dbgwcr3_el1, x8 + msr dbgwcr2_el1, x7 + msr dbgwcr1_el1, x6 + msr dbgwcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwvr15_el1, x20 + msr dbgwvr14_el1, x19 + msr dbgwvr13_el1, x18 + msr dbgwvr12_el1, x17 + msr dbgwvr11_el1, x16 + msr dbgwvr10_el1, x15 + msr dbgwvr9_el1, x14 + msr dbgwvr8_el1, x13 + msr dbgwvr7_el1, x12 + msr dbgwvr6_el1, x11 + msr dbgwvr5_el1, x10 + msr dbgwvr4_el1, x9 + msr dbgwvr3_el1, x8 + msr dbgwvr2_el1, x7 + msr dbgwvr1_el1, x6 + msr dbgwvr0_el1, x5 + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 .endm .macro skip_32bit_state tmp, target @@ -278,6 +667,35 @@ tbz \tmp, #12, \target .endm +.macro skip_debug_state tmp, target + ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] + tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target +.endm + +.macro compute_debug_state target + // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY + // is set, we do a full save/restore cycle and disable trapping. + add x25, x0, #VCPU_CONTEXT + + // Check the state of MDSCR_EL1 + ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] + and x26, x25, #DBG_MDSCR_KDE + and x25, x25, #DBG_MDSCR_MDE + adds xzr, x25, x26 + b.eq 9998f // Nothing to see there + + // If any interesting bits was set, we must set the flag + mov x26, #KVM_ARM64_DEBUG_DIRTY + str x26, [x0, #VCPU_DEBUG_FLAGS] + b 9999f // Don't skip restore + +9998: + // Otherwise load the flags from memory in case we recently + // trapped + skip_debug_state x25, \target +9999: +.endm + .macro save_guest_32bit_state skip_32bit_state x3, 1f @@ -293,10 +711,13 @@ mrs x4, dacr32_el2 mrs x5, ifsr32_el2 mrs x6, fpexc32_el2 - mrs x7, dbgvcr32_el2 stp x4, x5, [x3] - stp x6, x7, [x3, #16] + str x6, [x3, #16] + skip_debug_state x8, 2f + mrs x7, dbgvcr32_el2 + str x7, [x3, #24] +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -319,12 +740,15 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] + ldr x6, [x3, #16] msr dacr32_el2, x4 msr ifsr32_el2, x5 msr fpexc32_el2, x6 - msr dbgvcr32_el2, x7 + skip_debug_state x8, 2f + ldr x7, [x3, #24] + msr dbgvcr32_el2, x7 +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -463,6 +887,14 @@ __restore_sysregs: restore_sysregs ret +__save_debug: + save_debug + ret + +__restore_debug: + restore_debug + ret + __save_fpsimd: save_fpsimd ret @@ -494,6 +926,9 @@ ENTRY(__kvm_vcpu_run) bl __save_fpsimd bl __save_sysregs + compute_debug_state 1f + bl __save_debug +1: activate_traps activate_vm @@ -505,6 +940,10 @@ ENTRY(__kvm_vcpu_run) bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + bl __restore_debug +1: restore_guest_32bit_state restore_guest_regs @@ -521,6 +960,10 @@ __kvm_vcpu_return: save_guest_regs bl __save_fpsimd bl __save_sysregs + + skip_debug_state x3, 1f + bl __save_debug +1: save_guest_32bit_state save_timer_state @@ -535,6 +978,14 @@ __kvm_vcpu_return: bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + // Clear the dirty flag for the next run, as all the state has + // already been saved. Note that we nuke the whole 64bit word. + // If we ever add more flags, we'll have to be more careful... + str xzr, [x0, #VCPU_DEBUG_FLAGS] + bl __restore_debug +1: restore_host_regs mov x0, x1 From 99999d6ea6ff7b0b02a4ac6abef0d49ec1fd9fcb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:32:03 +0100 Subject: [PATCH 224/287] arm64: KVM: enable trapping of all debug registers Enable trapping of the debug registers, preventing the guests to mess with the host state (and allowing guests to use the debug infrastructure as well). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit d329de09333aeee127aaf22eb7cee9c2dc4cf475) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 100494b5c7d4..b72aa9f9215c 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -770,6 +770,14 @@ mrs x2, mdcr_el2 and x2, x2, #MDCR_EL2_HPMN_MASK orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) + + // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap + // if not dirty. + ldr x3, [x0, #VCPU_DEBUG_FLAGS] + tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f + orr x2, x2, #MDCR_EL2_TDA +1: msr mdcr_el2, x2 .endm From 38ca7b90842e554439e2291803cdb0eb850b4807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 24 Jun 2014 19:43:15 +0100 Subject: [PATCH 225/287] ARM: make it easier to check the CPU part number correctly Ensure that platform maintainers check the CPU part number in the right manner: the CPU part number is meaningless without also checking the CPU implement(e|o)r (choose your preferred spelling!) Provide an interface which returns both the implementer and part number together, and update the definitions to include the implementer. Mark the old function as being deprecated... indeed, using the old function with the definitions will now always evaluate as false, so people must update their un-merged code to the new function. While this could be avoided by adding new definitions, we'd also have to create new names for them which would be awkward. Acked-by: Nicolas Pitre Signed-off-by: Russell King (cherry picked from commit af040ffc9ba1e079ee4c0748aff64fa3d4716fa5) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/cputype.h | 35 +++++++++++------ arch/arm/include/asm/smp_scu.h | 2 +- arch/arm/kernel/perf_event_cpu.c | 64 ++++++++++++++------------------ arch/arm/kvm/guest.c | 8 +--- 4 files changed, 53 insertions(+), 56 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index dba62cb1ad08..3392fe2d3174 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -43,15 +43,18 @@ #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_INTEL 0x69 -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 +/* ARM implemented processors */ +#define ARM_CPU_PART_ARM1136 0x4100b360 +#define ARM_CPU_PART_ARM1156 0x4100b560 +#define ARM_CPU_PART_ARM1176 0x4100b760 +#define ARM_CPU_PART_ARM11MPCORE 0x4100b020 +#define ARM_CPU_PART_CORTEX_A8 0x4100c080 +#define ARM_CPU_PART_CORTEX_A9 0x4100c090 +#define ARM_CPU_PART_CORTEX_A5 0x4100c050 +#define ARM_CPU_PART_CORTEX_A7 0x4100c070 +#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 +#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 +#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } -static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +/* + * The CPU part number is meaningless without referring to the CPU + * implementer: implementers are free to define their own part numbers + * which are permitted to clash with other implementer part numbers. + */ +static inline unsigned int __attribute_const__ read_cpuid_part(void) +{ + return read_cpuid_id() & 0xff00fff0; +} + +static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) { return read_cpuid_id() & 0xFFF0; } static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) { - return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; + return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 18d169373612..1a292d8be988 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -11,7 +11,7 @@ static inline bool scu_a9_has_base(void) { - return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; } static inline unsigned long scu_a9_get_base(void) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1f2740e3dbc0..0e9609657c79 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -201,49 +201,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); + switch (read_cpuid_part()) { /* ARM Ltd CPUs. */ - if (implementor == ARM_CPU_IMP_ARM) { - switch (part_number) { - case ARM_CPU_PART_ARM1136: - case ARM_CPU_PART_ARM1156: - case ARM_CPU_PART_ARM1176: - ret = armv6pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A5: - ret = armv7_a5_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A15: - ret = armv7_a15_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A7: - ret = armv7_a7_pmu_init(pmu); - break; - } - /* Intel CPUs [xscale]. */ - } else if (implementor == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: + ret = armv6pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + + default: + if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } } + break; } put_cpu(); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 986e625b5dbd..813e49258690 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -264,13 +264,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __attribute_const__ kvm_target_cpu(void) { - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { + switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: From 2d05a876aea8c40db12b9b721be21773aad6057b Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 06:27:03 -0700 Subject: [PATCH 226/287] kvm: Resolve missing-field-initializers warnings Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini (cherry picked from commit 25f97ff451a4aab534afc1290af97d23ea0b4fb3) Signed-off-by: Christoffer Dall --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a542a031..a228ee82bad2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else From 7b6de249336851f2fb8515b2f428bc9e56a8dbb1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: [PATCH 227/287] KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 +++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e2c2bfd4da95..0e3d3dc8eea2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -174,7 +174,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 53f44bee9ebb..b48aa69e35e1 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 750835a4ef70..aacf3e35404f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,7 +300,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 54612d0e79dd..9133f869b070 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -127,7 +127,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 805c8e92cf66..46a8c74fd431 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2500,7 +2500,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3c83491a791..1fb1bd4593a5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -578,7 +578,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3db56912caed..e3315356b69a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2564,7 +2564,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2588,7 +2588,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2607,7 +2607,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; From c509fab667c6a02b0763a4343c73d134c6a09328 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: [PATCH 228/287] KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 92b591a4c46b103ebd3fc0d03a084e1efd331253) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 7 ++-- include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 58 +++++++++++++++++-------------- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8d135672b69a..257a1f1eecc7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 521e4c0a08ac..1af686a82703 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -679,6 +679,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_EXT_EMUL_CPUID 95 #define KVM_CAP_ARM_PSCI_0_2 102 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3315356b69a..1edb15da7acf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2317,6 +2317,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2480,6 +2508,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2564,33 +2595,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { From edff6ae9e04008800e518454cf7fa0efe3023252 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Jul 2014 16:29:12 +0100 Subject: [PATCH 229/287] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will map host physical region 0x2c020000 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c020000 in the above scenario. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Joel Schopp Cc: Don Dutile Acked-by: Peter Maydell Acked-by: Joel Schopp Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 63afbe7a0ac184ef8485dac4914e87b211b5bfaa) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 16 ++++++++++++++++ virt/kvm/arm/vgic.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index d6c9c142f813..01124ef3690a 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -231,6 +231,22 @@ int vgic_v2_probe(struct device_node *vgic_node, ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ede8f6466c95..73eba793b17f 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1587,11 +1587,11 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - /* Callback into for arch code for setup */ vgic_arch_setup(vgic); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + return 0; out_free_irq: From 45104cfeec66c1314a2ae5e4cb436a9ace725e59 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 14:16:39 +0100 Subject: [PATCH 230/287] arm64: KVM: GICv3: move system register access to msr_s/mrs_s Commit 72c583951526 (arm64: gicv3: Allow GICv3 compilation with older binutils) changed the way we express the GICv3 system registers, but couldn't change the occurences used by KVM as the code wasn't merged yet. Just fix the accessors. Cc: Will Deacon Cc: Catalin Marinas Cc: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit f4c321eb268e932786c112e0b902ee942d91a336) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/vgic-v3-switch.S | 130 ++++++++++++++++---------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 21e68f606a8f..d16046999e06 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -48,11 +48,11 @@ dsb st // Save all interesting registers - mrs x4, ICH_HCR_EL2 - mrs x5, ICH_VMCR_EL2 - mrs x6, ICH_MISR_EL2 - mrs x7, ICH_EISR_EL2 - mrs x8, ICH_ELSR_EL2 + mrs_s x4, ICH_HCR_EL2 + mrs_s x5, ICH_VMCR_EL2 + mrs_s x6, ICH_MISR_EL2 + mrs_s x7, ICH_EISR_EL2 + mrs_s x8, ICH_ELSR_EL2 str w4, [x3, #VGIC_V3_CPU_HCR] str w5, [x3, #VGIC_V3_CPU_VMCR] @@ -60,9 +60,9 @@ str w7, [x3, #VGIC_V3_CPU_EISR] str w8, [x3, #VGIC_V3_CPU_ELRSR] - msr ICH_HCR_EL2, xzr + msr_s ICH_HCR_EL2, xzr - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 mvn w22, w21 ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 @@ -71,22 +71,22 @@ br x24 1: - mrs x20, ICH_LR15_EL2 - mrs x19, ICH_LR14_EL2 - mrs x18, ICH_LR13_EL2 - mrs x17, ICH_LR12_EL2 - mrs x16, ICH_LR11_EL2 - mrs x15, ICH_LR10_EL2 - mrs x14, ICH_LR9_EL2 - mrs x13, ICH_LR8_EL2 - mrs x12, ICH_LR7_EL2 - mrs x11, ICH_LR6_EL2 - mrs x10, ICH_LR5_EL2 - mrs x9, ICH_LR4_EL2 - mrs x8, ICH_LR3_EL2 - mrs x7, ICH_LR2_EL2 - mrs x6, ICH_LR1_EL2 - mrs x5, ICH_LR0_EL2 + mrs_s x20, ICH_LR15_EL2 + mrs_s x19, ICH_LR14_EL2 + mrs_s x18, ICH_LR13_EL2 + mrs_s x17, ICH_LR12_EL2 + mrs_s x16, ICH_LR11_EL2 + mrs_s x15, ICH_LR10_EL2 + mrs_s x14, ICH_LR9_EL2 + mrs_s x13, ICH_LR8_EL2 + mrs_s x12, ICH_LR7_EL2 + mrs_s x11, ICH_LR6_EL2 + mrs_s x10, ICH_LR5_EL2 + mrs_s x9, ICH_LR4_EL2 + mrs_s x8, ICH_LR3_EL2 + mrs_s x7, ICH_LR2_EL2 + mrs_s x6, ICH_LR1_EL2 + mrs_s x5, ICH_LR0_EL2 adr x24, 1f add x24, x24, x23 @@ -113,34 +113,34 @@ tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP0R3_EL2 + mrs_s x20, ICH_AP0R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs x19, ICH_AP0R2_EL2 + mrs_s x19, ICH_AP0R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs x18, ICH_AP0R1_EL2 +6: mrs_s x18, ICH_AP0R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs x17, ICH_AP0R0_EL2 +5: mrs_s x17, ICH_AP0R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP0R] tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP1R3_EL2 + mrs_s x20, ICH_AP1R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs x19, ICH_AP1R2_EL2 + mrs_s x19, ICH_AP1R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs x18, ICH_AP1R1_EL2 +6: mrs_s x18, ICH_AP1R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs x17, ICH_AP1R0_EL2 +5: mrs_s x17, ICH_AP1R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP1R] // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 orr x5, x5, #ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 isb mov x5, #1 - msr ICC_SRE_EL1, x5 + msr_s ICC_SRE_EL1, x5 .endm /* @@ -150,7 +150,7 @@ .macro restore_vgic_v3_state // Disable SRE_EL1 access. Necessary, otherwise // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... - msr ICC_SRE_EL1, xzr + msr_s ICC_SRE_EL1, xzr isb // Compute the address of struct vgic_cpu @@ -160,34 +160,34 @@ ldr w4, [x3, #VGIC_V3_CPU_HCR] ldr w5, [x3, #VGIC_V3_CPU_VMCR] - msr ICH_HCR_EL2, x4 - msr ICH_VMCR_EL2, x5 + msr_s ICH_HCR_EL2, x4 + msr_s ICH_VMCR_EL2, x5 - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr ICH_AP1R3_EL2, x20 + msr_s ICH_AP1R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr ICH_AP1R2_EL2, x19 + msr_s ICH_AP1R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr ICH_AP1R1_EL2, x18 + msr_s ICH_AP1R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr ICH_AP1R0_EL2, x17 + msr_s ICH_AP1R0_EL2, x17 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr ICH_AP0R3_EL2, x20 + msr_s ICH_AP0R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr ICH_AP0R2_EL2, x19 + msr_s ICH_AP0R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr ICH_AP0R1_EL2, x18 + msr_s ICH_AP0R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr ICH_AP0R0_EL2, x17 + msr_s ICH_AP0R0_EL2, x17 and w22, w21, #0xf mvn w22, w21 @@ -220,22 +220,22 @@ br x24 1: - msr ICH_LR15_EL2, x20 - msr ICH_LR14_EL2, x19 - msr ICH_LR13_EL2, x18 - msr ICH_LR12_EL2, x17 - msr ICH_LR11_EL2, x16 - msr ICH_LR10_EL2, x15 - msr ICH_LR9_EL2, x14 - msr ICH_LR8_EL2, x13 - msr ICH_LR7_EL2, x12 - msr ICH_LR6_EL2, x11 - msr ICH_LR5_EL2, x10 - msr ICH_LR4_EL2, x9 - msr ICH_LR3_EL2, x8 - msr ICH_LR2_EL2, x7 - msr ICH_LR1_EL2, x6 - msr ICH_LR0_EL2, x5 + msr_s ICH_LR15_EL2, x20 + msr_s ICH_LR14_EL2, x19 + msr_s ICH_LR13_EL2, x18 + msr_s ICH_LR12_EL2, x17 + msr_s ICH_LR11_EL2, x16 + msr_s ICH_LR10_EL2, x15 + msr_s ICH_LR9_EL2, x14 + msr_s ICH_LR8_EL2, x13 + msr_s ICH_LR7_EL2, x12 + msr_s ICH_LR6_EL2, x11 + msr_s ICH_LR5_EL2, x10 + msr_s ICH_LR4_EL2, x9 + msr_s ICH_LR3_EL2, x8 + msr_s ICH_LR2_EL2, x7 + msr_s ICH_LR1_EL2, x6 + msr_s ICH_LR0_EL2, x5 // Ensure that the above will have reached the // (re)distributors. This ensure the guest will read @@ -244,9 +244,9 @@ dsb sy // Prevent the guest from touching the GIC system registers - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 and x5, x5, #~ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 .endm ENTRY(__save_vgic_v3_state) @@ -260,7 +260,7 @@ ENTRY(__restore_vgic_v3_state) ENDPROC(__restore_vgic_v3_state) ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs x0, ICH_VTR_EL2 + mrs_s x0, ICH_VTR_EL2 ret ENDPROC(__vgic_v3_get_ich_vtr_el2) From 099fcd90255453a5976a42c5f9421a6a7ad7ce3d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 11:42:18 +0100 Subject: [PATCH 231/287] KVM: arm64: GICv3: mandate page-aligned GICV region Just like GICv2 was fixed in 63afbe7a0ac1 (kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform), mandate the GICV region to be both aligned on a page boundary and its size to be a multiple of page size. This prevents a guest from being able to poke at regions where we have no idea what is sitting there. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit fb3ec67942e92e5713e05b7691b277d0a0c0575d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v3.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index f01d44685720..1c2c8eef0599 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node, ret = -ENXIO; goto out; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; From dad44b58644918b98d32ed35f28e919e08b0f896 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Aug 2014 12:00:36 +0100 Subject: [PATCH 232/287] arm64: KVM: fix 64bit CP15 VM access for 32bit guests Commit f0a3eaff71b8 (ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest) changed the way we handle CP15 VM accesses, so that all 64bit accesses are done via vcpu_sys_reg. This looks like a good idea as it solves indianness issues in an elegant way, except for one small detail: the register index is doesn't refer to the same array! We end up corrupting some random data structure instead. Fix this by reverting to the original code, except for the introduction of a vcpu_cp15_64_high macro that deals with the endianness thing. Tested on Juno with 32bit SMP guests. Cc: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit dedf97e8ff2c7513b1370e36b56e08b6bd0f0290) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 6 ++++-- arch/arm64/kvm/sys_regs.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 79812be4f25f..e10c45a578e3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -149,9 +149,11 @@ struct kvm_vcpu_arch { #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4fd5267c65b..5805e7c4a4dd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -135,10 +135,13 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32 || !p->is_32bit) + if (!p->is_aarch32) { vcpu_sys_reg(vcpu, r->reg) = val; - else + } else { + if (!p->is_32bit) + vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + } return true; } From 74afc9fff6cdb9df6cd6ee43ddd59465f48aed6d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:09 +1000 Subject: [PATCH 233/287] KVM: Don't keep reference to irq routing table in irqfd struct This makes the irqfd code keep a copy of the irq routing table entry for each irqfd, rather than a reference to the copy in the actual irq routing table maintained in kvm/virt/irqchip.c. This will enable us to change the routing table structure in future, or even not have a routing table at all on some platforms. The synchronization that was previously achieved using srcu_dereference on the read side is now achieved using a seqcount_t structure. That ensures that we don't get a halfway-updated copy of the structure if we read it while another thread is updating it. We still use srcu_read_lock/unlock around the read side so that when changing the routing table we can be sure that after calling synchronize_srcu, nothing will be using the old routing. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 56f89f3629ffd1a21d38c3d0bea23deac0e284ce) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b51c19ffd8fd..16bf26183225 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "iodev.h" @@ -75,7 +76,8 @@ struct _irqfd { struct kvm *kvm; wait_queue_t wait; /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; @@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; int idx; if (flags & POLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); - irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + if (irq.type == KVM_IRQ_ROUTING_MSI) + kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); @@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, { struct kvm_kernel_irq_routing_entry *e; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + irqfd->irq_entry.type = 0; + if (irqfd->gsi >= irq_rt->nr_rt_entries) + goto out; hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); + irqfd->irq_entry = *e; } + + out: + write_seqcount_end(&irqfd->irq_entry_sc); } static int @@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); file = eventfd_fget(args->fd); if (IS_ERR(file)) { @@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_srcu done by caller - * of that function. */ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } From 28bcfc22334c1214d603e5136b627d02f6eadb5e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: [PATCH 234/287] KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 8ba918d488caded2c4368b0b922eb905fe3bb101) Signed-off-by: Christoffer Dall --- arch/powerpc/kvm/mpic.c | 4 +--- include/linux/kvm_host.h | 8 ++++++-- virt/kvm/eventfd.c | 10 ++++++---- virt/kvm/irq_comm.c | 20 +++++++++---------- virt/kvm/irqchip.c | 42 ++++++++++++++++++++++++++++++++-------- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 2861ae9eaae6..b58d61039015 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1fb1bd4593a5..f91b5cda1d28 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -709,6 +709,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi); +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin); + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); @@ -898,8 +903,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 16bf26183225..a75227f7d487 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee82bad2..175844593243 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275775cd..f4648dd94888 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; From 7563524573fb8d7e723cec38329edf1b3968593c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: [PATCH 235/287] KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 35 +++++++---------------------------- virt/kvm/eventfd.c | 22 +++++++++------------- virt/kvm/irq_comm.c | 6 ++---- virt/kvm/irqchip.c | 39 +++++++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f91b5cda1d28..65b6b8d793b5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -301,24 +301,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - struct kvm_kernel_irq_routing_entry *rt_entries; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - -#else - -struct kvm_irq_routing_table {}; - -#endif +struct kvm_irq_routing_table; #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 @@ -377,8 +360,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* - * Update side is protected by irq_lock and, - * if configured, irqfds.lock. + * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; @@ -709,10 +691,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi); -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin); +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi); +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); @@ -923,7 +904,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); -void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); +void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { @@ -945,10 +926,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +static inline void kvm_irq_routing_update(struct kvm *kvm) { - rcu_assign_pointer(kvm->irq_routing, irq_rt); } #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a75227f7d487..71133644f465 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct file *file = NULL; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); events = file->f_op->poll(file, &irqfd->pt); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. + * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 175844593243..963b8995a9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd94888..04faac50cef5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); From c905880e253b8e2f77db26dcb647a1a3bb359b11 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:12 +1000 Subject: [PATCH 236/287] KVM: Move irq notifier implementation into eventfd.c This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(), kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier() from irqchip.c to eventfd.c. The reason for doing this is that those functions are used in connection with IRQFDs, which are implemented in eventfd.c. In future we will want to use IRQFDs on platforms that don't implement the GSI routing implemented in irqchip.c, so we won't be compiling in irqchip.c, but we still need the irq notifiers. The implementation is unchanged. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/irqchip.c | 61 -------------------------------------------- 2 files changed, 63 insertions(+), 61 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 71133644f465..d2b1653c97dd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -34,7 +34,9 @@ #include #include #include +#include +#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -830,3 +832,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 04faac50cef5..7f256f31df10 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) return irq_rt->chip[irqchip][pin]; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; From f65b953a7232ae8bb497b226363629f9e49b4792 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: [PATCH 237/287] KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 297e21053a52f060944e9f0de4c64fad9bcd72fc) Signed-off-by: Christoffer Dall --- arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + include/linux/kvm_host.h | 8 ++++---- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b86420cc6..3d50ea955c4c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..60019a6fd6bb 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -155,6 +155,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db2b832..bdccfb62aa0d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 65b6b8d793b5..35319fe693f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -413,7 +413,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -888,20 +888,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); - #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19793e3..fc0c5e603eb4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index d2b1653c97dd..8be5b6545770 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1edb15da7acf..848a6afab165 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2330,7 +2330,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: From 57f984d2c64c8c1e597d021567a379f4f3920b35 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: [PATCH 238/287] KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD. Signed-off-by: Paolo Bonzini (cherry picked from commit c77dcacb397519b6ade8f08201a4a90a7f4f751e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 + include/trace/events/kvm.h | 8 +-- virt/kvm/eventfd.c | 122 ++++++++++++++++++------------------- virt/kvm/kvm_main.c | 2 + 4 files changed, 69 insertions(+), 65 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35319fe693f8..75d911ca47bd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -364,6 +364,8 @@ struct kvm { */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 131a0bda7aec..908925ace776 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? -__entry->errno : __entry->reason) ); -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ @@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq, #endif ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 8be5b6545770..67563284f7b9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -832,64 +893,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 848a6afab165..50f947301fa7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -464,6 +464,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif From 7fdbe25673a0bcd4c38968ec2cd048a21f1fbb93 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Aug 2014 16:45:56 +0200 Subject: [PATCH 239/287] KVM: avoid unnecessary synchronize_rcu We dont have to wait for a grace period if there is no oldpid that we are going to free. putpid also checks for NULL, so this patch only fences synchronize_rcu. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 7103f60de8bed21a0ad5d15d2ad5b7a333dda201) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 50f947301fa7..aa5057e7a1b9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu) struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); - synchronize_rcu(); + if (oldpid) + synchronize_rcu(); put_pid(oldpid); } cpu = get_cpu(); From ae38cfa961f9605be43060001ccc41d39427cb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: [PATCH 240/287] KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit e790d9ef6405633b007339d746b709aed43a928d) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 2 ++ 6 files changed, 20 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 0e3d3dc8eea2..d55786c579bf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index aacf3e35404f..e4c719d49e16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -533,6 +533,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9133f869b070..46392afd043d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -325,6 +325,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_regs(&vcpu->arch.host_fpregs); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 46a8c74fd431..a348daad61fd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6809,6 +6809,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 75d911ca47bd..4c12f314aab2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -600,6 +600,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aa5057e7a1b9..2659da911d99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } From 7ec68cffdc4807a95ae1db300bf857084f0c94da Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: [PATCH 241/287] KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we have a need to resolve a gfn to an hva given a pointer to a memslot to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c12f314aab2..59d50379c2e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -504,6 +504,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2659da911d99..84cee09da140 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); From 7e7ef8405025cd7af730fce40ace2a9c77ee9970 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:18:04 +0200 Subject: [PATCH 242/287] arm/arm64: KVM: Support KVM_CAP_READONLY_MEM When userspace loads code and data in a read-only memory regions, KVM needs to be able to handle this on arm and arm64. Specifically this is used when running code directly from a read-only flash device; the common scenario is a UEFI blob loaded with the -bios option in QEMU. Note that the MMIO exit on writes to a read-only memory is ABI and can be used to emulate block-erase style flash devices. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 98047888bb9fd57734028c44ec17413ddd623958) Signed-off-by: Christoffer Dall --- arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm/kvm/arm.c | 1 + arch/arm/kvm/mmu.c | 22 ++++++++-------------- arch/arm64/include/uapi/asm/kvm.h | 1 + 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index e6ebdd3471e5..51257fda254b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d55786c579bf..35dc889df3d2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -188,6 +188,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994bf347..62f5642153f9 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -747,14 +747,13 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) { int ret; bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; - unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; @@ -863,7 +862,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - bool is_iabt; + unsigned long hva; + bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -884,7 +884,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) idx = srcu_read_lock(&vcpu->kvm->srcu); gfn = fault_ipa >> PAGE_SHIFT; - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); @@ -892,13 +895,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - if (fault_status != FSC_FAULT) { - kvm_err("Unsupported fault status on io memory: %#lx\n", - fault_status); - ret = -EFAULT; - goto out_unlock; - } - /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the @@ -910,9 +906,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - memslot = gfn_to_memslot(vcpu->kvm, gfn); - - ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8cdec8..f4ec5a674d05 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,6 +37,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) From 276359920df8032e689497456c0a3c6dcc1b5100 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:20 +0100 Subject: [PATCH 243/287] KVM: ARM/arm64: fix non-const declaration of function returning const Sparse kicks up about a type mismatch for kvm_target_cpu: arch/arm64/kvm/guest.c:271:25: error: symbol 'kvm_target_cpu' redeclared with different type (originally declared at ./arch/arm64/include/asm/kvm_host.h:45) - different modifiers so fix this by adding the missing const attribute to the function declaration. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 6951e48bff0b55d2a8e825a953fc1f8e3a34bf1c) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..fcb12a6f7db5 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -42,7 +42,7 @@ struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..44094e559848 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -42,7 +42,7 @@ #define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); From 0251cb8ae768874a988c39a908fbcc8dfd82429a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:21 +0100 Subject: [PATCH 244/287] KVM: ARM/arm64: fix broken __percpu annotation Running sparse results in a bunch of noisy address space mismatches thanks to the broken __percpu annotation on kvm_get_running_vcpus. This function returns a pcpu pointer to a pointer, not a pointer to a pcpu pointer. This patch fixes the annotation, which kills the warnings from sparse. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 4000be423cb01a8d09de878bb8184511c49d4238) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 35dc889df3d2..79268a12a49e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) /** * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. */ -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) { return &kvm_arm_running_vcpu; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 44094e559848..50431d36732b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -193,7 +193,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) } struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); From 563d813862bf2c44e1e9f0d59facf8e0ce303355 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:22 +0100 Subject: [PATCH 245/287] KVM: ARM/arm64: avoid returning negative error code as bool is_valid_cache returns true if the specified cache is valid. Unfortunately, if the parameter passed it out of range, we return -ENOENT, which ends up as true leading to potential hilarity. This patch returns false on the failure path instead. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 18d457661fb9fa69352822ab98d39331c3d0e571) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 37a0fe1bb9bb..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5805e7c4a4dd..4cc3b719208e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); From 1453b5c1052996b983f6d9ae043dec64e089c0c8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:23 +0100 Subject: [PATCH 246/287] KVM: ARM/arm64: return -EFAULT if copy_from_user fails in set_timer_reg We currently return the number of bytes not copied if set_timer_reg fails, which is almost certainly not what userspace would like. This patch returns -EFAULT instead. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit bd218bce92d3868ba4fe5e9e3eb8199d2aa614af) Signed-off-by: Christoffer Dall --- arch/arm/kvm/guest.c | 2 +- arch/arm64/kvm/guest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 813e49258690..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8d1ec2887a26..76794692c20b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } From 02b1b15d1265067d246eb8017c27c75d2fac5dae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:24 +0100 Subject: [PATCH 247/287] KVM: vgic: return int instead of bool when checking I/O ranges vgic_ioaddr_overlap claims to return a bool, but in reality it returns an int. Shut sparse up by fixing the type signature. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 1fa451bcc67fa921a04c5fac8dbcde7844d54512) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 73eba793b17f..d1cfe672b9d7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1690,7 +1690,7 @@ int kvm_vgic_create(struct kvm *kvm) return ret; } -static bool vgic_ioaddr_overlap(struct kvm *kvm) +static int vgic_ioaddr_overlap(struct kvm *kvm) { phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; From 8ec715912ec282c0c560407fe72ae80b14c8d320 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:25 +0100 Subject: [PATCH 248/287] KVM: vgic: declare probe function pointer as const We extract the vgic probe function from the of_device_id data pointer, which is const. Kill the sparse warning by ensuring that the local function pointer is also marked as const. Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit de56fb1923ca11f428bf557870e0faa99f38762e) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d1cfe672b9d7..efe6eee2e7eb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = { int kvm_vgic_hyp_init(void) { const struct of_device_id *matched_id; - int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); + const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; int ret; From d3b49fbe5764b06372df1dea9c9bea5b8373f49c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:37 +0200 Subject: [PATCH 249/287] KVM: Unconditionally export KVM_CAP_READONLY_MEM The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_READONLY_MEM was guarded by #ifdef __KVM_HAVE_READONLY_MEM, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 0f8a4de3e088797576ac76200b634b802e5c7781) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 2 -- virt/kvm/kvm_main.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1af686a82703..023ec6132966 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -662,9 +662,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 -#ifdef __KVM_HAVE_READONLY_MEM #define KVM_CAP_READONLY_MEM 81 -#endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 84cee09da140..0cb02c749622 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -709,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; -#ifdef KVM_CAP_READONLY_MEM +#ifdef __KVM_HAVE_READONLY_MEM valid_flags |= KVM_MEM_READONLY; #endif From 177e51e89403b6843d068a1dae873650053b32a4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:38 +0200 Subject: [PATCH 250/287] KVM: Unconditionally export KVM_CAP_USER_NMI The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_USER_NMI was guarded by #ifdef __KVM_HAVE_USER_NMI, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_USER_NMI and change the the typo in the comment for the IOCTL number definition as well. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 44b5ce73c99c389817be71b9161bceb197d40ecb) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 023ec6132966..bbc1f8a09eb8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -578,9 +578,7 @@ struct kvm_ppc_smmu_info { #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 -#endif #ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 #endif @@ -995,7 +993,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ +/* Available with KVM_CAP_USER_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) From 48bc31d532d18a5e64c5ef1f20be161d0b834d11 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: [PATCH 251/287] KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini (cherry picked from commit 656473003bc7e056c3bbd4a4d9832dad01e86f76) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 3 +++ arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 32 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index fcb12a6f7db5..0bc5295bbfdf 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 50431d36732b..ac99093d004e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 989dd3fe8de1..65088aa016c0 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -238,9 +238,6 @@ struct kvm_vm_data { #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) 1 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4d6fa0bf1305..d56ee50d6885 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -71,11 +71,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af326cde7cb6..0a3238026fd7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..42913475fc6c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53db582487c9..bd6f3529453d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -103,10 +103,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 59d50379c2e4..b8cb3c2d893e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -129,8 +129,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -301,8 +299,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -994,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* From 3dcac226202ec154b5c29bcd968e7b894e61c031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:02 +0200 Subject: [PATCH 252/287] KVM: static inline empty kvm_arch functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using static inline is going to save few bytes and cycles. For example on powerpc, the difference is 700 B after stripping. (5 kB before) This patch also deals with two overlooked empty functions: kvm_arch_flush_shadow was not removed from arch/mips/kvm/mips.c 2df72e9bc KVM: split kvm_arch_flush_shadow and kvm_arch_sched_in never made it into arch/ia64/kvm/kvm-ia64.c. e790d9ef6 KVM: add kvm_arch_sched_in Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit 0865e636aef751966e6e0f8950a26bc7391e923c) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 6 ++++ arch/arm/kvm/arm.c | 19 ------------- arch/arm64/include/asm/kvm_host.h | 6 ++++ arch/ia64/include/asm/kvm_host.h | 12 ++++++++ arch/ia64/kvm/kvm-ia64.c | 30 -------------------- arch/mips/include/asm/kvm_host.h | 11 ++++++++ arch/powerpc/include/asm/kvm_host.h | 8 ++++++ arch/powerpc/kvm/powerpc.c | 28 ------------------- arch/s390/include/asm/kvm_host.h | 14 ++++++++++ arch/s390/kvm/kvm-s390.c | 43 ----------------------------- 10 files changed, 57 insertions(+), 120 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0bc5295bbfdf..2b17f6ab9642 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 79268a12a49e..882f7e856b6b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} /** * kvm_arch_init_vm - initializes a VM data structure @@ -285,14 +274,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac99093d004e..eaf689c48a59 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,4 +242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 65088aa016c0..cf03097176b1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -596,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) {} +static inline void kvm_arch_hardware_unsetup(void) {} + #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index b48aa69e35e1..b1e074d299ea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); @@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_release_vm_pages(kvm); } -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { if (cpu != vcpu->cpu) { @@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kfree(vcpu->arch.apic); } - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1550,21 +1541,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -1596,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ - return; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); @@ -1852,10 +1826,6 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d56ee50d6885..279376e90c7f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -654,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0a3238026fd7..90a9c0e4952c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -623,4 +623,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e4c719d49e16..7a75d6dbf610 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -251,19 +251,11 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = kvmppc_core_check_processor_compat(); @@ -296,10 +288,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -421,10 +409,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return kvmppc_core_create_memslot(kvm, slot, npages); } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -441,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -533,10 +513,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE @@ -1134,7 +1110,3 @@ int kvm_arch_init(void *opaque) { return 0; } - -void kvm_arch_exit(void) -{ -} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 42913475fc6c..d45e11dac5cf 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -269,4 +269,18 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); + +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_check_processor_compat(void *rtn) {} +static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} + #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 46392afd043d..1fe3bc10c87f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -92,10 +92,6 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; @@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void) { } -void kvm_arch_check_processor_compat(void *rtn) -{ -} - int kvm_arch_init(void *opaque) { return 0; } -void kvm_arch_exit(void) -{ -} - /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); @@ -320,15 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - /* Nothing todo */ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_regs(&vcpu->arch.host_fpregs); @@ -975,21 +950,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1035,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - static int __init kvm_s390_init(void) { int ret; From df6fef6a404701ff6249330215c31f2a2b041d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: [PATCH 253/287] KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit 13a34e067eab24fec882e1834fbf2cc31911d474) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 15 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2b17f6ab9642..46e5d4da1989 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 882f7e856b6b..c8ff64b54459 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index eaf689c48a59..bcde41905746 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index b1e074d299ea..c9aa236dc29b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 279376e90c7f..5e3f4b0f18c8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -654,7 +654,7 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 90a9c0e4952c..f391f3fbde8b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -623,7 +623,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7a75d6dbf610..ea4cfdc991da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -246,7 +246,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d45e11dac5cf..99971dfc6b9a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -270,7 +270,7 @@ struct kvm_arch{ extern int sie64a(struct kvm_s390_sie_block *, u64 *); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1fe3bc10c87f..412fbc5dc688 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,7 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd6f3529453d..5137dca9e9d6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,8 +634,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871d..934befea3e36 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -606,7 +606,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5402c94ab768..af423252c265 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a348daad61fd..37d9503b81ea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -239,7 +239,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6603,7 +6603,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6614,7 +6614,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -6694,10 +6694,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8cb3c2d893e..ce4cf2f1c419 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -608,8 +608,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0cb02c749622..556049f120b7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) From 25523c743ec5bf8ebe707ac8a8112eac0a2d42b6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:31 +0200 Subject: [PATCH 254/287] KVM: remove redundant check of in_spin_loop The expression `vcpu->spin_loop.in_spin_loop' is always true, because it is evaluated only when the condition `!vcpu->spin_loop.in_spin_loop' is false. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 34656113182b704682e23d1363417536addfec97) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 556049f120b7..025f8300b6f0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1774,8 +1774,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || - (vcpu->spin_loop.in_spin_loop && - vcpu->spin_loop.dy_eligible); + vcpu->spin_loop.dy_eligible; if (vcpu->spin_loop.in_spin_loop) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); From c4e4c7bf26b78b6cb8fa985c19854c8f6e9abc20 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:32 +0200 Subject: [PATCH 255/287] KVM: remove redundant assigment of return value in kvm_dev_ioctl The first statement of kvm_dev_ioctl is long r = -EINVAL; No need to reassign the same value. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit a13f533b2f1d53a7c0baa7490498caeab7bc8ba5) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 025f8300b6f0..d201466921bf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2611,7 +2611,6 @@ static long kvm_dev_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_API_VERSION: - r = -EINVAL; if (arg) goto out; r = KVM_API_VERSION; @@ -2623,7 +2622,6 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: - r = -EINVAL; if (arg) goto out; r = PAGE_SIZE; /* struct kvm_run */ From e665e3aae1e8c2e47e467c13c10e5b92c4be61a5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:33 +0200 Subject: [PATCH 256/287] KVM: remove redundant assignments in __kvm_set_memory_region __kvm_set_memory_region sets r to EINVAL very early. Doing it again is not necessary. The same is true later on, where r is assigned -ENOMEM twice. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit f2a25160887e00434ce1361007009120e1fecbda) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d201466921bf..3ed27192a849 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -778,7 +778,6 @@ int __kvm_set_memory_region(struct kvm *kvm, base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; - r = -EINVAL; if (npages > KVM_MEM_MAX_NR_PAGES) goto out; @@ -792,7 +791,6 @@ int __kvm_set_memory_region(struct kvm *kvm, new.npages = npages; new.flags = mem->flags; - r = -EINVAL; if (npages) { if (!old.npages) change = KVM_MR_CREATE; @@ -848,7 +846,6 @@ int __kvm_set_memory_region(struct kvm *kvm, } if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - r = -ENOMEM; slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) From cf0dfca545678c6dc257a75094f13515e0bf6c69 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 9 Sep 2014 11:27:09 +0100 Subject: [PATCH 257/287] ARM/arm64: KVM: fix use of WnR bit in kvm_is_write_fault() The ISS encoding for an exception from a Data Abort has a WnR bit[6] that indicates whether the Data Abort was caused by a read or a write instruction. While there are several fields in the encoding that are only valid if the ISV bit[24] is set, WnR is not one of them, so we can read it unconditionally. Instead of fixing both implementations of kvm_is_write_fault() in place, reimplement it just once using kvm_vcpu_dabt_iswrite(), which already does the right thing with respect to the WnR bit. Also fix up the callers to pass 'vcpu' Acked-by: Laszlo Ersek Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier (cherry picked from commit a7d079cea2dffb112e26da2566dd84c0ef1fce97) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 11 ----------- arch/arm/kvm/mmu.c | 12 ++++++++++-- arch/arm64/include/asm/kvm_mmu.h | 13 ------------- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f5f72f..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) flush_pmd_entry(pte); } -static inline bool kvm_is_write_fault(unsigned long hsr) -{ - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; - if (hsr_ec == HSR_EC_IABT) - return false; - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) - return false; - else - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) { clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 62f5642153f9..bb06f76a8f89 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -746,6 +746,14 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) return false; } +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -760,7 +768,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn_t pfn; pgprot_t mem_type = PAGE_S2; - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; @@ -886,7 +894,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn = fault_ipa >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8e138c7c53ac..737da742b293 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) -static inline bool kvm_is_write_fault(unsigned long esr) -{ - unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; - - if (esr_ec == ESR_EL2_EC_IABT) - return false; - - if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) - return false; - - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} From cdead48c82db0e501edfce2c85b7834de2960d6f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 1 Sep 2014 09:36:08 +0100 Subject: [PATCH 258/287] KVM: EVENTFD: remove inclusion of irq.h No more needed. irq.h would be void on ARM. Acked-by: Paolo Bonzini Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier (cherry picked from commit 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 67563284f7b9..71ed39941b9c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,6 @@ #include #include -#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD From 86318858caaa5a5e5b58f7bd2624c2a953eddf76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:33 +0100 Subject: [PATCH 259/287] KVM: device: add simple registration mechanism for kvm_device_ops kvm_ioctl_create_device currently has knowledge of all the device types and their associated ops. This is fairly inflexible when adding support for new in-kernel device emulations, so move what we currently have out into a table, which can support dynamic registration of ops by new drivers for virtual hardware. Cc: Alex Williamson Cc: Alex Graf Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Marc Zyngier Acked-by: Cornelia Huck Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit d60eacb07053142bfb9b41582074a89a790a9d46) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 21 ++++++++++---- virt/kvm/kvm_main.c | 60 ++++++++++++++++++++++++---------------- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce4cf2f1c419..fc7b4270bc4a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1022,6 +1022,7 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index bbc1f8a09eb8..00d2c69a3cb6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -848,14 +848,25 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_TYPE_FSL_MPIC_20 1 -#define KVM_DEV_TYPE_FSL_MPIC_42 2 -#define KVM_DEV_TYPE_XICS 3 -#define KVM_DEV_TYPE_VFIO 4 #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 -#define KVM_DEV_TYPE_ARM_VGIC_V2 5 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_MAX, +}; /* * ioctls for VM fds diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3ed27192a849..df006d12c14b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2261,6 +2261,37 @@ struct kvm_device *kvm_device_from_filp(struct file *filp) return filp->private_data; } +static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { +#ifdef CONFIG_KVM_MPIC + [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, + [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, +#endif + +#ifdef CONFIG_KVM_XICS + [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, +#endif + +#ifdef CONFIG_KVM_VFIO + [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, +#endif + +#ifdef CONFIG_KVM_ARM_VGIC + [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, +#endif +}; + +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) +{ + if (type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENOSPC; + + if (kvm_device_ops_table[type] != NULL) + return -EEXIST; + + kvm_device_ops_table[type] = ops; + return 0; +} + static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { @@ -2269,31 +2300,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int ret; - switch (cd->type) { -#ifdef CONFIG_KVM_MPIC - case KVM_DEV_TYPE_FSL_MPIC_20: - case KVM_DEV_TYPE_FSL_MPIC_42: - ops = &kvm_mpic_ops; - break; -#endif -#ifdef CONFIG_KVM_XICS - case KVM_DEV_TYPE_XICS: - ops = &kvm_xics_ops; - break; -#endif -#ifdef CONFIG_KVM_VFIO - case KVM_DEV_TYPE_VFIO: - ops = &kvm_vfio_ops; - break; -#endif -#ifdef CONFIG_KVM_ARM_VGIC - case KVM_DEV_TYPE_ARM_VGIC_V2: - ops = &kvm_arm_vgic_v2_ops; - break; -#endif - default: + if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENODEV; + + ops = kvm_device_ops_table[cd->type]; + if (ops == NULL) return -ENODEV; - } if (test) return 0; From 9e95eca3b761209d08e97144de3962f355ee2078 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:34 +0100 Subject: [PATCH 260/287] KVM: ARM: vgic: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the ARM VGIC, instead of relying on the static table. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit c06a841bf36340e9e917ce60d11a6425ac85d0bd) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/arm/vgic.c | 157 ++++++++++++++++++++------------------- virt/kvm/kvm_main.c | 4 - 3 files changed, 79 insertions(+), 83 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fc7b4270bc4a..ed20bc0d209b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1027,7 +1027,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index efe6eee2e7eb..8c95b2468cd8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic->maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic->maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, - {}, -}; - -int kvm_vgic_hyp_init(void) -{ - const struct of_device_id *matched_id; - const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); - struct device_node *vgic_node; - int ret; - - vgic_node = of_find_matching_node_and_match(NULL, - vgic_ids, &matched_id); - if (!vgic_node) { - kvm_err("error: no compatible GIC node found\n"); - return -ENODEV; - } - - vgic_probe = matched_id->data; - ret = vgic_probe(vgic_node, &vgic_ops, &vgic); - if (ret) - return ret; - - ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); - return ret; - } - - ret = __register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - /* Callback into for arch code for setup */ - vgic_arch_setup(vgic); - - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - return 0; - -out_free_irq: - free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); - return ret; -} - /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) return kvm_vgic_create(dev->kvm); } -struct kvm_device_ops kvm_arm_vgic_v2_ops = { +static struct kvm_device_ops kvm_arm_vgic_v2_ops = { .name = "kvm-arm-vgic", .create = vgic_create, .destroy = vgic_destroy, @@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { .get_attr = vgic_get_attr, .has_attr = vgic_has_attr, }; + +static void vgic_init_maintenance_interrupt(void *info) +{ + enable_percpu_irq(vgic->maint_irq, 0); +} + +static int vgic_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + vgic_init_maintenance_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(vgic->maint_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vgic_cpu_nb = { + .notifier_call = vgic_cpu_notify, +}; + +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + {}, +}; + +int kvm_vgic_hyp_init(void) +{ + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); + struct device_node *vgic_node; + int ret; + + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); + if (!vgic_node) { + kvm_err("error: no compatible GIC node found\n"); + return -ENODEV; + } + + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; + + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; + } + + ret = __register_cpu_notifier(&vgic_cpu_nb); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + + return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + +out_free_irq: + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); + return ret; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index df006d12c14b..111d560418f3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2274,10 +2274,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_VFIO [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, #endif - -#ifdef CONFIG_KVM_ARM_VGIC - [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, -#endif }; int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) From 7bd2f48101c8f43cd36e9920c7315df8575b61a8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:36 +0100 Subject: [PATCH 261/287] KVM: VFIO: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the VFIO kvm device, instead of relying on the static table. This is achieved by a module_init call to register the ops with KVM. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit 80ce1639727e9d38729c34f162378508c307ca25) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 4 ---- virt/kvm/vfio.c | 22 +++++++++++++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed20bc0d209b..f64e941a4213 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1026,7 +1026,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 111d560418f3..f019669674a5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,10 +2270,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_XICS [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, #endif - -#ifdef CONFIG_KVM_VFIO - [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, -#endif }; int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 597c258245ea..475487e238e1 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -189,6 +189,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ } +static int kvm_vfio_create(struct kvm_device *dev, u32 type); + +static struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; + static int kvm_vfio_create(struct kvm_device *dev, u32 type) { struct kvm_device *tmp; @@ -211,10 +221,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return 0; } -struct kvm_device_ops kvm_vfio_ops = { - .name = "kvm-vfio", - .create = kvm_vfio_create, - .destroy = kvm_vfio_destroy, - .set_attr = kvm_vfio_set_attr, - .has_attr = kvm_vfio_has_attr, -}; +static int __init kvm_vfio_ops_init(void) +{ + return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); +} +module_init(kvm_vfio_ops_init); From fa0603dfed451534fda7856d1301a6f81ae5de82 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:27:18 +0200 Subject: [PATCH 262/287] arm/arm64: KVM: Rename irq_state to irq_pending The irq_state field on the distributor struct is ambiguous in its meaning; the comment says it's the level of the input put, but that doesn't make much sense for edge-triggered interrupts. The code actually uses this state variable to check if the interrupt is in the pending state on the distributor so clarify the comment and rename the actual variable and accessor methods. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 227844f53864077ccaefe01d0960fcccc03445ce) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- virt/kvm/arm/vgic.c | 52 +++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 35b0c121bb65..388d442eecb5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,8 +140,8 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; + /* Interrupt state is pending on the distributor */ + struct vgic_bitmap irq_pending; /* Level-triggered interrupt in progress */ struct vgic_bitmap irq_active; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8c95b2468cd8..3d04a6de6f23 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -37,7 +37,7 @@ * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic + * - VGIC pending interrupts are stored on the vgic.irq_pending vgic * bitmap (this bitmap is updated by both user land ioctls and guest * mmio ops, and other in-kernel peripherals such as the * arch. timers) and indicate the 'wire' state. @@ -45,8 +45,8 @@ * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target + * - PPI: dist->irq_pending & dist->irq_enable + * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target * - irq_spi_target is a 'formatted' version of the GICD_ICFGR * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can @@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); } -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); } -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); } static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) @@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); @@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, lr.irq); + vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; @@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) kvm_for_each_vcpu(c, vcpu, kvm) { if (target_cpus & 1) { /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); + vgic_dist_irq_set_pending(vcpu, sgi); dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; pend_shared = vcpu->arch.vgic_cpu.pending_shared; - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); + pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - pending = vgic_bitmap_get_shared_map(&dist->irq_state); + pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); bitmap_and(pend_shared, pend_shared, @@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) * our emulated gic and can get rid of them. */ if (!sources) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); return true; } @@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) if (vgic_queue_irq(vcpu, 0, irq)) { if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { vgic_irq_set_active(vcpu, irq); @@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { - int is_edge = vgic_irq_is_edge(vcpu, irq); + int edge_triggered = vgic_irq_is_edge(vcpu, irq); int state = vgic_dist_irq_is_pending(vcpu, irq); /* @@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (is_edge) + if (edge_triggered) return level > state; else return level != state; } -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, +static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int is_edge, is_level; + int edge_triggered, level_triggered; int enabled; bool ret = true; spin_lock(&dist->lock); vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; + edge_triggered = vgic_irq_is_edge(vcpu, irq_num); + level_triggered = !edge_triggered; if (!vgic_validate_injection(vcpu, irq_num, level)) { ret = false; @@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); if (level) - vgic_dist_irq_set(vcpu, irq_num); + vgic_dist_irq_set_pending(vcpu, irq_num); else - vgic_dist_irq_clear(vcpu, irq_num); + vgic_dist_irq_clear_pending(vcpu, irq_num); enabled = vgic_irq_is_enabled(vcpu, irq_num); @@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, goto out; } - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { + if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. @@ -1466,7 +1466,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) + if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; From 25b3fb8068b572c2b9f549b17f513092309aa610 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:55:13 +0200 Subject: [PATCH 263/287] arm/arm64: KVM: Rename irq_active to irq_queued We have a special bitmap on the distributor struct to keep track of when level-triggered interrupts are queued on the list registers. This was named irq_active, which is confusing, because the active state of an interrupt as per the GIC spec is a different thing, not specifically related to edge-triggered/level-triggered configurations but rather indicates an interrupt which has been ack'ed but not yet eoi'ed. Rename the bitmap and the corresponding accessor functions to irq_queued to clarify what this is actually used for. Signed-off-by: Christoffer Dall (cherry picked from commit dbf20f9d8105cca531614c8bff9a74351e8e67e7) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- virt/kvm/arm/vgic.c | 33 +++++++++++++++++++-------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 388d442eecb5..7d8e61fa9928 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -143,8 +143,8 @@ struct vgic_dist { /* Interrupt state is pending on the distributor */ struct vgic_bitmap irq_pending; - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; + /* Level-triggered interrupt queued on VCPU interface */ + struct vgic_bitmap irq_queued; /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3d04a6de6f23..769cc7177f10 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -60,12 +60,12 @@ * the 'line' again. This is achieved as such: * * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line + * bit in irq_queued is set. As long as this bit is set, the line * will be ignored for further interrupts. The interrupt is injected * into the vcpu with the GICH_LR_EOI bit set (generate a * maintenance interrupt on EOI). * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the + * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again. */ @@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); } -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); } -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) @@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) vcpu->arch.vgic_cpu.pending_shared); } +static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) +{ + return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); +} + static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { return le32_to_cpu(*((u32 *)mmio->data)) & mask; @@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_active(vcpu, vlr.irq)) - vgic_irq_clear_active(vcpu, vlr.irq); + if (vgic_irq_is_queued(vcpu, vlr.irq)) + vgic_irq_clear_queued(vcpu, vlr.irq); } } } @@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) { - if (vgic_irq_is_active(vcpu, irq)) + if (!vgic_can_sample_irq(vcpu, irq)) return true; /* level interrupt, already queued */ if (vgic_queue_irq(vcpu, 0, irq)) { @@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { - vgic_irq_set_active(vcpu, irq); + vgic_irq_set_queued(vcpu, irq); } return true; @@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, vlr.irq); + vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); @@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, goto out; } - if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) { + if (!vgic_can_sample_irq(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. From 520445af8ef7b1e8d17b692c0957b3802b90d98a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:37:33 +0200 Subject: [PATCH 264/287] arm/arm64: KVM: vgic: Clear queued flags on unqueue If we unqueue a level-triggered interrupt completely, and the LR does not stick around in the active state (and will therefore no longer generate a maintenance interrupt), then we should clear the queued flag so that the vgic can actually queue this level-triggered interrupt at a later time and deal with its pending state then. Note: This should actually be properly fixed to handle the active state on the distributor. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit cced50c9280ef7ca1af48080707a170efa1adfa0) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 769cc7177f10..c7d068976069 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) + if (!(lr.state & LR_STATE_MASK)) { vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); + } /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); From fe8a7fe10d123135d9f8fee2eb0b0c14b31d609d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 21:54:51 +0200 Subject: [PATCH 265/287] arm/arm64: KVM: vgic: Improve handling of GICD_I{CS}PENDRn Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled correctly for level-triggered interrupts. The spec states that for level-triggered interrupts, writes to the GICD_ISPENDRn activate the output of a flip-flop which is in turn or'ed with the actual input interrupt signal. Correspondingly, writes to GICD_ICPENDRn simply deactivates the output of that flip-flop, but does not (of course) affect the external input signal. Reads from GICC_IAR will also deactivate the flip-flop output. This requires us to track the state of the level-input separately from the state in the flip-flop. We therefore introduce two new variables on the distributor struct to track these two states. Astute readers may notice that this is introducing more state than required (because an OR of the two states gives you the pending state), but the remaining vgic code uses the pending bitmap for optimized operations to figure out, at the end of the day, if an interrupt is pending or not on the distributor side. Refactoring the code to consider the two state variables all the places where we currently access the precomputed pending value, did not look pretty. Signed-off-by: Christoffer Dall (cherry picked from commit faa1b46c3e9f4d40359aee04ff275eea5f4cae3a) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 16 +++++- virt/kvm/arm/vgic.c | 119 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 123 insertions(+), 12 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7d8e61fa9928..f074539c6ac5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,9 +140,23 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt state is pending on the distributor */ + /* Level-triggered interrupt external input is asserted */ + struct vgic_bitmap irq_level; + + /* + * Interrupt state is pending on the distributor + */ struct vgic_bitmap irq_pending; + /* + * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered + * interrupts. Essentially holds the state of the flip-flop in + * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. + * Once set, it is only cleared for level-triggered interrupts on + * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. + */ + struct vgic_bitmap irq_soft_pend; + /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c7d068976069..07e97de1851b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -67,6 +67,11 @@ * - When the interrupt is EOIed, the maintenance interrupt fires, * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again. + * - Note that level-triggered interrupts can also be set to pending from + * writes to GICD_ISPENDRn and lowering the external input line does not + * cause the interrupt to become inactive in such a situation. + * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become + * inactive as long as the external input line is held high. */ #define VGIC_ADDR_UNDEF (-1) @@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); +} + +static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); +} + +static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *reg; + u32 level_mask; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); + level_mask = (~(*reg)); + + /* Mark both level and edge triggered irqs as pending */ + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + /* Set the soft-pending flag only for level-triggered irqs */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + *reg &= level_mask; + vgic_update_state(vcpu->kvm); return true; } @@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *level_active; + u32 *reg; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { + /* Re-set level triggered level-active interrupts */ + level_active = vgic_bitmap_get_reg(&dist->irq_level, + vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, + vcpu->vcpu_id, offset); + *reg |= *level_active; + + /* Clear soft-pending flags */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + vgic_update_state(vcpu->kvm); return true; } @@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); + /* + * If the IRQ was EOIed it was also ACKed and we we + * therefore assume we can clear the soft pending + * state (should it had been set) for this interrupt. + * + * Note: if the IRQ soft pending state was set after + * the IRQ was acked, it actually shouldn't be + * cleared, but we have no way of knowing that unless + * we start trapping ACKs when the soft-pending state + * is set. + */ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); vgic_cpu_irq_clear(vcpu, vlr.irq); } @@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { int edge_triggered = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); /* * Only inject an interrupt if: * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (edge_triggered) + if (edge_triggered) { + int state = vgic_dist_irq_is_pending(vcpu, irq); return level > state; - else + } else { + int state = vgic_dist_irq_get_level(vcpu, irq); return level != state; + } } static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, @@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - if (level) + if (level) { + if (level_triggered) + vgic_dist_irq_set_level(vcpu, irq_num); vgic_dist_irq_set_pending(vcpu, irq_num); - else - vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + if (level_triggered) { + vgic_dist_irq_clear_level(vcpu, irq_num); + if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) + vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + vgic_dist_irq_clear_pending(vcpu, irq_num); + } + } enabled = vgic_irq_is_enabled(vcpu, irq_num); From dd684bca5c33f00bdb31d2fb9819d6581489104f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:30:45 +0200 Subject: [PATCH 266/287] arm/arm64: KVM: vgic: Fix SGI writes to GICD_I{CS}PENDR0 Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the pending state for SGIs. Make sure the implementation handles this correctly. Signed-off-by: Christoffer Dall (cherry picked from commit 9da48b5502622f9f0e49df957521ec43a0c9f4c1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 07e97de1851b..4936a68d4b9b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg; + u32 *reg, orig; u32 level_mask; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, /* Mark both level and edge triggered irqs as pending */ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); *reg &= level_mask; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + vgic_update_state(vcpu->kvm); return true; } @@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, phys_addr_t offset) { u32 *level_active; - u32 *reg; + u32 *reg, orig; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { @@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id, offset); *reg |= *level_active; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + /* Clear soft-pending flags */ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, vcpu->vcpu_id, offset); From ce492b1937416dc0f6da42a887b5190b1ca9b4fe Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:34:04 +0200 Subject: [PATCH 267/287] arm/arm64: KVM: vgic: Clarify and correct vgic documentation The VGIC virtual distributor implementation documentation was written a very long time ago, before the true nature of the beast had been partially absorbed into my bloodstream. Clarify the docs. Plus, it fixes an actual bug. ICFRn, pfff. Signed-off-by: Christoffer Dall (cherry picked from commit 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 4936a68d4b9b..ff88dbcacc29 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -36,21 +36,22 @@ * How the whole thing works (courtesy of Christoffer Dall): * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_pending vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. + * something is pending on the CPU interface. + * - Interrupts that are pending on the distributor are stored on the + * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land + * ioctls and guest mmio ops, and other in-kernel peripherals such as the + * arch. timers). * - Every time the bitmap changes, the irq_pending_on_cpu oracle is * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: * - PPI: dist->irq_pending & dist->irq_enable * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR + * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can * accept the interrupt. + * - If any of the above state changes, we must recalculate the oracle. * - The same is true when injecting an interrupt, except that we only * consider a single interrupt at a time. The irq_spi_cpu array * contains the target CPU for each SPI. From aef14f44d897ca82917d6e5d5ad847c4a9e24c40 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:00 +0100 Subject: [PATCH 268/287] KVM: ARM: vgic: plug irq injection race As it stands, nothing prevents userspace from injecting an interrupt before the guest's GIC is actually initialized. This goes unnoticed so far (as everything is pretty much statically allocated), but ends up exploding in a spectacular way once we switch to a more dynamic allocation (the GIC data structure isn't there yet). The fix is to test for the "ready" flag in the VGIC distributor before trying to inject the interrupt. Note that in order to avoid breaking userspace, we have to ignore what is essentially an error. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 71afaba4a2e98bb7bdeba5078370ab43d46e67a1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ff88dbcacc29..5744a49d7680 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1585,7 +1585,8 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) + if (likely(vgic_initialized(kvm)) && + vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; From 38cb2f7b9a9ea6312429da05575a4bf6e697b573 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:01 +0100 Subject: [PATCH 269/287] arm/arm64: KVM: vgic: switch to dynamic allocation So far, all the VGIC data structures are statically defined by the *maximum* number of vcpus and interrupts it supports. It means that we always have to oversize it to cater for the worse case. Start by changing the data structures to be dynamically sizeable, and allocate them at runtime. The sizes are still very static though. Signed-off-by: Marc Zyngier (cherry picked from commit c1bfb577addd4867a82c4f235824a315d5afb94a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 + include/kvm/arm_vgic.h | 76 ++++++++++--- virt/kvm/arm/vgic.c | 243 +++++++++++++++++++++++++++++++++++------ 3 files changed, 269 insertions(+), 53 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c8ff64b54459..9e374158363a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + + kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); + kvm_vgic_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f074539c6ac5..fd1b8f252da1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -54,19 +54,33 @@ * - a bunch of shared interrupts (SPI) */ struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; + /* + * - One UL per VCPU for private interrupts (assumes UL is at + * least 32 bits) + * - As many UL as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, one UL per vcpu (the state for vcpu n is in + * private[n]). The shared interrupts are accessed via the + * "shared" pointer (IRQn state is at bit n-32 in the bitmap). + */ + unsigned long *private; + unsigned long *shared; }; struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; + /* + * - 8 u32 per VCPU for private interrupts + * - As many u32 as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, (the state for vcpu n is in private[n*8] to + * private[n*8 + 7]). The shared interrupts are accessed via + * the "shared" pointer (IRQn state is at byte (n-32)%4 of the + * shared[(n-32)/4] word). + */ + u32 *private; + u32 *shared; }; struct kvm_vcpu; @@ -127,6 +141,9 @@ struct vgic_dist { bool in_kernel; bool ready; + int nr_cpus; + int nr_irqs; + /* Virtual control interface mapping */ void __iomem *vctrl_base; @@ -166,15 +183,36 @@ struct vgic_dist { /* Level/edge triggered */ struct vgic_bitmap irq_cfg; - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; + /* + * Source CPU per SGI and target CPU: + * + * Each byte represent a SGI observable on a VCPU, each bit of + * this byte indicating if the corresponding VCPU has + * generated this interrupt. This is a GICv2 feature only. + * + * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are + * the SGIs observable on VCPUn. + */ + u8 *irq_sgi_sources; - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + /* + * Target CPU for each SPI: + * + * Array of available SPI, each byte indicating the target + * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. + */ + u8 *irq_spi_cpu; + + /* + * Reverse lookup of irq_spi_cpu for faster compute pending: + * + * Array of bitmaps, one per VCPU, describing if IRQn is + * routed to a particular VCPU. + */ + struct vgic_bitmap *irq_spi_target; /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; + unsigned long *irq_pending_on_cpu; #endif }; @@ -204,11 +242,11 @@ struct vgic_v3_cpu_if { struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + u8 *vgic_irq_lr_map; /* Pending interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + unsigned long *pending_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); +void kvm_vgic_destroy(struct kvm *kvm); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5744a49d7680..102dde58c549 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); @@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; /* - * struct vgic_bitmap contains unions that provide two views of - * the same data. In one case it is an array of registers of - * u32's, and in the other case it is a bitmap of unsigned - * longs. + * struct vgic_bitmap contains a bitmap made of unsigned longs, but + * extracts u32s out of them. * * This does not work on 64-bit BE systems, because the bitmap access * will store two consecutive 32-bit words with the higher-addressed @@ -124,23 +123,45 @@ static const struct vgic_params *vgic; #define REG_OFFSET_SWIZZLE 0 #endif +static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) +{ + int nr_longs; + + nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + + b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); + if (!b->private) + return -ENOMEM; + + b->shared = b->private + nr_cpus; + + return 0; +} + +static void vgic_free_bitmap(struct vgic_bitmap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; +} + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; else - return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, int cpuid, int irq) { if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); + return test_bit(irq, x->private + cpuid); - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); } static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, @@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, unsigned long *reg; if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; + reg = x->private + cpuid; } else { - reg = x->shared.reg_ul; + reg = x->shared; irq -= VGIC_NR_PRIVATE_IRQS; } @@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) { - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; - return x->percpu[cpuid].reg_ul; + return x->private + cpuid; } static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) { - return x->shared.reg_ul; + return x->shared; +} + +static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) +{ + int size; + + size = nr_cpus * VGIC_NR_PRIVATE_IRQS; + size += nr_irqs - VGIC_NR_PRIVATE_IRQS; + + x->private = kzalloc(size, GFP_KERNEL); + if (!x->private) + return -ENOMEM; + + x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); + return 0; +} + +static void vgic_free_bytemap(struct vgic_bytemap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; } static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 8) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; + u32 *reg; + + if (offset < VGIC_NR_PRIVATE_IRQS) { + reg = x->private; + offset += cpuid * VGIC_NR_PRIVATE_IRQS; + } else { + reg = x->shared; + offset -= VGIC_NR_PRIVATE_IRQS; + } + + return reg + (offset / sizeof(u32)); } #define VGIC_CFG_LEVEL 0 @@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) */ vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; vgic_set_lr(vcpu, i, lr); @@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Copy source SGIs from distributor side */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { int shift = 8 * (sgi - min_sgi); - reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; } mmio_data_write(mmio, ~0, reg); @@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Clear pending SGIs on the distributor */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { u8 mask = reg >> (8 * (sgi - min_sgi)); + u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); if (set) { - if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + if ((*src & mask) != mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + *src |= mask; } else { - if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + if (*src & mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + *src &= ~mask; } } @@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) +{ + return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; +} + static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) if (target_cpus & 1) { /* Flag the SGI as pending */ vgic_dist_irq_set_pending(vcpu, sgi); - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; + *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm) int c; if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); + set_bit(0, dist->irq_pending_on_cpu); return; } kvm_for_each_vcpu(c, vcpu, kvm) { if (compute_pending_for_cpu(vcpu)) { pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); + set_bit(c, dist->irq_pending_on_cpu); } } } @@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) int vcpu_id = vcpu->vcpu_id; int c; - sources = dist->irq_sgi_sources[vcpu_id][irq]; + sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } - dist->irq_sgi_sources[vcpu_id][irq] = sources; + *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; /* * If the sources bitmap has been cleared it means that we @@ -1332,7 +1384,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * us. Claim we don't have anything pending. We'll * adjust that if needed while exiting. */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); + clear_bit(vcpu_id, dist->irq_pending_on_cpu); } } @@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Check if we still have something up our sleeve... */ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) @@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) return 0; - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } static void vgic_kick_vcpus(struct kvm *kvm) @@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (level) { vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); + set_bit(cpuid, dist->irq_pending_on_cpu); } out: @@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->vgic_irq_lr_map); + vgic_cpu->pending_shared = NULL; + vgic_cpu->vgic_irq_lr_map = NULL; +} + +static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + + if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + kvm_vgic_vcpu_destroy(vcpu); + return -ENOMEM; + } + + return 0; +} + /** * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state * @vcpu: pointer to the vcpu struct @@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +void kvm_vgic_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_destroy(vcpu); + + vgic_free_bitmap(&dist->irq_enabled); + vgic_free_bitmap(&dist->irq_level); + vgic_free_bitmap(&dist->irq_pending); + vgic_free_bitmap(&dist->irq_soft_pend); + vgic_free_bitmap(&dist->irq_queued); + vgic_free_bitmap(&dist->irq_cfg); + vgic_free_bytemap(&dist->irq_priority); + if (dist->irq_spi_target) { + for (i = 0; i < dist->nr_cpus; i++) + vgic_free_bitmap(&dist->irq_spi_target[i]); + } + kfree(dist->irq_sgi_sources); + kfree(dist->irq_spi_cpu); + kfree(dist->irq_spi_target); + kfree(dist->irq_pending_on_cpu); + dist->irq_sgi_sources = NULL; + dist->irq_spi_cpu = NULL; + dist->irq_spi_target = NULL; + dist->irq_pending_on_cpu = NULL; +} + +/* + * Allocate and initialize the various data structures. Must be called + * with kvm->lock held! + */ +static int vgic_init_maps(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int nr_cpus, nr_irqs; + int ret, i; + + nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); + ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); + + if (ret) + goto out; + + dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); + dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); + dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, + GFP_KERNEL); + dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); + if (!dist->irq_sgi_sources || + !dist->irq_spi_cpu || + !dist->irq_spi_target || + !dist->irq_pending_on_cpu) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < nr_cpus; i++) + ret |= vgic_init_bitmap(&dist->irq_spi_target[i], + nr_cpus, nr_irqs); + + if (ret) + goto out; + + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_vcpu_init_maps(vcpu, nr_irqs); + if (ret) { + kvm_err("VGIC: Failed to allocate vcpu memory\n"); + break; + } + } + +out: + if (ret) + kvm_vgic_destroy(kvm); + + return ret; +} + /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + ret = vgic_init_maps(kvm); + if (ret) + kvm_err("Unable to allocate maps\n"); + out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); From ac2409584bc7bb3b8cff23884e65cf341860d674 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:02 +0100 Subject: [PATCH 270/287] arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS Having a dynamic number of supported interrupts means that we cannot relly on VGIC_NR_SHARED_IRQS being fixed anymore. Instead, make it take the distributor structure as a parameter, so it can return the right value. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic.c | 16 +++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index fd1b8f252da1..b2f9936df319 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,7 +29,6 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 102dde58c549..a24fdacf5381 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) } } +static int vgic_nr_shared_irqs(struct vgic_dist *dist) +{ + return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long *pending, *enabled, *pend_percpu, *pend_shared; unsigned long pending_private, pending_shared; + int nr_shared = vgic_nr_shared_irqs(dist); int vcpu_id; vcpu_id = vcpu->vcpu_id; @@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); + bitmap_and(pend_shared, pending, enabled, nr_shared); bitmap_and(pend_shared, pend_shared, vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); + nr_shared); pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); + pending_shared = find_first_bit(pend_shared, nr_shared); return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); + pending_shared < vgic_nr_shared_irqs(dist)); } /* @@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { + for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } From 3e0fb55b4af81f2364b1969bdbf31bf67883a4d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:03 +0100 Subject: [PATCH 271/287] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS We now have the information about the number of CPU interfaces in the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just rely on KVM_MAX_VCPUS where we don't have the choice. Yet. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fc675e355e705a046df7b635d3f3330c0ad94569) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 +-- virt/kvm/arm/vgic.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b2f9936df319..3b73d7845124 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,13 +29,12 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) +#if (KVM_MAX_VCPUS > 8) #error Invalid number of CPU interfaces #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a24fdacf5381..df0700bd0f23 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { + for_each_set_bit(c, &sources, dist->nr_cpus) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } @@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) + if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; for (i = 0; i < VGIC_NR_IRQS; i++) { @@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); From 35a089f06fa6d1f02265be8b1e31e3775003d5c8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:04 +0100 Subject: [PATCH 272/287] arm/arm64: KVM: vgic: handle out-of-range MMIO accesses Now that we can (almost) dynamically size the number of interrupts, we're facing an interesting issue: We have to evaluate at runtime whether or not an access hits a valid register, based on the sizing of this particular instance of the distributor. Furthermore, the GIC spec says that accessing a reserved register is RAZ/WI. For this, add a new field to our range structure, indicating the number of bits a single interrupts uses. That allows us to find out whether or not the access is in range. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit c3c918361adcceb816c92b21dd95d2b46fb96a8f) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 ++- virt/kvm/arm/vgic.c | 56 +++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3b73d7845124..2767f939f47c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,6 +32,7 @@ #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 +#define VGIC_MAX_IRQS 1024 /* Sanity checks... */ #if (KVM_MAX_VCPUS > 8) @@ -42,7 +43,7 @@ #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > 1024) +#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index df0700bd0f23..de975c908301 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct mmio_range { phys_addr_t base; unsigned long len; + int bits_per_irq; bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset); }; @@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, + .bits_per_irq = 0, .handle_mmio = handle_mmio_misc, }, { .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg, }, { .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg, }, { .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg, }, { .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg, }, { .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg, }, { .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_target_reg, }, { .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, + .len = VGIC_MAX_IRQS / 4, + .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg, }, { @@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, return NULL; } +static bool vgic_validate_access(const struct vgic_dist *dist, + const struct mmio_range *range, + unsigned long offset) +{ + int irq; + + if (!range->bits_per_irq) + return true; /* Not an irq-based access */ + + irq = offset * 8 / range->bits_per_irq; + if (irq >= dist->nr_irqs) + return false; + + return true; +} + /** * vgic_handle_mmio - handle an in-kernel MMIO access * @vcpu: pointer to the vcpu performing the access @@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, spin_lock(&vcpu->kvm->arch.vgic.lock); offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); + if (vgic_validate_access(dist, range, offset)) { + updated_state = range->handle_mmio(vcpu, mmio, offset); + } else { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + updated_state = false; + } spin_unlock(&vcpu->kvm->arch.vgic.lock); kvm_prepare_mmio(run, mmio); kvm_handle_mmio_return(vcpu, run); From ddfab003a2dbff1e226a24860e8f4f91a7d3d131 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:05 +0100 Subject: [PATCH 273/287] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS Nuke VGIC_NR_IRQS entierly, now that the distributor instance contains the number of IRQ allocated to this GIC. Also add VGIC_NR_IRQS_LEGACY to preserve the current API. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 6 +++--- virt/kvm/arm/vgic.c | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2767f939f47c..aa20d4a7242f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -25,7 +25,7 @@ #include #include -#define VGIC_NR_IRQS 256 +#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) @@ -39,11 +39,11 @@ #error Invalid number of CPU interfaces #endif -#if (VGIC_NR_IRQS & 31) +#if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) +#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index de975c908301..49501bbc2709 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_lr vlr; int lr; /* Sanitize the input... */ BUG_ON(sgi_source_id & ~7); BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); + BUG_ON(irq >= dist->nr_irqs); kvm_debug("Queue IRQ%d\n", irq); @@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vlr = vgic_get_lr(vcpu, lr); - BUG_ON(vlr.irq >= VGIC_NR_IRQS); + BUG_ON(vlr.irq >= dist->nr_irqs); vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } @@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; - for (i = 0; i < VGIC_NR_IRQS; i++) { + for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); @@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm) int ret, i; nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; - nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + if (!dist->nr_irqs) + dist->nr_irqs = VGIC_NR_IRQS_LEGACY; + + nr_irqs = dist->nr_irqs; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); @@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) vgic_set_target_reg(kvm, 0, i); kvm->arch.vgic.ready = true; From b9ca28a414c2e5dfb707bcba625eb76f334c787f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:06 +0100 Subject: [PATCH 274/287] arm/arm64: KVM: vgic: delay vgic allocation until init time It is now quite easy to delay the allocation of the vgic tables until we actually require it to be up and running (when the first vcpu is kicking around, or someones tries to access the GIC registers). This allow us to allocate memory for the exact number of CPUs we have. As nobody configures the number of interrupts just yet, use a fallback to VGIC_NR_IRQS_LEGACY. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 4956f2bc1fdee4bc336532f3f34635a8534cedfd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 ------- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic.c | 42 +++++++++++++++++++++++++++++------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e374158363a..072a2084005c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - int ret; - /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; - /* Set up VGIC */ - ret = kvm_vgic_vcpu_init(vcpu); - if (ret) - return ret; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index aa20d4a7242f..2f2aac8448a4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 49501bbc2709..e7bca4bb7fd1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to * this vcpu and enable the VGIC for this VCPU */ -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= dist->nr_cpus) - return -EBUSY; - for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, @@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); - - return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; + if (dist->nr_cpus) /* Already allocated */ + return 0; + nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); + if (!nr_cpus) /* No vcpus? Can't be good... */ + return -EINVAL; + + /* + * If nobody configured the number of interrupts, use the + * legacy one. + */ if (!dist->nr_irqs) dist->nr_irqs = VGIC_NR_IRQS_LEGACY; @@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm) } } + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + out: if (ret) kvm_vgic_destroy(kvm); @@ -1867,6 +1874,7 @@ static int vgic_init_maps(struct kvm *kvm) */ int kvm_vgic_init(struct kvm *kvm) { + struct kvm_vcpu *vcpu; int ret = 0, i; if (!irqchip_in_kernel(kvm)) @@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } + ret = vgic_init_maps(kvm); + if (ret) { + kvm_err("Unable to allocate maps\n"); + goto out; + } + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { @@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_init(vcpu); kvm->arch.vgic.ready = true; out: + if (ret) + kvm_vgic_destroy(kvm); mutex_unlock(&kvm->lock); return ret; } @@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - ret = vgic_init_maps(kvm); - if (ret) - kvm_err("Unable to allocate maps\n"); - out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); @@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + ret = vgic_init_maps(dev->kvm); + if (ret) + goto out; + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { ret = -EINVAL; goto out; From e1fde0a1e746c70d026c1388c9d1c0bce278ae01 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:07 +0100 Subject: [PATCH 275/287] arm/arm64: KVM: vgic: make number of irqs a configurable attribute In order to make the number of interrupts configurable, use the new fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as a VGIC configurable attribute. Userspace can now specify the exact size of the GIC (by increments of 32 interrupts). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit a98f26f183801685ef57333de4bafd4bbc692c7c) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 10 +++++ arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 1 + virt/kvm/arm/vgic.c | 37 +++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 7f4e91b1316b..df8b0c7540b6 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -71,3 +71,13 @@ Groups: Errors: -ENODEV: Getting or setting this register is not yet supported -EBUSY: One or more VCPUs are running + + KVM_DEV_ARM_VGIC_GRP_NR_IRQS + Attributes: + A value describing the number of interrupts (SGI, PPI and SPI) for + this GIC instance, ranging from 64 to 1024, in increments of 32. + + Errors: + -EINVAL: Value set is out of the expected range + -EBUSY: Value has already be set, or GIC has already been initialized + with default values. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 51257fda254b..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -174,6 +174,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f4ec5a674d05..8e38878c87c6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -160,6 +160,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e7bca4bb7fd1..43b56c696752 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return vgic_attr_regs_access(dev, attr, ®, true); } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_IRQS || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_irqs = val; + + mutex_unlock(&dev->kvm->lock); + + return ret; + } } @@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = put_user(reg, uaddr); break; } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); + break; + } } @@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return vgic_has_attr_regs(vgic_cpu_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; } return -ENXIO; } From b6c20297368fc6c782c0e5709f0d261bfdee2f6e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 25 Sep 2014 18:41:07 +0200 Subject: [PATCH 276/287] arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset The sgi values calculated in read_set_clear_sgi_pend_reg() and write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4 with catastrophic results in that subfunctions ended up overwriting memory not allocated for the expected purpose. This showed up as bugs in kfree() and the kernel complaining a lot of you turn on memory debugging. This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2 Reported-by: Shannon Zhao Signed-off-by: Christoffer Dall (cherry picked from commit 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 43b56c696752..506693152e47 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg = 0; @@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg; From 17434ac66579bb31d6a129edb7dec7989b0a7a37 Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Wed, 9 Jul 2014 11:17:04 -0500 Subject: [PATCH 277/287] arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc The current aarch64 calculation for VTTBR_BADDR_MASK masks only 39 bits and not all the bits in the PA range. This is clearly a bug that manifests itself on systems that allocate memory in the higher address space range. [ Modified from Joel's original patch to be based on PHYS_MASK_SHIFT instead of a hard-coded value and to move the alignment check of the allocation to mmu.c. Also added a comment explaining why we hardcode the IPA range and changed the stage-2 pgd allocation to be based on the 40 bit IPA range instead of the maximum possible 48 bit PA range. - Christoffer ] Reviewed-by: Catalin Marinas Signed-off-by: Joel Schopp Signed-off-by: Christoffer Dall (cherry picked from commit dbff124e29fa24aff9705b354b5f4648cd96e0bb) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++-- arch/arm64/include/asm/kvm_arm.h | 13 ++++++++++++- arch/arm64/include/asm/kvm_mmu.h | 5 ++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 072a2084005c..15111ca6fe30 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -410,9 +410,9 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; - kvm->arch.vttbr |= vmid; + kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520459ed..7fd3e27e3ccc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -122,6 +122,17 @@ #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +/* + * We configure the Stage-2 page tables to always restrict the IPA space to be + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are + * not known to exist and will break with this configuration. + * + * Note that when using 4K pages, we concatenate two first level page tables + * together. + * + * The magic numbers used for VTTBR_X in this patch can be found in Tables + * D4-23 and D4-25 in ARM DDI 0487A.b. + */ #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: @@ -149,7 +160,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 737da742b293..a030d163840b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -59,10 +59,9 @@ #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) /* - * Align KVM with the kernel's view of physical memory. Should be - * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + * We currently only support a 40bit IPA. */ -#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SHIFT (40) #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) From 7f79ef21e90a97b535ca0f181929754cd72bda10 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 26 Sep 2014 12:29:34 +0200 Subject: [PATCH 278/287] arm/arm64: KVM: Report correct FSC for unsupported fault types When we catch something that's not a permission fault or a translation fault, we log the unsupported FSC in the kernel log, but we were masking off the bottom bits of the FSC which was not very helpful. Also correctly report the FSC for data and instruction faults rather than telling people it was a DFCS, which doesn't exist in the ARM ARM. Reviewed-by: Peter Maydell Signed-off-by: Christoffer Dall (cherry picked from commit 0496daa5cf99741ce8db82686b4c7446a37feabb) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/mmu.c | 8 +++++--- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 69b746955fca..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -148,6 +148,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bb06f76a8f89..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -882,10 +882,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault(vcpu); + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", - kvm_vcpu_trap_get_class(vcpu), fault_status); + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); return -EFAULT; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fdc3e21abd8d..5674a55b5518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -173,6 +173,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } From 2dc5e2cf90915059fbc39ef380605aef6adaef47 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 22 Sep 2014 15:52:48 +0100 Subject: [PATCH 279/287] arm: kvm: fix CPU hotplug On some platforms with no power management capabilities, the hotplug implementation is allowed to return from a smp_ops.cpu_die() call as a function return. Upon a CPU onlining event, the KVM CPU notifier tries to reinstall the hyp stub, which fails on platform where no reset took place following a hotplug event, with the message: CPU1: smp_ops.cpu_die() returned, trying to resuscitate CPU1: Booted secondary processor Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x80409540 unexpected data abort in Hyp mode at: 0x80401fe8 unexpected HVC/SVC trap in Hyp mode at: 0x805c6170 since KVM code is trying to reinstall the stub on a system where it is already configured. To prevent this issue, this patch adds a check in the KVM hotplug notifier that detects if the HYP stub really needs re-installing when a CPU is onlined and skips the installation call if the stub is already in place, which means that the CPU has not been reset. Signed-off-by: Vladimir Murzin Acked-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 37a34ac1d4775aafbc73b9db53c7daebbbc67e6a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 15111ca6fe30..d0c8ee654bbf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self, switch (action) { case CPU_STARTING: case CPU_STARTING_FROZEN: - cpu_init_hyp_mode(NULL); + if (__hyp_get_vectors() == hyp_default_vectors) + cpu_init_hyp_mode(NULL); break; } From ad4ef3e73e40166a585d71f36a521f9f6d5db3c4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 11:53:28 +0200 Subject: [PATCH 280/287] Revert "arm, kvm: fix double lock on cpu_add_remove_lock" This reverts commit d77503eadd2f16f2900b9be79a1dc6f37e8cd579. The whole register cpu hotplug fix series has not been applied, so LSK is released without this fix. If we ever include that series in LSK later, then this can be fixed later too. Signed-off-by: Ming Lei Reviewed-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki (cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/vgic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 22fa819a9b6a..5081e809821f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void) host_vtimer_irq = ppi; - err = __register_cpu_notifier(&kvm_timer_cpu_nb); + err = register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { kvm_err("Cannot register timer CPU notifier\n"); goto out_free; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 506693152e47..8e1dc03342c3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2444,7 +2444,7 @@ int kvm_vgic_hyp_init(void) return ret; } - ret = __register_cpu_notifier(&vgic_cpu_nb); + ret = register_cpu_notifier(&vgic_cpu_nb); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); goto out_free_irq; From 7ebda0194ff1872ef36d93cf309cc4043ffba4ee Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:02:11 -0700 Subject: [PATCH 281/287] include/linux/mm.h: add PAGE_ALIGNED() helper To test whether an address is aligned to PAGE_SIZE. Cc: HATAYAMA Daisuke Cc: "Eric W. Biederman" , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0fa73b86ef0797ca4fde5334117ca0b330f08030) Signed-off-by: Christoffer Dall --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index e0c8528a41a4..f42c5baa47cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ +#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way From 8032ebeefc762abdcc8f14f08e9e1d3839996063 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 25 Feb 2014 08:41:09 +0100 Subject: [PATCH 282/287] ARM: 7990/1: asm: rename logical shift macros push pull into lspush lspull Renames logical shift macros, 'push' and 'pull', defined in arch/arm/include/asm/assembler.h, into 'lspush' and 'lspull'. That eliminates name conflict between 'push' logical shift macro and 'push' instruction mnemonic. That allows assembler.h to be included in .S files that use 'push' instruction. Suggested-by: Will Deacon Signed-off-by: Victor Kamensky Acked-by: Nicolas Pitre Signed-off-by: Russell King (cherry picked from commit d98b90ea22b0a28d9d787769704a9cf1ea5a513a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/assembler.h | 8 +- arch/arm/lib/copy_template.S | 36 ++--- arch/arm/lib/csumpartialcopygeneric.S | 96 ++++++------- arch/arm/lib/io-readsl.S | 12 +- arch/arm/lib/io-writesl.S | 12 +- arch/arm/lib/memmove.S | 36 ++--- arch/arm/lib/uaccess.S | 192 +++++++++++++------------- 7 files changed, 196 insertions(+), 196 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 05ee9eebad6b..9271457adac8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -30,8 +30,8 @@ * Endian independent macros for shifting bytes within registers. */ #ifndef __ARMEB__ -#define pull lsr -#define push lsl +#define lspull lsr +#define lspush lsl #define get_byte_0 lsl #0 #define get_byte_1 lsr #8 #define get_byte_2 lsr #16 @@ -41,8 +41,8 @@ #define put_byte_2 lsl #16 #define put_byte_3 lsl #24 #else -#define pull lsl -#define push lsr +#define lspull lsl +#define lspush lsr #define get_byte_0 lsr #24 #define get_byte_1 lsr #16 #define get_byte_2 lsr #8 diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb007..3bc8eb811a73 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -197,24 +197,24 @@ 12: PLD( pld [r1, #124] ) 13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull + mov r3, lr, lspull #\pull subs r2, r2, #32 ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f bge 12b PLD( cmn r2, #96 ) @@ -225,10 +225,10 @@ 14: ands ip, r2, #28 beq 16f -15: mov r3, lr, pull #\pull +15: mov r3, lr, lspull #\pull ldr1w r1, lr, abort=21f subs ip, ip, #4 - orr r3, r3, lr, push #\push + orr r3, r3, lr, lspush #\push str1w r0, r3, abort=21f bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a09..d6e742d24007 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -141,7 +141,7 @@ FN_ENTRY tst len, #2 mov r5, r4, get_byte_0 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 @@ -171,23 +171,23 @@ FN_ENTRY cmp ip, #2 beq .Lsrc2_aligned bhi .Lsrc3_aligned - mov r4, r5, pull #8 @ C = 0 + mov r4, r5, lspull #8 @ C = 0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - mov r7, r7, pull #8 - orr r7, r7, r8, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #8 + mov r4, r8, lspull #8 sub ip, ip, #16 teq ip, #0 bne 1b @@ -196,50 +196,50 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #8 + mov r4, r6, lspull #8 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #24 + orr r4, r4, r5, lspush #24 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #8 + mov r4, r5, lspull #8 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 tst len, #2 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 b .Lexit -.Lsrc2_aligned: mov r4, r5, pull #16 +.Lsrc2_aligned: mov r4, r5, lspull #16 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - mov r7, r7, pull #16 - orr r7, r7, r8, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #16 + mov r4, r8, lspull #16 sub ip, ip, #16 teq ip, #0 bne 1b @@ -248,20 +248,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #16 + mov r4, r6, lspull #16 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #16 + orr r4, r4, r5, lspush #16 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #16 + mov r4, r5, lspull #16 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -276,24 +276,24 @@ FN_ENTRY load1b r5 b .Lexit -.Lsrc3_aligned: mov r4, r5, pull #24 +.Lsrc3_aligned: mov r4, r5, lspull #24 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - mov r7, r7, pull #24 - orr r7, r7, r8, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #24 + mov r4, r8, lspull #24 sub ip, ip, #16 teq ip, #0 bne 1b @@ -302,20 +302,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #24 + mov r4, r6, lspull #24 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #8 + orr r4, r4, r5, lspush #8 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #24 + mov r4, r5, lspull #24 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -326,7 +326,7 @@ FN_ENTRY load1l r4 mov r5, r4, get_byte_0 strb r5, [dst], #1 - adcs sum, sum, r4, push #24 + adcs sum, sum, r4, lspush #24 mov r5, r4, get_byte_1 b .Lexit FN_EXIT diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4b..7a7430950c79 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl) strb ip, [r1], #1 4: subs r2, r2, #1 - mov ip, r3, pull #24 + mov ip, r3, lspull #24 ldrne r3, [r0] - orrne ip, ip, r3, push #8 + orrne ip, ip, r3, lspush #8 strne ip, [r1], #4 bne 4b b 8f 5: subs r2, r2, #1 - mov ip, r3, pull #16 + mov ip, r3, lspull #16 ldrne r3, [r0] - orrne ip, ip, r3, push #16 + orrne ip, ip, r3, lspush #16 strne ip, [r1], #4 bne 5b b 7f 6: subs r2, r2, #1 - mov ip, r3, pull #8 + mov ip, r3, lspull #8 ldrne r3, [r0] - orrne ip, ip, r3, push #24 + orrne ip, ip, r3, lspush #24 strne ip, [r1], #4 bne 6b diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725c..d0d104a0dd11 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,26 @@ ENTRY(__raw_writesl) blt 5f bgt 6f -4: mov ip, r3, pull #16 +4: mov ip, r3, lspull #16 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #16 + orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b mov pc, lr -5: mov ip, r3, pull #8 +5: mov ip, r3, lspull #8 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #24 + orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b mov pc, lr -6: mov ip, r3, pull #24 +6: mov ip, r3, lspull #24 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #8 + orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b mov pc, lr diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 938fc14f962d..d1fc0c0c342c 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -147,24 +147,24 @@ ENTRY(memmove) 12: PLD( pld [r1, #-128] ) 13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push + mov lr, r3, lspush #\push subs r2, r2, #32 ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull stmdb r0!, {r4 - r9, ip, lr} bge 12b PLD( cmn r2, #96 ) @@ -175,10 +175,10 @@ ENTRY(memmove) 14: ands ip, r2, #28 beq 16f -15: mov lr, r3, push #\push +15: mov lr, r3, lspush #\push ldr r3, [r1, #-4]! subs ip, ip, #4 - orr lr, lr, r3, pull #\pull + orr lr, lr, r3, lspull #\pull str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index 5c908b1cb8ed..e50520904b76 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -117,9 +117,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 ldr r7, [r1], #4 - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -131,30 +131,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_1rem8lp -.Lc2u_1cpy8lp: mov r3, r7, pull #8 +.Lc2u_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_1cpy8lp .Lc2u_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi @@ -172,9 +172,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 ldr r7, [r1], #4 - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -186,30 +186,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_2rem8lp -.Lc2u_2cpy8lp: mov r3, r7, pull #16 +.Lc2u_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_2cpy8lp .Lc2u_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi @@ -227,9 +227,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 ldr r7, [r1], #4 - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -241,30 +241,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_3rem8lp -.Lc2u_3cpy8lp: mov r3, r7, pull #24 +.Lc2u_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_3cpy8lp .Lc2u_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi @@ -382,9 +382,9 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault .Lcfu_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -396,30 +396,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_1rem8lp -.Lcfu_1cpy8lp: mov r3, r7, pull #8 +.Lcfu_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} bpl .Lcfu_1cpy8lp .Lcfu_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_1fupi @@ -437,9 +437,9 @@ USER( TUSER( ldrne) r7, [r1], #4) @ May fault .Lcfu_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -452,30 +452,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault blt .Lcfu_2rem8lp -.Lcfu_2cpy8lp: mov r3, r7, pull #16 +.Lcfu_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} bpl .Lcfu_2cpy8lp .Lcfu_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_2fupi @@ -493,9 +493,9 @@ USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault .Lcfu_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -507,30 +507,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_3rem8lp -.Lcfu_3cpy8lp: mov r3, r7, pull #24 +.Lcfu_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} subs ip, ip, #16 bpl .Lcfu_3cpy8lp .Lcfu_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_3fupi From 0db5306859a69c38c695400c34bde7b7df5f1806 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 May 2013 18:07:19 +0100 Subject: [PATCH 283/287] ARM: barrier: allow options to be passed to memory barrier instructions On ARMv7, the memory barrier instructions take an optional `option' field which can be used to constrain the effects of a memory barrier based on shareability and access type. This patch allows the caller to pass these options if required, and updates the smp_*() barriers to request inner-shareable barriers, affecting only stores for the _wmb variant. wmb() is also changed to use the -st version of dsb. Reported-by: Albin Tonnerre Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit 3ea128065ed20d33bd02ff6dab689f88e38000be) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/assembler.h | 4 ++-- arch/arm/include/asm/barrier.h | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 9271457adac8..cab788045029 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -212,9 +212,9 @@ #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 .ifeqs "\mode","arm" - ALT_SMP(dmb) + ALT_SMP(dmb ish) .else - ALT_SMP(W(dmb)) + ALT_SMP(W(dmb) ish) .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 8dcd9c702d90..60f15e274e6d 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -14,27 +14,27 @@ #endif #if __LINUX_ARM_ARCH__ >= 7 -#define isb() __asm__ __volatile__ ("isb" : : : "memory") -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") -#define dmb() __asm__ __volatile__ ("dmb" : : : "memory") +#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory") +#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory") +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ +#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") #elif defined(CONFIG_CPU_FA526) -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #else -#define isb() __asm__ __volatile__ ("" : : : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define isb(x) __asm__ __volatile__ ("" : : : "memory") +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif #ifdef CONFIG_ARCH_HAS_BARRIERS @@ -42,7 +42,7 @@ #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() -#define wmb() mb() +#define wmb() do { dsb(st); outer_sync(); } while (0) #else #define mb() barrier() #define rmb() barrier() @@ -54,9 +54,9 @@ #define smp_rmb() barrier() #define smp_wmb() barrier() #else -#define smp_mb() dmb() -#define smp_rmb() dmb() -#define smp_wmb() dmb() +#define smp_mb() dmb(ish) +#define smp_rmb() smp_mb() +#define smp_wmb() dmb(ishst) #endif #define read_barrier_depends() do { } while(0) From 63d832d826b75fec2ed062e5a5dc107c5a5db158 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 31 Jul 2013 12:44:42 -0400 Subject: [PATCH 284/287] ARM: mm: Introduce virt_to_idmap() with an arch hook On some PAE systems (e.g. TI Keystone), memory is above the 32-bit addressable limit, and the interconnect provides an aliased view of parts of physical memory in the 32-bit addressable space. This alias is strictly for boot time usage, and is not otherwise usable because of coherency limitations. On such systems, the idmap mechanism needs to take this aliased mapping into account. This patch introduces virt_to_idmap() and a arch function pointer which can be populated by platform which needs it. Also populate necessary idmap spots with now available virt_to_idmap(). Avoided #ifdef approach to be compatible with multi-platform builds. Most architecture won't touch it and in that case virt_to_idmap() fall-back to existing virt_to_phys() macro. Cc: Russell King Acked-by: Nicolas Pitre Signed-off-by: Santosh Shilimkar (cherry picked from commit 4dc9a81715973cb137a14399420bb35b0ed7d6ef) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/memory.h | 16 ++++++++++++++++ arch/arm/kernel/smp.c | 4 ++-- arch/arm/mm/idmap.c | 5 +++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab313c5..21b458e6b0b8 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -157,6 +157,7 @@ */ #define __PV_BITS_31_24 0x81000000 +extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x); extern unsigned long __pv_phys_offset; #define PHYS_OFFSET __pv_phys_offset @@ -232,6 +233,21 @@ static inline void *phys_to_virt(phys_addr_t x) #define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +/* + * These are for systems that have a hardware interconnect supported alias of + * physical memory for idmap purposes. Most cases should leave these + * untouched. + */ +static inline phys_addr_t __virt_to_idmap(unsigned long x) +{ + if (arch_virt_to_idmap) + return arch_virt_to_idmap(x); + else + return __virt_to_phys(x); +} + +#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x)) + /* * Virtual <-> DMA view memory address translations * Again, these are *only* valid on the kernel direct mapped RAM diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb9..5a8ad2c8eda0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -87,8 +87,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); + secondary_data.pgdir = virt_to_idmap(idmap_pgd); + secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir); __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac27095..c0a1e48f6733 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -10,6 +10,7 @@ #include pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, @@ -67,8 +68,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, unsigned long addr, end; unsigned long next; - addr = virt_to_phys(text_start); - end = virt_to_phys(text_end); + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; From c4673ca7850dae7146ecc35c69e8f17f2337c7d2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Sep 2014 22:17:23 +0200 Subject: [PATCH 285/287] mm: export symbol dependencies of is_zero_pfn() In order to make the static inline function is_zero_pfn() callable by modules, export its symbol dependencies 'zero_pfn' and (for s390 and mips) 'zero_page_mask'. We need this for KVM, as CONFIG_KVM is a tristate for all supported architectures except ARM and arm64, and testing a pfn whether it refers to the zero page is required to correctly distinguish the zero page from other special RAM ranges that may also have the PG_reserved bit set, but need to be treated as MMIO memory. Signed-off-by: Ard Biesheuvel Acked-by: Andrew Morton Signed-off-by: Paolo Bonzini (cherry picked from commit 0b70068e47e8f0c813a902dc3d6def601fd15acb) Signed-off-by: Christoffer Dall --- arch/mips/mm/init.c | 1 + arch/s390/mm/init.c | 1 + mm/memory.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b973e0af9cb..d340d53c345b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -74,6 +74,7 @@ */ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index eba15f18fd38..a4dfc0bd05db 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { diff --git a/mm/memory.c b/mm/memory.c index 5a35443c01ad..20bb9e901781 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; unsigned long highest_memmap_pfn __read_mostly; +EXPORT_SYMBOL(zero_pfn); + /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ From 331a9a729183832840d656036c5bc2b81942c294 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Sep 2014 15:16:00 +0200 Subject: [PATCH 286/287] KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn() Read-only memory ranges may be backed by the zero page, so avoid misidentifying it a a MMIO pfn. This fixes another issue I identified when testing QEMU+KVM_UEFI, where a read to an uninitialized emulated NOR flash brought in the zero page, but mapped as a read-write device region, because kvm_is_mmio_pfn() misidentifies it as a MMIO pfn due to its PG_reserved bit being set. Signed-off-by: Ard Biesheuvel Fixes: b88657674d39 ("ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping") Signed-off-by: Paolo Bonzini (cherry picked from commit 85c8555ff07ef09261bd50d603cd4290cff5a8cc) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f019669674a5..b64d44219f27 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -110,7 +110,7 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); return true; } From b8a669d29702a8fb529f4fae450a86b8676b0e42 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 19 Sep 2014 09:40:41 +1000 Subject: [PATCH 287/287] KVM: correct null pid check in kvm_vcpu_yield_to() Correct a simple mistake of checking the wrong variable before a dereference, resulting in the dereference not being properly protected by rcu_dereference(). Signed-off-by: Sam Bobroff Signed-off-by: Paolo Bonzini (cherry picked from commit 27fbe64bfa63cfb9da025975b59d96568caa2d53) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b64d44219f27..9cae94206f41 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1728,7 +1728,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_lock(); pid = rcu_dereference(target->pid); if (pid) - task = get_pid_task(target->pid, PIDTYPE_PID); + task = get_pid_task(pid, PIDTYPE_PID); rcu_read_unlock(); if (!task) return ret;